~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to drizzled/temporal_format.cc

  • Committer: Jay Pipes
  • Date: 2009-12-24 22:16:14 UTC
  • mto: This revision was merged to the branch mainline in revision 1254.
  • Revision ID: jpipes@serialcoder-20091224221614-230j8vbqtfgqnt0c
Fixes LP Bug #500031:

"dbt2 fails with 1024 connections"

After investigation into this, I discovered that there was a 
race condition in TemporalFormat::match():

TemporalFormat::_re is the compiled PCRE regular expression object 
inside each of the TemporalFormat objects, which are shared 
among all threads and live in global scope.

Unfortunately, TemporalFormat::match() was using the member 
variable TemporalFormat::_match_vector as its match state. 
Because that member is shared by every thread using the same 
TemporalFormat object, the following race condition could 
happen at high concurrency:

Thread 1 executes pcre_exec() and finds a match, thereby 
populating the TemporalFormat::_match_vector array of integers 
with the position offsets of the matched pieces of the temporal object.

Thread 1, during construction of the Temporal output of 
TemporalFormat::match(), uses these _match_vector position 
offsets in calling std::string::substr on a copy of the 
matched string, essentially "cutting up" the string 
into year, month, day, etc.

Thread 2 executes pcre_exec() and also finds a match, 
thereby overwriting TemporalFormat::_match_vector with a 
different set of offsets.

Thread 1 continues trying to use std::string::substr(), 
but now uses offsets that are invalid for its string, 
thereby producing an out_of_range exception.

The solution is to remove the TemporalFormat::_match_vector 
member variable and instead use a function-scope match_vector 
variable on the stack inside TemporalFormat::match(), so that 
each call has its own private match state.

Show diffs side-by-side

added added

removed removed

Lines of Context:
41
41
namespace drizzled
42
42
{
43
43
 
44
 
  TemporalFormat::TemporalFormat(const char *pattern)
45
 
  :
 
44
TemporalFormat::TemporalFormat(const char *pattern) :
46
45
  _pattern(pattern)
47
46
, _error_offset(0)
48
47
, _error(NULL)
55
54
, _usecond_part_index(0)
56
55
, _nsecond_part_index(0)
57
56
{
58
 
  /* Make sure we've got no junk in the match_vector. */
59
 
  memset(_match_vector, 0, sizeof(_match_vector));
60
 
 
61
57
  /* Compile our regular expression */
62
58
  _re= pcre_compile(pattern
63
59
                    , 0 /* Default options */
71
67
{
72
68
  if (! is_valid()) 
73
69
    return false;
 
70
 
 
71
  int32_t match_vector[OUT_VECTOR_SIZE]; /**< Stores match substring indexes */
74
72
  
 
73
  /* Make sure we've got no junk in the match_vector. */
 
74
  memset(match_vector, 0, sizeof(match_vector));
 
75
 
75
76
  /* Simply check the subject against the compiled regular expression */
76
77
  int32_t result= pcre_exec(_re
77
78
                            , NULL /* No extra data */
79
80
                            , data_len
80
81
                            , 0 /* Start at offset 0 of subject...*/
81
82
                            , 0 /* Default options */
82
 
                            , _match_vector
 
83
                            , match_vector
83
84
                            , OUT_VECTOR_SIZE
84
85
                            );
85
86
  if (result < 0)
119
120
   */
120
121
  if (_year_part_index > 1)
121
122
  {
122
 
    size_t year_start= _match_vector[_year_part_index];
123
 
    size_t year_len= _match_vector[_year_part_index + 1] - _match_vector[_year_part_index];
 
123
    size_t year_start= match_vector[_year_part_index];
 
124
    size_t year_len= match_vector[_year_part_index + 1] - match_vector[_year_part_index];
124
125
    to->_years= atoi(copy_data.substr(year_start, year_len).c_str());
125
126
    if (year_len == 2)
126
127
      to->_years+= (to->_years >= DRIZZLE_YY_PART_YEAR ? 1900 : 2000);
127
128
  }
128
129
  if (_month_part_index > 1)
129
130
  {
130
 
    size_t month_start= _match_vector[_month_part_index];
131
 
    size_t month_len= _match_vector[_month_part_index + 1] - _match_vector[_month_part_index];
 
131
    size_t month_start= match_vector[_month_part_index];
 
132
    size_t month_len= match_vector[_month_part_index + 1] - match_vector[_month_part_index];
132
133
    to->_months= atoi(copy_data.substr(month_start, month_len).c_str());
133
134
  }
134
135
  if (_day_part_index > 1)
135
136
  {
136
 
    size_t day_start= _match_vector[_day_part_index];
137
 
    size_t day_len= _match_vector[_day_part_index + 1] - _match_vector[_day_part_index];
 
137
    size_t day_start= match_vector[_day_part_index];
 
138
    size_t day_len= match_vector[_day_part_index + 1] - match_vector[_day_part_index];
138
139
    to->_days= atoi(copy_data.substr(day_start, day_len).c_str());
139
140
  }
140
141
  if (_hour_part_index > 1)
141
142
  {
142
 
    size_t hour_start= _match_vector[_hour_part_index];
143
 
    size_t hour_len= _match_vector[_hour_part_index + 1] - _match_vector[_hour_part_index];
 
143
    size_t hour_start= match_vector[_hour_part_index];
 
144
    size_t hour_len= match_vector[_hour_part_index + 1] - match_vector[_hour_part_index];
144
145
    to->_hours= atoi(copy_data.substr(hour_start, hour_len).c_str());
145
146
  }
146
147
  if (_minute_part_index > 1)
147
148
  {
148
 
    size_t minute_start= _match_vector[_minute_part_index];
149
 
    size_t minute_len= _match_vector[_minute_part_index + 1] - _match_vector[_minute_part_index];
 
149
    size_t minute_start= match_vector[_minute_part_index];
 
150
    size_t minute_len= match_vector[_minute_part_index + 1] - match_vector[_minute_part_index];
150
151
    to->_minutes= atoi(copy_data.substr(minute_start, minute_len).c_str());
151
152
  }
152
153
  if (_second_part_index > 1)
153
154
  {
154
 
    size_t second_start= _match_vector[_second_part_index];
155
 
    size_t second_len= _match_vector[_second_part_index + 1] - _match_vector[_second_part_index];
 
155
    size_t second_start= match_vector[_second_part_index];
 
156
    size_t second_len= match_vector[_second_part_index + 1] - match_vector[_second_part_index];
156
157
    to->_seconds= atoi(copy_data.substr(second_start, second_len).c_str());
157
158
  }
158
159
  if (_usecond_part_index > 1)
159
160
  {
160
 
    size_t usecond_start= _match_vector[_usecond_part_index];
161
 
    size_t usecond_len= _match_vector[_usecond_part_index + 1] - _match_vector[_usecond_part_index];
 
161
    size_t usecond_start= match_vector[_usecond_part_index];
 
162
    size_t usecond_len= match_vector[_usecond_part_index + 1] - match_vector[_usecond_part_index];
162
163
    /* 
163
164
     * For microseconds, which are millionth of 1 second, 
164
165
     * we must ensure that we produce a correct result, 
176
177
  }
177
178
  if (_nsecond_part_index > 1)
178
179
  {
179
 
    size_t nsecond_start= _match_vector[_nsecond_part_index];
180
 
    size_t nsecond_len= _match_vector[_nsecond_part_index + 1] - _match_vector[_nsecond_part_index];
 
180
    size_t nsecond_start= match_vector[_nsecond_part_index];
 
181
    size_t nsecond_len= match_vector[_nsecond_part_index + 1] - match_vector[_nsecond_part_index];
181
182
    /* 
182
183
     * For nanoseconds, which are 1 billionth of a second, 
183
184
     * we must ensure that we produce a correct result,