~drizzle-trunk/drizzle/development

813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
1
/* -*- mode: c; c-basic-offset: 2; indent-tabs-mode: nil; -*-
2
 *  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
3
 *
4
 *  Copyright (C) 2008 Sun Microsystems
5
 *
6
 *  Authors:
7
 *
8
 *  Jay Pipes <jay.pipes@sun.com>
9
 *
10
 *  This program is free software; you can redistribute it and/or modify
11
 *  it under the terms of the GNU General Public License as published by
12
 *  the Free Software Foundation; either version 2 of the License, or
13
 *  (at your option) any later version.
14
 *
15
 *  This program is distributed in the hope that it will be useful,
16
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18
 *  GNU General Public License for more details.
19
 *
20
 *  You should have received a copy of the GNU General Public License
21
 *  along with this program; if not, write to the Free Software
22
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
23
 */
24
25
/**
26
 * @file 
27
 *
28
 * Implementation of the server's date and time string matching utility.
29
 */
30
31
#include <string> /** C++ string class used */
32
#include <string.h>
33
#include <vector>
34
#include <pcre.h>
35
36
#include "drizzled/global.h"
37
#include "drizzled/temporal_format.h"
38
#include "drizzled/temporal.h"
39
40
namespace drizzled
41
{
42
43
  /**
   * Builds a matcher for one temporal format.
   *
   * Every part index starts out at zero, the match vector is zero-filled,
   * and the pattern is handed to pcre_compile().  The compiled expression
   * is kept in _re; pcre_compile() is given _error and _error_offset to
   * report a compilation problem through.
   *
   * @param pattern  Regular expression describing the format
   */
  TemporalFormat::TemporalFormat(const char *pattern)
    : _pattern(pattern),
      _error_offset(0),
      _error(NULL),
      _year_part_index(0),
      _month_part_index(0),
      _day_part_index(0),
      _hour_part_index(0),
      _minute_part_index(0),
      _second_part_index(0),
      _usecond_part_index(0),
      _nsecond_part_index(0)
  {
    /* Start from a clean match vector so no stale offsets are ever read. */
    memset(_match_vector, 0, sizeof(_match_vector));

    /* Default options (0) and the default character table (NULL). */
    _re= pcre_compile(pattern, 0, &_error, &_error_offset, NULL);
  }
68
69
bool TemporalFormat::matches(const char *data, size_t data_len, Temporal *to)
70
{
71
  if (! is_valid()) 
72
    return false;
73
  
74
  /* Simply check the subject against the compiled regular expression */
75
  int32_t result= pcre_exec(_re
76
                            , NULL /* No extra data */
77
                            , data
78
                            , data_len
79
                            , 0 /* Start at offset 0 of subject...*/
80
                            , 0 /* Default options */
81
                            , _match_vector
82
                            , OUT_VECTOR_SIZE
83
                            );
84
  if (result < 0)
85
  {
86
    switch (result)
87
    {
88
      case PCRE_ERROR_NOMATCH:
89
        return false; /* No match, just return false */
90
      default:
91
        return false;
92
    }
93
    return false;
94
  }
95
96
  int32_t expected_match_count= (_year_part_index > 1 ? 1 : 0)
97
                              + (_month_part_index > 1 ? 1 : 0)
98
                              + (_day_part_index > 1 ? 1 : 0)
99
                              + (_hour_part_index > 1 ? 1 : 0)
100
                              + (_minute_part_index > 1 ? 1 : 0)
101
                              + (_second_part_index > 1 ? 1 : 0)
813.1.12 by Jay Pipes
Fixes for SECOND() function to use new Temporal system. Because
102
                              + (_usecond_part_index > 1 ? 1 : 0)
103
                              + (_nsecond_part_index > 1 ? 1 : 0)
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
104
                              + 1; /* Add one for the entire match... */
105
  if (result != expected_match_count)
106
    return false;
107
108
  /* C++ string class easy to use substr() method is very useful here */
109
  std::string copy_data(data, data_len);
110
  /* 
111
   * OK, we have the expected substring matches, so grab
112
   * the various temporal parts from the subject string
113
   *
114
   * @note 
115
   *
116
   * TemporalFormatMatch is a friend class to Temporal, so
117
   * we can access the temporal instance's protected data.
118
   */
119
  if (_year_part_index > 1)
120
  {
121
    size_t year_start= _match_vector[_year_part_index];
122
    size_t year_len= _match_vector[_year_part_index + 1] - _match_vector[_year_part_index];
123
    to->_years= atoi(copy_data.substr(year_start, year_len).c_str());
124
    if (year_len == 2)
125
      to->_years+= (to->_years >= DRIZZLE_YY_PART_YEAR ? 1900 : 2000);
126
  }
127
  if (_month_part_index > 1)
128
  {
129
    size_t month_start= _match_vector[_month_part_index];
130
    size_t month_len= _match_vector[_month_part_index + 1] - _match_vector[_month_part_index];
131
    to->_months= atoi(copy_data.substr(month_start, month_len).c_str());
132
  }
133
  if (_day_part_index > 1)
134
  {
135
    size_t day_start= _match_vector[_day_part_index];
136
    size_t day_len= _match_vector[_day_part_index + 1] - _match_vector[_day_part_index];
137
    to->_days= atoi(copy_data.substr(day_start, day_len).c_str());
138
  }
139
  if (_hour_part_index > 1)
140
  {
141
    size_t hour_start= _match_vector[_hour_part_index];
142
    size_t hour_len= _match_vector[_hour_part_index + 1] - _match_vector[_hour_part_index];
143
    to->_hours= atoi(copy_data.substr(hour_start, hour_len).c_str());
144
  }
145
  if (_minute_part_index > 1)
146
  {
147
    size_t minute_start= _match_vector[_minute_part_index];
148
    size_t minute_len= _match_vector[_minute_part_index + 1] - _match_vector[_minute_part_index];
149
    to->_minutes= atoi(copy_data.substr(minute_start, minute_len).c_str());
150
  }
151
  if (_second_part_index > 1)
152
  {
153
    size_t second_start= _match_vector[_second_part_index];
154
    size_t second_len= _match_vector[_second_part_index + 1] - _match_vector[_second_part_index];
155
    to->_seconds= atoi(copy_data.substr(second_start, second_len).c_str());
156
  }
157
  if (_usecond_part_index > 1)
158
  {
159
    size_t usecond_start= _match_vector[_usecond_part_index];
160
    size_t usecond_len= _match_vector[_usecond_part_index + 1] - _match_vector[_usecond_part_index];
813.1.19 by Jay Pipes
To remain in compatibility with MySQL, added ability to interpret
161
    /* 
162
     * For microseconds, which are millionth of 1 second, 
163
     * we must ensure that we produce a correct result, 
164
     * even if < 6 places were specified.  For instance, if we get .1, 
165
     * we must produce 100000. .11 should produce 110000, etc.
166
     */
167
    uint32_t multiplier= 1;
168
    int32_t x= usecond_len;
169
    while (x < 6)
170
    {
171
      multiplier*= 10;
172
      ++x;
173
    }
174
    to->_useconds= atoi(copy_data.substr(usecond_start, usecond_len).c_str()) * multiplier;
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
175
  }
813.1.12 by Jay Pipes
Fixes for SECOND() function to use new Temporal system. Because
176
  if (_nsecond_part_index > 1)
177
  {
178
    size_t nsecond_start= _match_vector[_nsecond_part_index];
179
    size_t nsecond_len= _match_vector[_nsecond_part_index + 1] - _match_vector[_nsecond_part_index];
813.1.19 by Jay Pipes
To remain in compatibility with MySQL, added ability to interpret
180
    /* 
181
     * For nanoseconds, which are 1 billionth of a second, 
182
     * we must ensure that we produce a correct result, 
183
     * even if < 9 places were specified.  For instance, if we get .1, 
184
     * we must produce 100000000. .11 should produce 110000000, etc.
185
     */
186
    uint32_t multiplier= 1;
187
    int32_t x= nsecond_len;
188
    while (x < 9)
189
    {
190
      multiplier*= 10;
191
      ++x;
192
    }
193
    to->_nseconds= atoi(copy_data.substr(nsecond_start, nsecond_len).c_str()) * multiplier;
813.1.12 by Jay Pipes
Fixes for SECOND() function to use new Temporal system. Because
194
  }
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
195
  return true;
196
}
197
198
} /* end namespace drizzled */
199
813.1.19 by Jay Pipes
To remain in compatibility with MySQL, added ability to interpret
200
/** Number of entries in the known-format table below. */
#define COUNT_KNOWN_FORMATS 14

/**
 * Describes one known temporal format: its regular expression and, for
 * each temporal part, the number of the capture group that holds it.
 * A value of 0 means the format does not contain that part.
 */
struct temporal_format_args
{
  const char *pattern;        /* Regular expression for the format */
  int32_t year_part_index;    /* Capture group of the year, or 0 */
  int32_t month_part_index;   /* Capture group of the month, or 0 */
  int32_t day_part_index;     /* Capture group of the day, or 0 */
  int32_t hour_part_index;    /* Capture group of the hour, or 0 */
  int32_t minute_part_index;  /* Capture group of the minute, or 0 */
  int32_t second_part_index;  /* Capture group of the second, or 0 */
  int32_t usecond_part_index; /* Capture group of the microseconds, or 0 */
  int32_t nsecond_part_index; /* Capture group of the nanoseconds, or 0 */
};

/**
 * A collection of all known format strings.
 *
 * @note
 *
 * IMPORTANT: Make sure TIMESTAMP and DATETIME formats precede DATE formats
 * and TIME formats, as the matching functionality matches on the first hit.
 *
 * @note
 *
 * The separator between the date and the time part is written as
 * (?:T|\s+), i.e. a literal 'T' or a run of whitespace.  It is a
 * non-capturing group so the capture numbering is unaffected.  (Inside a
 * character class, '|' and '+' would be matched literally.)
 *
 * @note
 *
 * Remember to increment COUNT_KNOWN_FORMATS when you add a known format!
 * The array size is deduced from the initializer and checked against
 * COUNT_KNOWN_FORMATS at compile time, so a forgotten update fails the
 * build instead of leaving zero-filled entries with a NULL pattern.
 */
static struct temporal_format_args __format_args[]=
{
  {"^(\\d{4})(\\d{2})(\\d{2})(\\d{2})(\\d{2})(\\d{2})\\.(\\d{1,6})$", 1, 2, 3, 4, 5, 6, 7, 0} /* YYYYMMDDHHmmSS.uuuuuu */
, {"^(\\d{4})(\\d{2})(\\d{2})(\\d{2})(\\d{2})(\\d{2})$", 1, 2, 3, 4, 5, 6, 0, 0} /* YYYYMMDDHHmmSS */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})(?:T|\\s+)(\\d{2}):(\\d{2}):(\\d{2})\\.(\\d{1,6})$", 1, 2, 3, 4, 5, 6, 7, 0} /* YYYY[/-.]MM[/-.]DD[T]HH:mm:SS.uuuuuu */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})(?:T|\\s+)(\\d{2}):(\\d{2}):(\\d{2})$", 1, 2, 3, 4, 5, 6, 0, 0} /* YYYY[/-.]MM[/-.]DD[T]HH:mm:SS */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YYYY-MM-DD, YYYY.MM.DD, YYYY/MM/DD */
, {"^(\\d{4})(\\d{2})(\\d{2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YYYYMMDD */
, {"^(\\d{2})[-/.]*(\\d{2})[-/.]*(\\d{4})$", 3, 1, 2, 0, 0, 0, 0, 0} /* MM[-/.]DD[-/.]YYYY (US common format)*/
, {"^(\\d{2})[-/.]*(\\d{2})[-/.]*(\\d{2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YY[-/.]MM[-/.]DD */
, {"^(\\d{2})[-/.]*(\\d{1,2})[-/.]*(\\d{1,2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YY[-/.][M]M[-/.][D]D */
, {"^(\\d{2}):*(\\d{2}):*(\\d{2})\\.(\\d{1,6})$", 0, 0, 0, 1, 2, 3, 4, 0} /* HHmmSS.uuuuuu, HH:mm:SS.uuuuuu */
, {"^(\\d{1,2}):*(\\d{2}):*(\\d{2})$", 0, 0, 0, 1, 2, 3, 0, 0} /* [H]HmmSS, [H]H:mm:SS */
, {"^(\\d{1,2}):*(\\d{2})$", 0, 0, 0, 0, 1, 2, 0, 0} /* [m]mSS, [m]m:SS */
, {"^(\\d{1,2})$", 0, 0, 0, 0, 0, 1, 0, 0} /* SS, S */
, {"^(\\d{1,2})\\.(\\d{1,6})$", 0, 0, 0, 0, 0, 1, 2, 0} /* [S]S.uuuuuu */
};

/* Fails to compile (negative array size) if the table and the count disagree. */
typedef char known_format_count_check[
  (sizeof(__format_args) / sizeof(__format_args[0]) == COUNT_KNOWN_FORMATS) ? 1 : -1];
244
245
/*
 * Formats that contain a year part.  Filled by init_temporal_formats(),
 * which adds every table entry whose year_part_index is greater than 0
 * (so date-only formats are in this list too).
 */
std::vector<drizzled::TemporalFormat*> known_datetime_formats;
246
/*
 * Formats that contain a year part but no seconds part.  Filled by
 * init_temporal_formats(); the pointers stored here are the same objects
 * that it also stores in known_datetime_formats.
 */
std::vector<drizzled::TemporalFormat*> known_date_formats;
247
/*
 * Formats that contain a seconds part but no year part.  Filled by
 * init_temporal_formats().
 */
std::vector<drizzled::TemporalFormat*> known_time_formats;
248
249
/**
250
 * We allocate and initialize all known date/time formats.
251
 *
252
 * @TODO Cut down calls to new. Allocate as a block...
253
 */
254
bool init_temporal_formats()
255
{
256
  /* Compile all the regular expressions for the datetime formats */
257
  drizzled::TemporalFormat *tmp;
258
  struct temporal_format_args current_format_args;
259
  int32_t x;
260
  
261
  for (x= 0; x<COUNT_KNOWN_FORMATS; ++x)
262
  {
263
    current_format_args= __format_args[x];
264
    tmp= new drizzled::TemporalFormat(current_format_args.pattern);
265
    tmp->set_year_part_index(current_format_args.year_part_index);
266
    tmp->set_month_part_index(current_format_args.month_part_index);
267
    tmp->set_day_part_index(current_format_args.day_part_index);
268
    tmp->set_hour_part_index(current_format_args.hour_part_index);
269
    tmp->set_minute_part_index(current_format_args.minute_part_index);
270
    tmp->set_second_part_index(current_format_args.second_part_index);
271
    tmp->set_usecond_part_index(current_format_args.usecond_part_index);
813.1.12 by Jay Pipes
Fixes for SECOND() function to use new Temporal system. Because
272
    tmp->set_nsecond_part_index(current_format_args.nsecond_part_index);
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
273
813.1.12 by Jay Pipes
Fixes for SECOND() function to use new Temporal system. Because
274
    if (current_format_args.year_part_index > 0) /* A date must have a year */
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
275
    {
276
      known_datetime_formats.push_back(tmp);
813.1.12 by Jay Pipes
Fixes for SECOND() function to use new Temporal system. Because
277
      if (current_format_args.second_part_index == 0) /* A time must have seconds. */
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
278
        known_date_formats.push_back(tmp);
279
    }
813.1.12 by Jay Pipes
Fixes for SECOND() function to use new Temporal system. Because
280
    if (current_format_args.second_part_index > 0) /* A time must have seconds, but may not have minutes or hours */
281
      if (current_format_args.year_part_index == 0) /* A time may not have a date part, and date parts must have a year */
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
282
        known_time_formats.push_back(tmp);
283
  }
284
  return true;
285
}