~drizzle-trunk/drizzle/development

813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
1
/* -*- mode: c; c-basic-offset: 2; indent-tabs-mode: nil; -*-
 *  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
 *
 *  Copyright (C) 2008 Sun Microsystems, Inc.
 *
 *  Authors:
 *
 *  Jay Pipes <jay.pipes@sun.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
24
25
/**
26
 * @file 
27
 *
28
 * Implementation of the server's date and time string matching utility.
29
 */
30
2173.2.1 by Monty Taylor
Fixes incorrect usage of include
31
#include <config.h>
873.1.3 by Jay Pipes
Fixed PCRE header and include order.
32
2318.6.63 by Olaf van der Spek
Refactor
33
#include <boost/foreach.hpp>
2173.2.1 by Monty Taylor
Fixes incorrect usage of include
34
#include <drizzled/temporal_format.h>
35
#include <drizzled/temporal.h>
873.1.3 by Jay Pipes
Fixed PCRE header and include order.
36
873.1.1 by Jay Pipes
Fixes the Field_date class to not allow any invalid input at
37
#include <string.h>
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
38
#include PCRE_HEADER
39
40
#include <string>
873.1.1 by Jay Pipes
Fixes the Field_date class to not allow any invalid input at
41
#include <vector>
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
42
43
using namespace std;
859.1.6 by Monty Taylor
Fix for multi-versions of PCRE thing.
44
2318.6.63 by Olaf van der Spek
Refactor
45
namespace drizzled {
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
46
1252.1.1 by Jay Pipes
Fixes LP Bug #500031:
47
/**
 * Creates a matcher for one temporal format.
 *
 * All part indexes start out as 0; callers assign the real ones through
 * the set_*_part_index() methods afterwards.  The pattern is compiled
 * immediately with pcre_compile(); the compiled expression is kept in _re,
 * and pcre_compile() reports any problem through _error / _error_offset.
 *
 * @param pattern Regular expression, in PCRE syntax, describing the format
 */
TemporalFormat::TemporalFormat(const char *pattern)
  : _pattern(pattern),
    _error_offset(0),
    _error(NULL),
    _year_part_index(0),
    _month_part_index(0),
    _day_part_index(0),
    _hour_part_index(0),
    _minute_part_index(0),
    _second_part_index(0),
    _usecond_part_index(0),
    _nsecond_part_index(0)
{
  /*
   * Compile with default options (0) and PCRE's built-in character
   * tables (NULL).
   */
  _re= pcre_compile(pattern, 0, &_error, &_error_offset, NULL);
}
68
2363.1.1 by Brian Aker
Fix memory leak in temporal and json server.
69
/**
 * Releases the compiled regular expression.
 *
 * _re is NULL when pcre_compile() rejected the pattern.  pcre_free is a
 * replaceable function pointer, so we do not rely on whatever deallocator
 * is installed accepting NULL; we simply skip the call in that case.
 */
TemporalFormat::~TemporalFormat()
{
  if (_re != NULL)
  {
    pcre_free(_re);
    _re= NULL;
  }
}
73
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
74
bool TemporalFormat::matches(const char *data, size_t data_len, Temporal *to)
75
{
76
  if (! is_valid()) 
77
    return false;
1252.1.1 by Jay Pipes
Fixes LP Bug #500031:
78
79
  int32_t match_vector[OUT_VECTOR_SIZE]; /**< Stores match substring indexes */
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
80
  
1252.1.1 by Jay Pipes
Fixes LP Bug #500031:
81
  /* Make sure we've got no junk in the match_vector. */
82
  memset(match_vector, 0, sizeof(match_vector));
83
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
84
  /* Simply check the subject against the compiled regular expression */
85
  int32_t result= pcre_exec(_re
86
                            , NULL /* No extra data */
87
                            , data
88
                            , data_len
89
                            , 0 /* Start at offset 0 of subject...*/
90
                            , 0 /* Default options */
1252.1.1 by Jay Pipes
Fixes LP Bug #500031:
91
                            , match_vector
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
92
                            , OUT_VECTOR_SIZE
93
                            );
94
  if (result < 0)
95
  {
96
    switch (result)
97
    {
98
      case PCRE_ERROR_NOMATCH:
99
        return false; /* No match, just return false */
100
      default:
101
        return false;
102
    }
103
    return false;
104
  }
105
106
  int32_t expected_match_count= (_year_part_index > 1 ? 1 : 0)
107
                              + (_month_part_index > 1 ? 1 : 0)
108
                              + (_day_part_index > 1 ? 1 : 0)
109
                              + (_hour_part_index > 1 ? 1 : 0)
110
                              + (_minute_part_index > 1 ? 1 : 0)
111
                              + (_second_part_index > 1 ? 1 : 0)
813.1.12 by Jay Pipes
Fixes for SECOND() function to use new Temporal system. Because
112
                              + (_usecond_part_index > 1 ? 1 : 0)
113
                              + (_nsecond_part_index > 1 ? 1 : 0)
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
114
                              + 1; /* Add one for the entire match... */
115
  if (result != expected_match_count)
116
    return false;
117
118
  /* C++ string class easy to use substr() method is very useful here */
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
119
  string copy_data(data, data_len);
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
120
  /* 
121
   * OK, we have the expected substring matches, so grab
122
   * the various temporal parts from the subject string
123
   *
124
   * @note 
125
   *
126
   * TemporalFormatMatch is a friend class to Temporal, so
127
   * we can access the temporal instance's protected data.
128
   */
129
  if (_year_part_index > 1)
130
  {
1252.1.1 by Jay Pipes
Fixes LP Bug #500031:
131
    size_t year_start= match_vector[_year_part_index];
132
    size_t year_len= match_vector[_year_part_index + 1] - match_vector[_year_part_index];
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
133
    to->_years= atoi(copy_data.substr(year_start, year_len).c_str());
134
    if (year_len == 2)
135
      to->_years+= (to->_years >= DRIZZLE_YY_PART_YEAR ? 1900 : 2000);
136
  }
137
  if (_month_part_index > 1)
138
  {
1252.1.1 by Jay Pipes
Fixes LP Bug #500031:
139
    size_t month_start= match_vector[_month_part_index];
140
    size_t month_len= match_vector[_month_part_index + 1] - match_vector[_month_part_index];
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
141
    to->_months= atoi(copy_data.substr(month_start, month_len).c_str());
142
  }
143
  if (_day_part_index > 1)
144
  {
1252.1.1 by Jay Pipes
Fixes LP Bug #500031:
145
    size_t day_start= match_vector[_day_part_index];
146
    size_t day_len= match_vector[_day_part_index + 1] - match_vector[_day_part_index];
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
147
    to->_days= atoi(copy_data.substr(day_start, day_len).c_str());
148
  }
149
  if (_hour_part_index > 1)
150
  {
1252.1.1 by Jay Pipes
Fixes LP Bug #500031:
151
    size_t hour_start= match_vector[_hour_part_index];
152
    size_t hour_len= match_vector[_hour_part_index + 1] - match_vector[_hour_part_index];
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
153
    to->_hours= atoi(copy_data.substr(hour_start, hour_len).c_str());
154
  }
155
  if (_minute_part_index > 1)
156
  {
1252.1.1 by Jay Pipes
Fixes LP Bug #500031:
157
    size_t minute_start= match_vector[_minute_part_index];
158
    size_t minute_len= match_vector[_minute_part_index + 1] - match_vector[_minute_part_index];
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
159
    to->_minutes= atoi(copy_data.substr(minute_start, minute_len).c_str());
160
  }
161
  if (_second_part_index > 1)
162
  {
1252.1.1 by Jay Pipes
Fixes LP Bug #500031:
163
    size_t second_start= match_vector[_second_part_index];
164
    size_t second_len= match_vector[_second_part_index + 1] - match_vector[_second_part_index];
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
165
    to->_seconds= atoi(copy_data.substr(second_start, second_len).c_str());
166
  }
167
  if (_usecond_part_index > 1)
168
  {
1252.1.1 by Jay Pipes
Fixes LP Bug #500031:
169
    size_t usecond_start= match_vector[_usecond_part_index];
170
    size_t usecond_len= match_vector[_usecond_part_index + 1] - match_vector[_usecond_part_index];
813.1.19 by Jay Pipes
To remain in compatibility with MySQL, added ability to interpret
171
    /* 
172
     * For microseconds, which are millionth of 1 second, 
173
     * we must ensure that we produce a correct result, 
174
     * even if < 6 places were specified.  For instance, if we get .1, 
175
     * we must produce 100000. .11 should produce 110000, etc.
176
     */
177
    uint32_t multiplier= 1;
178
    int32_t x= usecond_len;
179
    while (x < 6)
180
    {
181
      multiplier*= 10;
182
      ++x;
183
    }
184
    to->_useconds= atoi(copy_data.substr(usecond_start, usecond_len).c_str()) * multiplier;
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
185
  }
813.1.12 by Jay Pipes
Fixes for SECOND() function to use new Temporal system. Because
186
  if (_nsecond_part_index > 1)
187
  {
1252.1.1 by Jay Pipes
Fixes LP Bug #500031:
188
    size_t nsecond_start= match_vector[_nsecond_part_index];
189
    size_t nsecond_len= match_vector[_nsecond_part_index + 1] - match_vector[_nsecond_part_index];
813.1.19 by Jay Pipes
To remain in compatibility with MySQL, added ability to interpret
190
    /* 
191
     * For nanoseconds, which are 1 billionth of a second, 
192
     * we must ensure that we produce a correct result, 
193
     * even if < 9 places were specified.  For instance, if we get .1, 
194
     * we must produce 100000000. .11 should produce 110000000, etc.
195
     */
196
    uint32_t multiplier= 1;
197
    int32_t x= nsecond_len;
198
    while (x < 9)
199
    {
200
      multiplier*= 10;
201
      ++x;
202
    }
203
    to->_nseconds= atoi(copy_data.substr(nsecond_start, nsecond_len).c_str()) * multiplier;
813.1.12 by Jay Pipes
Fixes for SECOND() function to use new Temporal system. Because
204
  }
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
205
  return true;
206
}
207
208
907.1.7 by Jay Pipes
Merged in remove-timezone work
209
#define COUNT_KNOWN_FORMATS 19

/**
 * Describes one known temporal format.
 *
 * Each *_part_index is the 1-based number of the capture group in
 * pattern that holds that part, or 0 when the format has no such part.
 */
struct temporal_format_args
{
  const char *pattern;
  int32_t year_part_index;
  int32_t month_part_index;
  int32_t day_part_index;
  int32_t hour_part_index;
  int32_t minute_part_index;
  int32_t second_part_index;
  int32_t usecond_part_index;
  int32_t nsecond_part_index;
};

/**
 * A collection of all known format strings.
 *
 * @note
 *
 * IMPORTANT: Make sure TIMESTAMP and DATETIME formats precede DATE formats and TIME formats,
 * as the matching functionality matches on the first hit.
 *
 * @note
 *
 * Remember to increment COUNT_KNOWN_FORMATS when you add a known format!
 *
 * @note
 *
 * The separator between the date and the time is written as (?:T|\s+)
 * or \s+.  Do not put it inside square brackets: "[T|\s+]" is a character
 * class that also accepts a literal '|' or '+' and only a single
 * whitespace character.  The group is non-capturing so that the part
 * indexes below keep referring to the same capture groups.
 */
static struct temporal_format_args __format_args[COUNT_KNOWN_FORMATS]=
{
  {"^(\\d{4})(\\d{2})(\\d{2})(\\d{2})(\\d{2})(\\d{2})\\.(\\d{1,6})$", 1, 2, 3, 4, 5, 6, 7, 0} /* YYYYMMDDHHmmSS.uuuuuu */
, {"^(\\d{4})(\\d{2})(\\d{2})(\\d{2})(\\d{2})(\\d{2})$", 1, 2, 3, 4, 5, 6, 0, 0} /* YYYYMMDDHHmmSS */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})(?:T|\\s+)(\\d{2}):(\\d{2}):(\\d{2})\\.(\\d{1,6})$", 1, 2, 3, 4, 5, 6, 7, 0} /* YYYY[/-.][M]M[/-.][D]D[T]HH:mm:SS.uuuuuu */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})(?:T|\\s+)(\\d{2}):(\\d{2}):(\\d{2})$", 1, 2, 3, 4, 5, 6, 0, 0} /* YYYY[/-.][M]M[/-.][D]D[T]HH:mm:SS */
, {"^(\\d{2})[-/.](\\d{1,2})[-/.](\\d{1,2})\\s+(\\d{2}):(\\d{2}):(\\d{2})$", 1, 2, 3, 4, 5, 6, 0, 0} /* YY[/-.][M]M[/-.][D]D HH:mm:SS */
, {"^(\\d{2})[-/.](\\d{1,2})[-/.](\\d{1,2})\\s+(\\d{2}):(\\d{2})$", 1, 2, 3, 4, 5, 0, 0, 0} /* YY[/-.][M]M[/-.][D]D HH:mm */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})\\s+(\\d{2}):(\\d{2})$", 1, 2, 3, 4, 5, 0, 0, 0} /* YYYY[/-.][M]M[/-.][D]D HH:mm */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YYYY-[M]M-[D]D, YYYY.[M]M.[D]D, YYYY/[M]M/[D]D */
, {"^(\\d{4})(\\d{2})(\\d{2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YYYYMMDD */
, {"^(\\d{2})[-/.]*(\\d{2})[-/.]*(\\d{4})$", 3, 1, 2, 0, 0, 0, 0, 0} /* MM[-/.]DD[-/.]YYYY (US common format)*/
, {"^(\\d{2})[-/.]*(\\d{2})[-/.]*(\\d{2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YY[-/.]MM[-/.]DD */
, {"^(\\d{2})[-/.]*(\\d{1,2})[-/.]*(\\d{1,2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YY[-/.][M]M[-/.][D]D */
, {"^(\\d{4})[-/.]*(\\d{1,2})[-/.]*(\\d{1,2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YYYY[-/.][M]M[-/.][D]D */
, {"^(\\d{2}):*(\\d{2}):*(\\d{2})\\.(\\d{1,6})$", 0, 0, 0, 1, 2, 3, 4, 0} /* HHmmSS.uuuuuu, HH:mm:SS.uuuuuu */
, {"^(\\d{1,2}):*(\\d{2}):*(\\d{2})$", 0, 0, 0, 1, 2, 3, 0, 0} /* [H]HmmSS, [H]H:mm:SS */
, {"^(\\d{1,2}):(\\d{1,2}):(\\d{1,2})$", 0, 0, 0, 1, 2, 3, 0, 0} /* [H]H:[m]m:[S]S */
, {"^(\\d{1,2}):*(\\d{2})$", 0, 0, 0, 0, 1, 2, 0, 0} /* [m]mSS, [m]m:SS */
, {"^(\\d{1,2})$", 0, 0, 0, 0, 0, 1, 0, 0} /* SS, S */
, {"^(\\d{1,2})\\.(\\d{1,6})$", 0, 0, 0, 0, 0, 1, 2, 0} /* [S]S.uuuuuu */
};
258
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
259
vector<TemporalFormat *> known_datetime_formats;
260
vector<TemporalFormat *> known_date_formats;
261
vector<TemporalFormat *> known_time_formats;
262
vector<TemporalFormat *> all_temporal_formats;
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
263
264
/**
265
 * We allocate and initialize all known date/time formats.
266
 *
267
 * @TODO Cut down calls to new. Allocate as a block...
268
 */
269
bool init_temporal_formats()
270
{
271
  /* Compile all the regular expressions for the datetime formats */
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
272
  TemporalFormat *tmp;
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
273
  struct temporal_format_args current_format_args;
274
  
2020 by Brian Aker
This takes time and turns it into a fuzzy type so that we can do
275
  for (int32_t x= 0; x < COUNT_KNOWN_FORMATS; ++x)
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
276
  {
277
    current_format_args= __format_args[x];
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
278
    tmp= new TemporalFormat(current_format_args.pattern);
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
279
    tmp->set_year_part_index(current_format_args.year_part_index);
280
    tmp->set_month_part_index(current_format_args.month_part_index);
281
    tmp->set_day_part_index(current_format_args.day_part_index);
282
    tmp->set_hour_part_index(current_format_args.hour_part_index);
283
    tmp->set_minute_part_index(current_format_args.minute_part_index);
284
    tmp->set_second_part_index(current_format_args.second_part_index);
285
    tmp->set_usecond_part_index(current_format_args.usecond_part_index);
813.1.12 by Jay Pipes
Fixes for SECOND() function to use new Temporal system. Because
286
    tmp->set_nsecond_part_index(current_format_args.nsecond_part_index);
1089.1.3 by Brian Aker
Fix protobuf to release memory. Add in assert() for wrong column usage. Fix
287
    
288
    /* 
289
     * We store the pointer in all_temporal_formats because we 
290
     * delete pointers from that vector and only that vector
291
     */
292
    all_temporal_formats.push_back(tmp); 
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
293
813.1.12 by Jay Pipes
Fixes for SECOND() function to use new Temporal system. Because
294
    if (current_format_args.year_part_index > 0) /* A date must have a year */
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
295
    {
296
      known_datetime_formats.push_back(tmp);
813.1.12 by Jay Pipes
Fixes for SECOND() function to use new Temporal system. Because
297
      if (current_format_args.second_part_index == 0) /* A time must have seconds. */
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
298
        known_date_formats.push_back(tmp);
299
    }
2019 by Brian Aker
Merge in change to allow for SQL Server style patterns.
300
813.1.12 by Jay Pipes
Fixes for SECOND() function to use new Temporal system. Because
301
    if (current_format_args.second_part_index > 0) /* A time must have seconds, but may not have minutes or hours */
2019 by Brian Aker
Merge in change to allow for SQL Server style patterns.
302
      known_time_formats.push_back(tmp);
813.1.2 by Jay Pipes
First function cleanup for temporal handling: YEAR()
303
  }
304
  return true;
305
}
1089.1.3 by Brian Aker
Fix protobuf to release memory. Add in assert() for wrong column usage. Fix
306
307
/** Free all allocated temporal formats */
308
void deinit_temporal_formats()
309
{
2318.6.63 by Olaf van der Spek
Refactor
310
  BOOST_FOREACH(TemporalFormat* it, all_temporal_formats)
311
    delete it;
1089.1.3 by Brian Aker
Fix protobuf to release memory. Add in assert() for wrong column usage. Fix
312
  known_date_formats.clear();
313
  known_datetime_formats.clear();
314
  known_time_formats.clear();
315
  all_temporal_formats.clear();
316
}
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
317
318
} /* end namespace drizzled */