~drizzle-trunk/drizzle/development

1300.4.4 by Stewart Smith
move SUBSTR, SUBSTRING and SUBSTR_INDEX to plugins. add parser hooks for substr being a plugin now.
1
/* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
2
 *  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
3
 *
4
 *  Copyright (C) 2008 Sun Microsystems
5
 *  Copyright (C) 2010 Stewart Smith
6
 *
7
 *  This program is free software; you can redistribute it and/or modify
8
 *  it under the terms of the GNU General Public License as published by
9
 *  the Free Software Foundation; version 2 of the License.
10
 *
11
 *  This program is distributed in the hope that it will be useful,
12
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 *  GNU General Public License for more details.
15
 *
16
 *  You should have received a copy of the GNU General Public License
17
 *  along with this program; if not, write to the Free Software
18
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19
 */
20
21
#include "config.h"
22
23
#include <drizzled/plugin/function.h>
24
#include <algorithm>
25
26
using namespace std;
27
using namespace drizzled;
28
29
#include <drizzled/function/str/strfunc.h>
30
31
class SubstrFunction :public Item_str_func
32
{
33
  String tmp_value;
34
public:
35
  SubstrFunction() :Item_str_func() {}
36
37
  String *val_str(String *);
38
  void fix_length_and_dec();
39
  const char *func_name() const { return "substr"; }
40
41
  bool check_argument_count(int n) { return n == 2 || n == 3; }
42
};
43
44
45
class SubstrIndexFunction :public Item_str_func
46
{
47
  String tmp_value;
48
public:
49
  SubstrIndexFunction() :Item_str_func() {}
50
51
  String *val_str(String *);
52
  void fix_length_and_dec();
53
  const char *func_name() const { return "substring_index"; }
54
55
  bool check_argument_count(int n) { return n == 3; }
56
};
57
58
/**
 * Evaluate substr(str, pos [, len]).
 *
 * @param str  scratch String handed to the first argument's val_str()
 * @return     0 with null_value set when any argument is NULL;
 *             &my_empty_string when the requested range is empty or out
 *             of bounds; the first argument's own String when the whole
 *             of it is requested; otherwise &tmp_value describing the
 *             requested slice.
 *
 * Positions are 1-based; a negative position counts from the end of the
 * string.  The code assumes a String is always shorter than INT32_MAX.
 */
String *SubstrFunction::val_str(String *str)
{
  assert(fixed == 1);

  const bool has_length_arg= (arg_count == 3);

  /* All arguments are evaluated first; their NULL state is checked after. */
  String *source= args[0]->val_str(str);
  /* Kept in 64 bits so that an out-of-range position is not truncated. */
  int64_t pos= args[1]->val_int();
  /* INT32_MAX stands in for "no limit" when no length was supplied. */
  int64_t len= INT32_MAX;
  if (has_length_arg)
    len= args[2]->val_int();

  null_value= (args[0]->null_value || args[1]->null_value ||
               (has_length_arg && args[2]->null_value));
  if (null_value)
    return 0;

  if (has_length_arg)
  {
    /* Zero, or a genuinely negative (signed) length: nothing to return. */
    if (len == 0 || (len < 0 && !args[2]->unsigned_flag))
      return &my_empty_string;
  }

  /*
    A non-positive value that survived the test above is a huge unsigned
    number; clamp it, like anything above INT32_MAX, to "no limit".
  */
  if (len <= 0 || len > INT32_MAX)
    len= INT32_MAX;

  /*
    A position outside the 32-bit range cannot address any character.
    With unsigned_flag set the raw value is a huge positive number.
  */
  bool pos_out_of_range;
  if (args[1]->unsigned_flag)
    pos_out_of_range= ((uint64_t) pos > INT32_MAX);
  else
    pos_out_of_range= (pos < INT32_MIN || pos > INT32_MAX);
  if (pos_out_of_range)
    return &my_empty_string;

  /* Convert the 1-based / from-the-end position into a 0-based index. */
  if (pos < 0)
    pos= source->numchars() + pos;
  else
    pos= pos - 1;

  /* NOTE(review): charpos() presumably maps a character index to a byte
     offset -- confirm against String::charpos. */
  pos= source->charpos((int) pos);
  if (pos < 0 || (uint) pos + 1 > source->length())
    return &my_empty_string;

  /* Size of the requested slice, capped at what is left after pos. */
  len= source->charpos((int) len, (uint32_t) pos);
  int64_t remaining= source->length() - pos;
  len= min(len, remaining);

  /* The whole input was asked for: hand it back without copying. */
  if (pos == 0 && (int64_t) source->length() == len)
    return source;

  tmp_value.set(*source, (uint32_t) pos, (uint32_t) len);
  return &tmp_value;
}
103
104
void SubstrFunction::fix_length_and_dec()
105
{
106
  max_length=args[0]->max_length;
107
108
  collation.set(args[0]->collation);
109
  if (args[1]->const_item())
110
  {
111
    int32_t start= (int32_t) args[1]->val_int();
112
    if (start < 0)
113
      max_length= ((uint)(-start) > max_length) ? 0 : (uint)(-start);
114
    else
115
      max_length-= min((uint)(start - 1), max_length);
116
  }
117
  if (arg_count == 3 && args[2]->const_item())
118
  {
119
    int32_t length= (int32_t) args[2]->val_int();
120
    if (length <= 0)
121
      max_length=0;
122
    else
123
      set_if_smaller(max_length,(uint) length);
124
  }
125
  max_length*= collation.collation->mbmaxlen;
126
}
127
128
129
void SubstrIndexFunction::fix_length_and_dec()
130
{
131
  max_length= args[0]->max_length;
132
133
  if (agg_arg_charsets(collation, args, 2, MY_COLL_CMP_CONV, 1))
134
    return;
135
}
136
137
138
/**
 * Evaluate substring_index(str, delimiter, count).
 *
 * For a positive count the part of str to the left of the count-th
 * delimiter (counting from the left) is returned; for a negative count the
 * part to the right of the |count|-th delimiter counting from the right.
 *
 * @param str  scratch String handed to the first argument's val_str()
 * @return     0 with null_value set when any argument is NULL;
 *             &my_empty_string when the string or the delimiter is empty
 *             or count is 0; the first argument's own String when it
 *             holds fewer than |count| delimiters; otherwise &tmp_value
 *             describing the selected part.
 *
 * tmp_value is also used as the buffer for evaluating the delimiter, so
 * every path that returns &tmp_value must have set it from the string
 * first.
 */
String *SubstrIndexFunction::val_str(String *str)
{
  assert(fixed == 1);
  String *res= args[0]->val_str(str);
  String *delimiter= args[1]->val_str(&tmp_value);
  int32_t count= (int32_t) args[2]->val_int();
  uint32_t offset;

  if (args[0]->null_value || args[1]->null_value || args[2]->null_value)
  {                                     // any argument is NULL
    null_value=1;
    return 0;
  }
  null_value=0;
  uint32_t delimiter_length= delimiter->length();
  if (!res->length() || !delimiter_length || !count)
    return &my_empty_string;            // Wrong parameters

  res->set_charset(collation.collation);

  if (use_mb(res->charset()))
  {
    /*
      Multi-byte character set: scan character by character so that a
      delimiter is never matched in the middle of a character.
    */
    const char *ptr= res->ptr();
    const char *strend= ptr+res->length();
    /* Last position at which a complete delimiter could still start. */
    const char *end= strend-delimiter_length+1;
    const char *search= delimiter->ptr();
    const char *search_end= search+delimiter_length;
    int32_t n=0,c=count,pass;
    uint32_t l;
    /*
      pass 0 (negative count only) counts all delimiter occurrences so the
      count can be converted into an ordinal from the left; pass 1 locates
      that occurrence.  A positive count starts directly at pass 1.
    */
    for (pass=(count>0);pass<2;++pass)
    {
      while (ptr < end)
      {
        if (*ptr == *search)
        {
          const char *i= ptr+1;
          const char *j= search+1;
          while (j != search_end)
            if (*i++ != *j++) goto skip;
          if (pass==0) ++n;
          else if (!--c) break;
          ptr+= delimiter_length;
          continue;
        }
    skip:
        if ((l=my_ismbchar(res->charset(), ptr,strend))) ptr+=l;
        else ++ptr;
      } /* either not found or got total number when count<0 */
      if (pass == 0) /* count<0 */
      {
        c+=n+1;
        if (c<=0) return res; /* not found, return original string */
        ptr=res->ptr();
      }
      else
      {
        if (c) return res; /* Not found, return original string */
        if (count>0) /* return left part */
        {
          tmp_value.set(*res,0,(ulong) (ptr-res->ptr()));
        }
        else /* return right part */
        {
          ptr+= delimiter_length;
          tmp_value.set(*res,(ulong) (ptr-res->ptr()), (ulong) (strend-ptr));
        }
      }
    }
  }
  else
  {
    if (count > 0)
    {                                   // start counting from the beginning
      for (offset=0; ; offset+= delimiter_length)
      {
        if ((int) (offset= res->strstr(*delimiter, offset)) < 0)
          return res;                   // Didn't find, return org string
        if (!--count)
        {
          tmp_value.set(*res,0,offset);
          break;
        }
      }
    }
    else
    {
      /*
        Negative index, start counting at the end
      */
      for (offset=res->length(); offset ;)
      {
        /*
          Search backwards from the current offset; a negative result
          means there is no further occurrence of the delimiter.
        */
        if ((int) (offset= res->strrstr(*delimiter, offset)) < 0)
          return res;                   // Didn't find, return org string
        /*
          At this point, we've searched for the substring
          the number of times as supplied by the index value
        */
        if (!++count)
        {
          offset+= delimiter_length;
          tmp_value.set(*res,offset,res->length()- offset);
          break;
        }
      }
      /*
        The loop also ends when a match at offset 0 leaves nothing more to
        search while delimiters are still owed.  tmp_value has not been
        set from the string in that case, so return the original string
        instead of whatever tmp_value currently holds.
      */
      if (count)
        return res;                     // Didn't find, return org string
    }
  }
  /*
    We always mark tmp_value as const so that if val_str() is called again
    on this object, we don't disrupt the contents of tmp_value when it was
    derived from another String.
  */
  tmp_value.mark_as_const();
  return (&tmp_value);
}
257
258
/*
  Factory objects for the two SQL functions provided by this plugin.
  Both start out unset and are allocated in initialize().
*/
plugin::Create_function<SubstrFunction> *substr_function(NULL);
plugin::Create_function<SubstrIndexFunction> *substr_index_function(NULL);
260
1324.2.2 by Monty Taylor
Use the plugin::Context everywhere.
261
/**
 * Plugin init hook: allocate the factories for "substr" and
 * "substring_index" and add them to the plugin context.
 *
 * @param context  plugin context the factories are added to
 * @return         always 0
 */
static int initialize(drizzled::plugin::Context &context)
{
  typedef plugin::Create_function<SubstrFunction> SubstrFactory;
  typedef plugin::Create_function<SubstrIndexFunction> SubstrIndexFactory;

  /* Both factories are created before either one is added to the context. */
  substr_function= new SubstrFactory("substr");
  substr_index_function= new SubstrIndexFactory("substring_index");

  context.add(substr_function);
  context.add(substr_index_function);

  return 0;
}
269
270
DRIZZLE_DECLARE_PLUGIN
271
{
272
  DRIZZLE_VERSION_ID,
273
  "substr_functions",
274
  "1.0",
275
  "Stewart Smith",
276
  "SUBSTR and SUBSTR",
277
  PLUGIN_LICENSE_GPL,
278
  initialize, /* Plugin Init */
279
  NULL,   /* system variables */
280
  NULL    /* config options */
281
}
282
DRIZZLE_DECLARE_PLUGIN_END;