~drizzle-trunk/drizzle/development

1 by brian
clean slate
/* Copyright (C) 2003 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* Some useful string utility functions used by the MySQL server */
17
18
#include "mysql_priv.h"
19
20
/*
21
  Return bitmap for strings used in a set
22
23
  SYNOPSIS
24
  find_set()
25
  lib			Strings in set
26
  str			Strings of set-strings separated by ','
27
  err_pos		If error, set to point to start of wrong set string
28
  err_len		If error, set to the length of wrong set string
29
  set_warning		Set to 1 if some string in set couldn't be used
30
31
  NOTE
32
    We delete all end space from str before comparison
33
34
  RETURN
35
    bitmap of all sets found in x.
36
    set_warning is set to 1 if there was any sets that couldn't be set
37
*/
38
39
static const char field_separator=',';
40
41
ulonglong find_set(TYPELIB *lib, const char *str, uint length, CHARSET_INFO *cs,
42
                   char **err_pos, uint *err_len, bool *set_warning)
43
{
44
  CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
45
  const char *end= str + strip->cset->lengthsp(strip, str, length);
46
  ulonglong found= 0;
47
  *err_pos= 0;                  // No error yet
48
  if (str != end)
49
  {
50
    const char *start= str;    
51
    for (;;)
52
    {
53
      const char *pos= start;
54
      uint var_len;
55
      int mblen= 1;
56
57
      if (cs && cs->mbminlen > 1)
58
      {
59
        for ( ; pos < end; pos+= mblen)
60
        {
61
          my_wc_t wc;
62
          if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos, 
63
                                               (const uchar *) end)) < 1)
64
            mblen= 1; // Not to hang on a wrong multibyte sequence
65
          if (wc == (my_wc_t) field_separator)
66
            break;
67
        }
68
      }
69
      else
70
        for (; pos != end && *pos != field_separator; pos++) ;
71
      var_len= (uint) (pos - start);
72
      uint find= cs ? find_type2(lib, start, var_len, cs) :
73
                      find_type(lib, start, var_len, (bool) 0);
74
      if (!find)
75
      {
76
        *err_pos= (char*) start;
77
        *err_len= var_len;
78
        *set_warning= 1;
79
      }
80
      else
81
        found|= ((longlong) 1 << (find - 1));
82
      if (pos >= end)
83
        break;
84
      start= pos + mblen;
85
    }
86
  }
87
  return found;
88
}
89
90
91
/*
92
  Function to find a string in a TYPELIB
93
  (Same format as mysys/typelib.c)
94
95
  SYNOPSIS
96
   find_type()
97
   lib			TYPELIB (struct of pointer to values + count)
98
   find			String to find
99
   length		Length of string to find
100
   part_match		Allow part matching of value
101
102
 RETURN
103
  0 error
104
  > 0 position in TYPELIB->type_names +1
105
*/
106
107
uint find_type(const TYPELIB *lib, const char *find, uint length,
108
               bool part_match)
109
{
110
  uint found_count=0, found_pos=0;
111
  const char *end= find+length;
112
  const char *i;
113
  const char *j;
114
  for (uint pos=0 ; (j=lib->type_names[pos++]) ; )
115
  {
116
    for (i=find ; i != end && 
117
	   my_toupper(system_charset_info,*i) == 
118
	   my_toupper(system_charset_info,*j) ; i++, j++) ;
119
    if (i == end)
120
    {
121
      if (! *j)
122
	return(pos);
123
      found_count++;
124
      found_pos= pos;
125
    }
126
  }
127
  return(found_count == 1 && part_match ? found_pos : 0);
128
}
129
130
131
/*
132
  Find a string in a list of strings according to collation
133
134
  SYNOPSIS
135
   find_type2()
136
   lib			TYPELIB (struct of pointer to values + count)
137
   x			String to find
138
   length               String length
139
   cs			Character set + collation to use for comparison
140
141
  NOTES
142
143
  RETURN
144
    0	No matching value
145
    >0  Offset+1 in typelib for matched string
146
*/
147
148
uint find_type2(const TYPELIB *typelib, const char *x, uint length,
149
                CHARSET_INFO *cs)
150
{
151
  int pos;
152
  const char *j;
153
  DBUG_ENTER("find_type2");
154
  DBUG_PRINT("enter",("x: '%.*s'  lib: 0x%lx", length, x, (long) typelib));
155
156
  if (!typelib->count)
157
  {
158
    DBUG_PRINT("exit",("no count"));
159
    DBUG_RETURN(0);
160
  }
161
162
  for (pos=0 ; (j=typelib->type_names[pos]) ; pos++)
163
  {
164
    if (!my_strnncoll(cs, (const uchar*) x, length,
165
                          (const uchar*) j, typelib->type_lengths[pos]))
166
      DBUG_RETURN(pos+1);
167
  }
168
  DBUG_PRINT("exit",("Couldn't find type"));
169
  DBUG_RETURN(0);
170
} /* find_type */
171
172
173
/*
174
  Un-hex all elements in a typelib
175
176
  SYNOPSIS
177
   unhex_type2()
178
   interval       TYPELIB (struct of pointer to values + lengths + count)
179
180
  NOTES
181
182
  RETURN
183
    N/A
184
*/
185
186
void unhex_type2(TYPELIB *interval)
187
{
188
  for (uint pos= 0; pos < interval->count; pos++)
189
  {
190
    char *from, *to;
191
    for (from= to= (char*) interval->type_names[pos]; *from; )
192
    {
193
      /*
194
        Note, hexchar_to_int(*from++) doesn't work
195
        one some compilers, e.g. IRIX. Looks like a compiler
196
        bug in inline functions in combination with arguments
197
        that have a side effect. So, let's use from[0] and from[1]
198
        and increment 'from' by two later.
199
      */
200
201
      *to++= (char) (hexchar_to_int(from[0]) << 4) +
202
                     hexchar_to_int(from[1]);
203
      from+= 2;
204
    }
205
    interval->type_lengths[pos] /= 2;
206
  }
207
}
208
209
210
/*
211
  Check if the first word in a string is one of the ones in TYPELIB
212
213
  SYNOPSIS
214
    check_word()
215
    lib		TYPELIB
216
    val		String to check
217
    end		End of input
218
    end_of_word	Store value of last used byte here if we found word
219
220
  RETURN
221
    0	 No matching value
222
    > 1  lib->type_names[#-1] matched
223
	 end_of_word will point to separator character/end in 'val'
224
*/
225
226
uint check_word(TYPELIB *lib, const char *val, const char *end,
227
		const char **end_of_word)
228
{
229
  int res;
230
  const char *ptr;
231
232
  /* Fiend end of word */
233
  for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++)
234
    ;
235
  if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0)
236
    *end_of_word= ptr;
237
  return res;
238
}
239
240
241
/*
242
  Converts a string between character sets
243
244
  SYNOPSIS
245
    strconvert()
246
    from_cs       source character set
247
    from          source, a null terminated string
248
    to            destination buffer
249
    to_length     destination buffer length
250
251
  NOTES
252
    'to' is always terminated with a '\0' character.
253
    If there is no enough space to convert whole string,
254
    only prefix is converted, and terminated with '\0'.
255
256
  RETURN VALUES
257
    result string length
258
*/
259
260
261
uint strconvert(CHARSET_INFO *from_cs, const char *from,
262
                CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors)
263
{
264
  int cnvres;
265
  my_wc_t wc;
266
  char *to_start= to;
267
  uchar *to_end= (uchar*) to + to_length - 1;
268
  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
269
  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
270
  uint error_count= 0;
271
272
  while (1)
273
  {
274
    /*
275
      Using 'from + 10' is safe:
276
      - it is enough to scan a single character in any character set.
277
      - if remaining string is shorter than 10, then mb_wc will return
278
        with error because of unexpected '\0' character.
279
    */
280
    if ((cnvres= (*mb_wc)(from_cs, &wc,
281
                          (uchar*) from, (uchar*) from + 10)) > 0)
282
    {
283
      if (!wc)
284
        break;
285
      from+= cnvres;
286
    }
287
    else if (cnvres == MY_CS_ILSEQ)
288
    {
289
      error_count++;
290
      from++;
291
      wc= '?';
292
    }
293
    else
294
      break; // Impossible char.
295
296
outp:
297
298
    if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
299
      to+= cnvres;
300
    else if (cnvres == MY_CS_ILUNI && wc != '?')
301
    {
302
      error_count++;
303
      wc= '?';
304
      goto outp;
305
    }
306
    else
307
      break;
308
  }
309
  *to= '\0';
310
  *errors= error_count;
311
  return (uint32) (to - to_start);
312
313
}
314
315
316
/*
317
  Searches for a LEX_STRING in an LEX_STRING array.
318
319
  SYNOPSIS
320
    find_string_in_array()
321
      heap    The array
322
      needle  The string to search for
323
324
  NOTE
325
    The last LEX_STRING in the array should have str member set to NULL
326
327
  RETURN VALUES
328
    -1   Not found
329
    >=0  Ordinal position
330
*/
331
332
int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle,
333
                         CHARSET_INFO * const cs)
334
{
335
  const LEX_STRING *pos;
336
  for (pos= haystack; pos->str; pos++)
337
    if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length,
338
                               (uchar *) needle->str, needle->length, 0))
339
    {
340
      return (pos - haystack);
341
    }
342
  return -1;
343
}