~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2003 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
/* Some useful string utility functions used by the MySQL server */
17
18
#include "mysql_priv.h"
19
20
/*
21
  Return bitmap for strings used in a set
22
23
  SYNOPSIS
24
  find_set()
25
  lib			Strings in set
26
  str			Strings of set-strings separated by ','
27
  err_pos		If error, set to point to start of wrong set string
28
  err_len		If error, set to the length of wrong set string
29
  set_warning		Set to 1 if some string in set couldn't be used
30
31
  NOTE
32
    We delete all end space from str before comparison
33
34
  RETURN
35
    bitmap of all sets found in x.
36
    set_warning is set to 1 if there was any sets that couldn't be set
37
*/
38
39
static const char field_separator=',';
40
151 by Brian Aker
Ulonglong to uint64_t
41
uint64_t find_set(TYPELIB *lib, const char *str, uint length, CHARSET_INFO *cs,
1 by brian
clean slate
42
                   char **err_pos, uint *err_len, bool *set_warning)
43
{
44
  CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
45
  const char *end= str + strip->cset->lengthsp(strip, str, length);
151 by Brian Aker
Ulonglong to uint64_t
46
  uint64_t found= 0;
1 by brian
clean slate
47
  *err_pos= 0;                  // No error yet
48
  if (str != end)
49
  {
50
    const char *start= str;    
51
    for (;;)
52
    {
53
      const char *pos= start;
54
      uint var_len;
55
      int mblen= 1;
56
57
      if (cs && cs->mbminlen > 1)
58
      {
59
        for ( ; pos < end; pos+= mblen)
60
        {
61
          my_wc_t wc;
62
          if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos, 
63
                                               (const uchar *) end)) < 1)
64
            mblen= 1; // Not to hang on a wrong multibyte sequence
65
          if (wc == (my_wc_t) field_separator)
66
            break;
67
        }
68
      }
69
      else
70
        for (; pos != end && *pos != field_separator; pos++) ;
71
      var_len= (uint) (pos - start);
72
      uint find= cs ? find_type2(lib, start, var_len, cs) :
73
                      find_type(lib, start, var_len, (bool) 0);
74
      if (!find)
75
      {
76
        *err_pos= (char*) start;
77
        *err_len= var_len;
78
        *set_warning= 1;
79
      }
80
      else
152 by Brian Aker
longlong replacement
81
        found|= ((int64_t) 1 << (find - 1));
1 by brian
clean slate
82
      if (pos >= end)
83
        break;
84
      start= pos + mblen;
85
    }
86
  }
87
  return found;
88
}
89
90
91
/*
92
  Function to find a string in a TYPELIB
93
  (Same format as mysys/typelib.c)
94
95
  SYNOPSIS
96
   find_type()
97
   lib			TYPELIB (struct of pointer to values + count)
98
   find			String to find
99
   length		Length of string to find
100
   part_match		Allow part matching of value
101
102
 RETURN
103
  0 error
104
  > 0 position in TYPELIB->type_names +1
105
*/
106
107
uint find_type(const TYPELIB *lib, const char *find, uint length,
108
               bool part_match)
109
{
110
  uint found_count=0, found_pos=0;
111
  const char *end= find+length;
112
  const char *i;
113
  const char *j;
114
  for (uint pos=0 ; (j=lib->type_names[pos++]) ; )
115
  {
116
    for (i=find ; i != end && 
117
	   my_toupper(system_charset_info,*i) == 
118
	   my_toupper(system_charset_info,*j) ; i++, j++) ;
119
    if (i == end)
120
    {
121
      if (! *j)
122
	return(pos);
123
      found_count++;
124
      found_pos= pos;
125
    }
126
  }
127
  return(found_count == 1 && part_match ? found_pos : 0);
128
}
129
130
131
/*
132
  Find a string in a list of strings according to collation
133
134
  SYNOPSIS
135
   find_type2()
136
   lib			TYPELIB (struct of pointer to values + count)
137
   x			String to find
138
   length               String length
139
   cs			Character set + collation to use for comparison
140
141
  NOTES
142
143
  RETURN
144
    0	No matching value
145
    >0  Offset+1 in typelib for matched string
146
*/
147
148
uint find_type2(const TYPELIB *typelib, const char *x, uint length,
149
                CHARSET_INFO *cs)
150
{
151
  int pos;
152
  const char *j;
153
154
  if (!typelib->count)
155
  {
51.1.69 by Jay Pipes
Removed/replaced DBUG symbols
156
    return(0);
1 by brian
clean slate
157
  }
158
159
  for (pos=0 ; (j=typelib->type_names[pos]) ; pos++)
160
  {
161
    if (!my_strnncoll(cs, (const uchar*) x, length,
162
                          (const uchar*) j, typelib->type_lengths[pos]))
51.1.69 by Jay Pipes
Removed/replaced DBUG symbols
163
      return(pos+1);
1 by brian
clean slate
164
  }
51.1.69 by Jay Pipes
Removed/replaced DBUG symbols
165
  return(0);
1 by brian
clean slate
166
} /* find_type */
167
168
169
/*
170
  Un-hex all elements in a typelib
171
172
  SYNOPSIS
173
   unhex_type2()
174
   interval       TYPELIB (struct of pointer to values + lengths + count)
175
176
  NOTES
177
178
  RETURN
179
    N/A
180
*/
181
182
void unhex_type2(TYPELIB *interval)
183
{
184
  for (uint pos= 0; pos < interval->count; pos++)
185
  {
186
    char *from, *to;
187
    for (from= to= (char*) interval->type_names[pos]; *from; )
188
    {
189
      /*
190
        Note, hexchar_to_int(*from++) doesn't work
191
        one some compilers, e.g. IRIX. Looks like a compiler
192
        bug in inline functions in combination with arguments
193
        that have a side effect. So, let's use from[0] and from[1]
194
        and increment 'from' by two later.
195
      */
196
197
      *to++= (char) (hexchar_to_int(from[0]) << 4) +
198
                     hexchar_to_int(from[1]);
199
      from+= 2;
200
    }
201
    interval->type_lengths[pos] /= 2;
202
  }
203
}
204
205
206
/*
207
  Check if the first word in a string is one of the ones in TYPELIB
208
209
  SYNOPSIS
210
    check_word()
211
    lib		TYPELIB
212
    val		String to check
213
    end		End of input
214
    end_of_word	Store value of last used byte here if we found word
215
216
  RETURN
217
    0	 No matching value
218
    > 1  lib->type_names[#-1] matched
219
	 end_of_word will point to separator character/end in 'val'
220
*/
221
222
uint check_word(TYPELIB *lib, const char *val, const char *end,
223
		const char **end_of_word)
224
{
225
  int res;
226
  const char *ptr;
227
228
  /* Fiend end of word */
229
  for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++)
230
    ;
231
  if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0)
232
    *end_of_word= ptr;
233
  return res;
234
}
235
236
237
/*
238
  Converts a string between character sets
239
240
  SYNOPSIS
241
    strconvert()
242
    from_cs       source character set
243
    from          source, a null terminated string
244
    to            destination buffer
245
    to_length     destination buffer length
246
247
  NOTES
248
    'to' is always terminated with a '\0' character.
249
    If there is no enough space to convert whole string,
250
    only prefix is converted, and terminated with '\0'.
251
252
  RETURN VALUES
253
    result string length
254
*/
255
256
257
uint strconvert(CHARSET_INFO *from_cs, const char *from,
258
                CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors)
259
{
260
  int cnvres;
261
  my_wc_t wc;
262
  char *to_start= to;
263
  uchar *to_end= (uchar*) to + to_length - 1;
264
  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
265
  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
266
  uint error_count= 0;
267
268
  while (1)
269
  {
270
    /*
271
      Using 'from + 10' is safe:
272
      - it is enough to scan a single character in any character set.
273
      - if remaining string is shorter than 10, then mb_wc will return
274
        with error because of unexpected '\0' character.
275
    */
276
    if ((cnvres= (*mb_wc)(from_cs, &wc,
277
                          (uchar*) from, (uchar*) from + 10)) > 0)
278
    {
279
      if (!wc)
280
        break;
281
      from+= cnvres;
282
    }
283
    else if (cnvres == MY_CS_ILSEQ)
284
    {
285
      error_count++;
286
      from++;
287
      wc= '?';
288
    }
289
    else
290
      break; // Impossible char.
291
292
outp:
293
294
    if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
295
      to+= cnvres;
296
    else if (cnvres == MY_CS_ILUNI && wc != '?')
297
    {
298
      error_count++;
299
      wc= '?';
300
      goto outp;
301
    }
302
    else
303
      break;
304
  }
305
  *to= '\0';
306
  *errors= error_count;
205 by Brian Aker
uint32 -> uin32_t
307
  return (uint32_t) (to - to_start);
1 by brian
clean slate
308
309
}
310
311
312
/*
313
  Searches for a LEX_STRING in an LEX_STRING array.
314
315
  SYNOPSIS
316
    find_string_in_array()
317
      heap    The array
318
      needle  The string to search for
319
320
  NOTE
321
    The last LEX_STRING in the array should have str member set to NULL
322
323
  RETURN VALUES
324
    -1   Not found
325
    >=0  Ordinal position
326
*/
327
328
int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle,
329
                         CHARSET_INFO * const cs)
330
{
331
  const LEX_STRING *pos;
332
  for (pos= haystack; pos->str; pos++)
333
    if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length,
334
                               (uchar *) needle->str, needle->length, 0))
335
    {
336
      return (pos - haystack);
337
    }
338
  return -1;
339
}