1
/* Copyright (C) 2003 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
/* Some useful string utility functions used by the MySQL server */
17
#include <drizzled/server_includes.h>
20
Return bitmap for strings used in a set
25
str Strings of set-strings separated by ','
26
err_pos If error, set to point to start of wrong set string
27
err_len If error, set to the length of wrong set string
28
set_warning Set to 1 if some string in set couldn't be used
31
Trailing spaces in str are ignored in the comparison
34
bitmap of all sets found in str.
35
set_warning is set to 1 if there were any sets that couldn't be set
38
static const char field_separator=',';
40
uint64_t find_set(TYPELIB *lib, const char *str, uint32_t length,
41
const CHARSET_INFO * const cs,
42
char **err_pos, uint32_t *err_len, bool *set_warning)
44
const CHARSET_INFO * const strip= cs ? cs : &my_charset_utf8_general_ci;
45
const char *end= str + strip->cset->lengthsp(strip, str, length);
47
*err_pos= 0; // No error yet
50
const char *start= str;
53
const char *pos= start;
57
if (cs && cs->mbminlen > 1)
59
for ( ; pos < end; pos+= mblen)
62
if ((mblen= cs->cset->mb_wc(cs, &wc, (const unsigned char *) pos,
63
(const unsigned char *) end)) < 1)
64
mblen= 1; // Not to hang on a wrong multibyte sequence
65
if (wc == (my_wc_t) field_separator)
70
for (; pos != end && *pos != field_separator; pos++) ;
71
var_len= (uint) (pos - start);
72
uint32_t find= cs ? find_type2(lib, start, var_len, cs) :
73
find_type(lib, start, var_len, (bool) 0);
76
*err_pos= (char*) start;
81
found|= ((int64_t) 1 << (find - 1));
92
Function to find a string in a TYPELIB
93
(Same format as mysys/typelib.c)
97
lib TYPELIB (struct of pointer to values + count)
99
length Length of string to find
100
part_match Allow part matching of value
104
> 0 position in TYPELIB->type_names +1
107
uint32_t find_type(const TYPELIB *lib, const char *find, uint32_t length,
110
uint32_t found_count=0, found_pos=0;
111
const char *end= find+length;
114
for (uint32_t pos=0 ; (j=lib->type_names[pos++]) ; )
116
for (i=find ; i != end &&
117
my_toupper(system_charset_info,*i) ==
118
my_toupper(system_charset_info,*j) ; i++, j++) ;
127
return(found_count == 1 && part_match ? found_pos : 0);
132
Find a string in a list of strings according to collation
136
lib TYPELIB (struct of pointer to values + count)
139
cs Character set + collation to use for comparison
145
>0 Offset+1 in typelib for matched string
148
uint32_t find_type2(const TYPELIB *typelib, const char *x, uint32_t length,
149
const CHARSET_INFO * const cs)
159
for (pos=0 ; (j=typelib->type_names[pos]) ; pos++)
161
if (!my_strnncoll(cs, (const unsigned char*) x, length,
162
(const unsigned char*) j, typelib->type_lengths[pos]))
170
Un-hex all elements in a typelib
174
interval TYPELIB (struct of pointer to values + lengths + count)
182
void unhex_type2(TYPELIB *interval)
184
for (uint32_t pos= 0; pos < interval->count; pos++)
187
for (from= to= (char*) interval->type_names[pos]; *from; )
190
Note, hexchar_to_int(*from++) doesn't work
191
on some compilers, e.g. IRIX. Looks like a compiler
192
bug in inline functions in combination with arguments
193
that have a side effect. So, let's use from[0] and from[1]
194
and increment 'from' by two later.
197
*to++= (char) (hexchar_to_int(from[0]) << 4) +
198
hexchar_to_int(from[1]);
201
interval->type_lengths[pos] /= 2;
207
Check if the first word in a string is one of the ones in TYPELIB
214
end_of_word Store value of last used byte here if we found word
218
> 0 lib->type_names[#-1] matched
219
end_of_word will point to separator character/end in 'val'
222
uint32_t check_word(TYPELIB *lib, const char *val, const char *end,
223
const char **end_of_word)
228
/* Find end of word */
229
for (ptr= val ; ptr < end && my_isalpha(&my_charset_utf8_general_ci, *ptr) ; ptr++)
231
if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0)
238
Converts a string between character sets
242
from_cs source character set
243
from source, a null terminated string
244
to destination buffer
245
to_length destination buffer length
248
'to' is always terminated with a '\0' character.
249
If there is not enough space to convert the whole string,
250
only a prefix is converted, and terminated with '\0'.
257
uint32_t strconvert(const CHARSET_INFO * const from_cs, const char *from,
258
const CHARSET_INFO * const to_cs, char *to, uint32_t to_length, uint32_t *errors)
263
unsigned char *to_end= (unsigned char*) to + to_length - 1;
264
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
265
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
266
uint32_t error_count= 0;
271
Using 'from + 10' is safe:
272
- it is enough to scan a single character in any character set.
273
- if remaining string is shorter than 10, then mb_wc will return
274
with error because of unexpected '\0' character.
276
if ((cnvres= (*mb_wc)(from_cs, &wc,
277
(unsigned char*) from, (unsigned char*) from + 10)) > 0)
283
else if (cnvres == MY_CS_ILSEQ)
290
break; // Impossible char.
294
if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
296
else if (cnvres == MY_CS_ILUNI && wc != '?')
306
*errors= error_count;
307
return (uint32_t) (to - to_start);
313
Searches for a LEX_STRING in a LEX_STRING array.
316
find_string_in_array()
318
needle The string to search for
321
The last LEX_STRING in the array should have str member set to NULL
328
int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle,
329
const CHARSET_INFO * const cs)
331
const LEX_STRING *pos;
332
for (pos= haystack; pos->str; pos++)
333
if (!cs->coll->strnncollsp(cs, (unsigned char *) pos->str, pos->length,
334
(unsigned char *) needle->str, needle->length, 0))
336
return (pos - haystack);