1
by brian
clean slate |
1 |
/* Copyright (C) 2003 MySQL AB
|
2 |
||
3 |
This program is free software; you can redistribute it and/or modify
|
|
4 |
it under the terms of the GNU General Public License as published by
|
|
5 |
the Free Software Foundation; version 2 of the License.
|
|
6 |
||
7 |
This program is distributed in the hope that it will be useful,
|
|
8 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
9 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
10 |
GNU General Public License for more details.
|
|
11 |
||
12 |
You should have received a copy of the GNU General Public License
|
|
13 |
along with this program; if not, write to the Free Software
|
|
14 |
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
15 |
||
16 |
/* Some useful string utility functions used by the MySQL server */
|
|
17 |
||
18 |
#include "mysql_priv.h" |
|
19 |
||
20 |
/*
|
|
21 |
Return bitmap for strings used in a set
|
|
22 |
||
23 |
SYNOPSIS
|
|
24 |
find_set()
|
|
25 |
lib Strings in set
|
|
26 |
str Strings of set-strings separated by ','
|
|
27 |
err_pos If error, set to point to start of wrong set string
|
|
28 |
err_len If error, set to the length of wrong set string
|
|
29 |
set_warning Set to 1 if some string in set couldn't be used
|
|
30 |
||
31 |
NOTE
|
|
32 |
We delete all end space from str before comparison
|
|
33 |
||
34 |
RETURN
|
|
35 |
bitmap of all sets found in x.
|
|
36 |
set_warning is set to 1 if there was any sets that couldn't be set
|
|
37 |
*/
|
|
38 |
||
39 |
static const char field_separator=','; |
|
40 |
||
41 |
ulonglong find_set(TYPELIB *lib, const char *str, uint length, CHARSET_INFO *cs, |
|
42 |
char **err_pos, uint *err_len, bool *set_warning) |
|
43 |
{
|
|
44 |
CHARSET_INFO *strip= cs ? cs : &my_charset_latin1; |
|
45 |
const char *end= str + strip->cset->lengthsp(strip, str, length); |
|
46 |
ulonglong found= 0; |
|
47 |
*err_pos= 0; // No error yet |
|
48 |
if (str != end) |
|
49 |
{
|
|
50 |
const char *start= str; |
|
51 |
for (;;) |
|
52 |
{
|
|
53 |
const char *pos= start; |
|
54 |
uint var_len; |
|
55 |
int mblen= 1; |
|
56 |
||
57 |
if (cs && cs->mbminlen > 1) |
|
58 |
{
|
|
59 |
for ( ; pos < end; pos+= mblen) |
|
60 |
{
|
|
61 |
my_wc_t wc; |
|
62 |
if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos, |
|
63 |
(const uchar *) end)) < 1) |
|
64 |
mblen= 1; // Not to hang on a wrong multibyte sequence |
|
65 |
if (wc == (my_wc_t) field_separator) |
|
66 |
break; |
|
67 |
}
|
|
68 |
}
|
|
69 |
else
|
|
70 |
for (; pos != end && *pos != field_separator; pos++) ; |
|
71 |
var_len= (uint) (pos - start); |
|
72 |
uint find= cs ? find_type2(lib, start, var_len, cs) : |
|
73 |
find_type(lib, start, var_len, (bool) 0); |
|
74 |
if (!find) |
|
75 |
{
|
|
76 |
*err_pos= (char*) start; |
|
77 |
*err_len= var_len; |
|
78 |
*set_warning= 1; |
|
79 |
}
|
|
80 |
else
|
|
81 |
found|= ((longlong) 1 << (find - 1)); |
|
82 |
if (pos >= end) |
|
83 |
break; |
|
84 |
start= pos + mblen; |
|
85 |
}
|
|
86 |
}
|
|
87 |
return found; |
|
88 |
}
|
|
89 |
||
90 |
||
91 |
/*
|
|
92 |
Function to find a string in a TYPELIB
|
|
93 |
(Same format as mysys/typelib.c)
|
|
94 |
||
95 |
SYNOPSIS
|
|
96 |
find_type()
|
|
97 |
lib TYPELIB (struct of pointer to values + count)
|
|
98 |
find String to find
|
|
99 |
length Length of string to find
|
|
100 |
part_match Allow part matching of value
|
|
101 |
||
102 |
RETURN
|
|
103 |
0 error
|
|
104 |
> 0 position in TYPELIB->type_names +1
|
|
105 |
*/
|
|
106 |
||
107 |
uint find_type(const TYPELIB *lib, const char *find, uint length, |
|
108 |
bool part_match) |
|
109 |
{
|
|
110 |
uint found_count=0, found_pos=0; |
|
111 |
const char *end= find+length; |
|
112 |
const char *i; |
|
113 |
const char *j; |
|
114 |
for (uint pos=0 ; (j=lib->type_names[pos++]) ; ) |
|
115 |
{
|
|
116 |
for (i=find ; i != end && |
|
117 |
my_toupper(system_charset_info,*i) == |
|
118 |
my_toupper(system_charset_info,*j) ; i++, j++) ; |
|
119 |
if (i == end) |
|
120 |
{
|
|
121 |
if (! *j) |
|
122 |
return(pos); |
|
123 |
found_count++; |
|
124 |
found_pos= pos; |
|
125 |
}
|
|
126 |
}
|
|
127 |
return(found_count == 1 && part_match ? found_pos : 0); |
|
128 |
}
|
|
129 |
||
130 |
||
131 |
/*
|
|
132 |
Find a string in a list of strings according to collation
|
|
133 |
||
134 |
SYNOPSIS
|
|
135 |
find_type2()
|
|
136 |
lib TYPELIB (struct of pointer to values + count)
|
|
137 |
x String to find
|
|
138 |
length String length
|
|
139 |
cs Character set + collation to use for comparison
|
|
140 |
||
141 |
NOTES
|
|
142 |
||
143 |
RETURN
|
|
144 |
0 No matching value
|
|
145 |
>0 Offset+1 in typelib for matched string
|
|
146 |
*/
|
|
147 |
||
148 |
uint find_type2(const TYPELIB *typelib, const char *x, uint length, |
|
149 |
CHARSET_INFO *cs) |
|
150 |
{
|
|
151 |
int pos; |
|
152 |
const char *j; |
|
153 |
DBUG_ENTER("find_type2"); |
|
154 |
DBUG_PRINT("enter",("x: '%.*s' lib: 0x%lx", length, x, (long) typelib)); |
|
155 |
||
156 |
if (!typelib->count) |
|
157 |
{
|
|
158 |
DBUG_PRINT("exit",("no count")); |
|
159 |
DBUG_RETURN(0); |
|
160 |
}
|
|
161 |
||
162 |
for (pos=0 ; (j=typelib->type_names[pos]) ; pos++) |
|
163 |
{
|
|
164 |
if (!my_strnncoll(cs, (const uchar*) x, length, |
|
165 |
(const uchar*) j, typelib->type_lengths[pos])) |
|
166 |
DBUG_RETURN(pos+1); |
|
167 |
}
|
|
168 |
DBUG_PRINT("exit",("Couldn't find type")); |
|
169 |
DBUG_RETURN(0); |
|
170 |
} /* find_type */ |
|
171 |
||
172 |
||
173 |
/*
|
|
174 |
Un-hex all elements in a typelib
|
|
175 |
||
176 |
SYNOPSIS
|
|
177 |
unhex_type2()
|
|
178 |
interval TYPELIB (struct of pointer to values + lengths + count)
|
|
179 |
||
180 |
NOTES
|
|
181 |
||
182 |
RETURN
|
|
183 |
N/A
|
|
184 |
*/
|
|
185 |
||
186 |
void unhex_type2(TYPELIB *interval) |
|
187 |
{
|
|
188 |
for (uint pos= 0; pos < interval->count; pos++) |
|
189 |
{
|
|
190 |
char *from, *to; |
|
191 |
for (from= to= (char*) interval->type_names[pos]; *from; ) |
|
192 |
{
|
|
193 |
/*
|
|
194 |
Note, hexchar_to_int(*from++) doesn't work
|
|
195 |
one some compilers, e.g. IRIX. Looks like a compiler
|
|
196 |
bug in inline functions in combination with arguments
|
|
197 |
that have a side effect. So, let's use from[0] and from[1]
|
|
198 |
and increment 'from' by two later.
|
|
199 |
*/
|
|
200 |
||
201 |
*to++= (char) (hexchar_to_int(from[0]) << 4) + |
|
202 |
hexchar_to_int(from[1]); |
|
203 |
from+= 2; |
|
204 |
}
|
|
205 |
interval->type_lengths[pos] /= 2; |
|
206 |
}
|
|
207 |
}
|
|
208 |
||
209 |
||
210 |
/*
|
|
211 |
Check if the first word in a string is one of the ones in TYPELIB
|
|
212 |
||
213 |
SYNOPSIS
|
|
214 |
check_word()
|
|
215 |
lib TYPELIB
|
|
216 |
val String to check
|
|
217 |
end End of input
|
|
218 |
end_of_word Store value of last used byte here if we found word
|
|
219 |
||
220 |
RETURN
|
|
221 |
0 No matching value
|
|
222 |
> 1 lib->type_names[#-1] matched
|
|
223 |
end_of_word will point to separator character/end in 'val'
|
|
224 |
*/
|
|
225 |
||
226 |
uint check_word(TYPELIB *lib, const char *val, const char *end, |
|
227 |
const char **end_of_word) |
|
228 |
{
|
|
229 |
int res; |
|
230 |
const char *ptr; |
|
231 |
||
232 |
/* Fiend end of word */
|
|
233 |
for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++) |
|
234 |
;
|
|
235 |
if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0) |
|
236 |
*end_of_word= ptr; |
|
237 |
return res; |
|
238 |
}
|
|
239 |
||
240 |
||
241 |
/*
|
|
242 |
Converts a string between character sets
|
|
243 |
||
244 |
SYNOPSIS
|
|
245 |
strconvert()
|
|
246 |
from_cs source character set
|
|
247 |
from source, a null terminated string
|
|
248 |
to destination buffer
|
|
249 |
to_length destination buffer length
|
|
250 |
||
251 |
NOTES
|
|
252 |
'to' is always terminated with a '\0' character.
|
|
253 |
If there is no enough space to convert whole string,
|
|
254 |
only prefix is converted, and terminated with '\0'.
|
|
255 |
||
256 |
RETURN VALUES
|
|
257 |
result string length
|
|
258 |
*/
|
|
259 |
||
260 |
||
261 |
uint strconvert(CHARSET_INFO *from_cs, const char *from, |
|
262 |
CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors) |
|
263 |
{
|
|
264 |
int cnvres; |
|
265 |
my_wc_t wc; |
|
266 |
char *to_start= to; |
|
267 |
uchar *to_end= (uchar*) to + to_length - 1; |
|
268 |
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc; |
|
269 |
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb; |
|
270 |
uint error_count= 0; |
|
271 |
||
272 |
while (1) |
|
273 |
{
|
|
274 |
/*
|
|
275 |
Using 'from + 10' is safe:
|
|
276 |
- it is enough to scan a single character in any character set.
|
|
277 |
- if remaining string is shorter than 10, then mb_wc will return
|
|
278 |
with error because of unexpected '\0' character.
|
|
279 |
*/
|
|
280 |
if ((cnvres= (*mb_wc)(from_cs, &wc, |
|
281 |
(uchar*) from, (uchar*) from + 10)) > 0) |
|
282 |
{
|
|
283 |
if (!wc) |
|
284 |
break; |
|
285 |
from+= cnvres; |
|
286 |
}
|
|
287 |
else if (cnvres == MY_CS_ILSEQ) |
|
288 |
{
|
|
289 |
error_count++; |
|
290 |
from++; |
|
291 |
wc= '?'; |
|
292 |
}
|
|
293 |
else
|
|
294 |
break; // Impossible char. |
|
295 |
||
296 |
outp: |
|
297 |
||
298 |
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) |
|
299 |
to+= cnvres; |
|
300 |
else if (cnvres == MY_CS_ILUNI && wc != '?') |
|
301 |
{
|
|
302 |
error_count++; |
|
303 |
wc= '?'; |
|
304 |
goto outp; |
|
305 |
}
|
|
306 |
else
|
|
307 |
break; |
|
308 |
}
|
|
309 |
*to= '\0'; |
|
310 |
*errors= error_count; |
|
311 |
return (uint32) (to - to_start); |
|
312 |
||
313 |
}
|
|
314 |
||
315 |
||
316 |
/*
|
|
317 |
Searches for a LEX_STRING in an LEX_STRING array.
|
|
318 |
||
319 |
SYNOPSIS
|
|
320 |
find_string_in_array()
|
|
321 |
heap The array
|
|
322 |
needle The string to search for
|
|
323 |
||
324 |
NOTE
|
|
325 |
The last LEX_STRING in the array should have str member set to NULL
|
|
326 |
||
327 |
RETURN VALUES
|
|
328 |
-1 Not found
|
|
329 |
>=0 Ordinal position
|
|
330 |
*/
|
|
331 |
||
332 |
int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle, |
|
333 |
CHARSET_INFO * const cs) |
|
334 |
{
|
|
335 |
const LEX_STRING *pos; |
|
336 |
for (pos= haystack; pos->str; pos++) |
|
337 |
if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length, |
|
338 |
(uchar *) needle->str, needle->length, 0)) |
|
339 |
{
|
|
340 |
return (pos - haystack); |
|
341 |
}
|
|
342 |
return -1; |
|
343 |
}
|