~drizzle-trunk/drizzle/development : contents of sql/strfunc.cc at revision 77.1.44

~drizzle-trunk/drizzle/development : (revision 77.1.44)

/* Copyright (C) 2003 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* Some useful string utility functions used by the MySQL server */

#include "mysql_priv.h"

/*
  Return bitmap for strings used in a set

  SYNOPSIS
  find_set()
  lib			Strings in set
  str			Strings of set-strings separated by ','
  length		Length of str in bytes
  cs			Character set of str. If NULL, my_charset_latin1 is
			used to find the end of str and elements are looked
			up with find_type() (exact match, no part matching)
			instead of find_type2()
  err_pos		Set to NULL on entry; if error, set to point to start
			of wrong set string
  err_len		If error, set to the length of wrong set string
			(not written when there is no error)
  set_warning		Set to 1 if some string in set couldn't be used
			(not written otherwise)

  NOTE
    End space of str is ignored: the end of the scanned range is taken
    from the charset's lengthsp(). str itself is not modified.
    If several elements are wrong, err_pos/err_len describe the last one.
    An empty element (e.g. after a trailing ',') is looked up like any
    other element.

  RETURN
    bitmap of all set elements found in str; bit (N-1) is set for the
    element that was found at position N (1-based) in lib.
    set_warning is set to 1 if there was any element that couldn't be set
*/

static const char field_separator=',';

ulonglong find_set(TYPELIB *lib, const char *str, uint length, CHARSET_INFO *cs,
                   char **err_pos, uint *err_len, bool *set_warning)
{
  CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
  const char *end= str + strip->cset->lengthsp(strip, str, length);
  ulonglong found= 0;
  *err_pos= 0;                  // No error yet
  if (str != end)
  {
    const char *start= str;
    for (;;)
    {
      const char *pos= start;
      uint var_len;
      int mblen= 1;             // Byte length of the separator we stopped on

      if (cs && cs->mbminlen > 1)
      {
        /* Every character is wider than one byte: scan by character */
        for ( ; pos < end; pos+= mblen)
        {
          my_wc_t wc;
          if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos,
                                               (const uchar *) end)) < 1)
          {
            /*
              Not to hang on a wrong multibyte sequence: step one byte.
              wc was not assigned by the failed conversion, so it must
              not be compared with the separator.
            */
            mblen= 1;
            continue;
          }
          if (wc == (my_wc_t) field_separator)
            break;
        }
      }
      else
        for (; pos != end && *pos != field_separator; pos++) ;
      var_len= (uint) (pos - start);
      uint find= cs ? find_type2(lib, start, var_len, cs) :
                      find_type(lib, start, var_len, (bool) 0);
      if (!find)
      {
        *err_pos= (char*) start;
        *err_len= var_len;
        *set_warning= 1;
      }
      else
      {
        /* Unsigned shift: a signed 1 shifted into bit 63 is undefined */
        found|= ((ulonglong) 1 << (find - 1));
      }
      if (pos >= end)
        break;
      start= pos + mblen;       // Skip the separator
    }
  }
  return found;
}


/*
  Function to find a string in a TYPELIB
  (Same format as mysys/typelib.c)

  SYNOPSIS
   find_type()
   lib			TYPELIB (struct of pointer to values + count);
			type_names must end with a NULL pointer
   find			String to find (does not need to be '\0' terminated)
   length		Length of string to find
   part_match		Allow part matching of value

  NOTES
   Comparison is done byte by byte after my_toupper() with
   system_charset_info.
   An exact match is returned as soon as it is seen. A prefix match is
   only returned if part_match is set and exactly one name in lib starts
   with the given string.
   A zero-length string is a prefix of every name.

  RETURN
   0 error (not found, or ambiguous/not allowed prefix)
   > 0 position in TYPELIB->type_names +1
*/

uint find_type(const TYPELIB *lib, const char *find, uint length,
               bool part_match)
{
  uint found_count=0, found_pos=0;
  const char *end= find+length;
  const char *i;
  const char *j;
  for (uint pos=0 ; (j=lib->type_names[pos++]) ; )
  {
    /*
      Stop at the end of the candidate name as well: without the *j test
      a '\0' byte inside 'find' compares equal to the name's terminator
      and j would be advanced past the end of the name.
    */
    for (i=find ; i != end && *j &&
           my_toupper(system_charset_info,*i) ==
           my_toupper(system_charset_info,*j) ; i++, j++) ;
    if (i == end)
    {
      if (! *j)
        return(pos);                    // Exact match; pos is already +1
      found_count++;                    // 'find' is a proper prefix of name
      found_pos= pos;
    }
  }
  return(found_count == 1 && part_match ? found_pos : 0);
}


/*
  Find a string in a list of strings according to collation

  SYNOPSIS
   find_type2()
   typelib		TYPELIB (struct of pointer to values + lengths + count);
			type_names must end with a NULL pointer
   x			String to find
   length               String length
   cs			Character set + collation to use for comparison

  NOTES
   Names are compared over their full stored length (type_lengths) with
   my_strnncoll(); the first name that compares equal wins.

  RETURN
    0	No matching value
    >0  Offset+1 in typelib for matched string
*/

uint find_type2(const TYPELIB *typelib, const char *x, uint length,
                CHARSET_INFO *cs)
{
  int idx= 0;
  const char *name;
  DBUG_ENTER("find_type2");
  DBUG_PRINT("enter",("x: '%.*s'  lib: 0x%lx", length, x, (long) typelib));

  if (typelib->count == 0)
  {
    DBUG_PRINT("exit",("no count"));
    DBUG_RETURN(0);
  }

  /* Walk the NULL-terminated name array until a name compares equal */
  while ((name= typelib->type_names[idx]) != 0)
  {
    const int differs= my_strnncoll(cs, (const uchar*) x, length,
                                    (const uchar*) name,
                                    typelib->type_lengths[idx]);
    if (differs == 0)
    {
      DBUG_RETURN(idx + 1);
    }
    idx++;
  }
  DBUG_PRINT("exit",("Couldn't find type"));
  DBUG_RETURN(0);
} /* find_type2 */


/*
  Un-hex all elements in a typelib

  SYNOPSIS
   unhex_type2()
   interval       TYPELIB (struct of pointer to values + lengths + count)

  NOTES
   Every name is decoded in place: each pair of hex digits is replaced by
   the byte it encodes, and type_lengths[] is halved accordingly.
   The decoded value is NOT '\0' terminated by this function; use
   type_lengths[] to find its end.
   If a name has an odd number of characters, the last (unpaired)
   character is ignored.

  RETURN
    N/A
*/

void unhex_type2(TYPELIB *interval)
{
  for (uint pos= 0; pos < interval->count; pos++)
  {
    char *from, *to;
    /*
      Require both digits of a pair to be present. Testing only from[0]
      would, for an odd-length name, step over the terminating '\0' and
      continue reading and writing outside the string.
    */
    for (from= to= (char*) interval->type_names[pos]; from[0] && from[1]; )
    {
      /*
        Note, hexchar_to_int(*from++) doesn't work
        one some compilers, e.g. IRIX. Looks like a compiler
        bug in inline functions in combination with arguments
        that have a side effect. So, let's use from[0] and from[1]
        and increment 'from' by two later.
      */

      *to++= (char) ((hexchar_to_int(from[0]) << 4) +
                     hexchar_to_int(from[1]));
      from+= 2;
    }
    interval->type_lengths[pos] /= 2;
  }
}


/*
  Check if the first word in a string is one of the ones in TYPELIB

  SYNOPSIS
    check_word()
    lib		TYPELIB
    val		String to check
    end		End of input
    end_of_word	Set to the first byte after the word if we found word
		(not written otherwise)

  NOTES
    The word is the leading run of characters in 'val' for which
    my_isalpha() is true in my_charset_latin1. It is looked up with
    find_type() with part matching allowed.

  RETURN
    0	 No matching value
    > 0  lib->type_names[#-1] matched
	 end_of_word will point to separator character/end in 'val'
*/

uint check_word(TYPELIB *lib, const char *val, const char *end,
		const char **end_of_word)
{
  const char *word_end= val;
  int res;

  /* Find end of word */
  while (word_end < end && my_isalpha(&my_charset_latin1, *word_end))
    word_end++;

  res= find_type(lib, val, (uint) (word_end - val), 1);
  if (res > 0)
    *end_of_word= word_end;
  return res;
}


/*
  Converts a string between character sets

  SYNOPSIS
    strconvert()
    from_cs       source character set
    from          source, a null terminated string
    to_cs         destination character set
    to            destination buffer
    to_length     destination buffer length
    errors        set to the number of characters that could not be
                  converted and were replaced with '?'

  NOTES
    'to' is always terminated with a '\0' character, unless to_length
    is 0, in which case nothing is written to 'to'.
    If there is no enough space to convert whole string,
    only prefix is converted, and terminated with '\0'.

  RETURN VALUES
    result string length (not counting the terminating '\0')
*/


uint strconvert(CHARSET_INFO *from_cs, const char *from,
                CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors)
{
  int cnvres;
  my_wc_t wc;
  char *to_start= to;
  uchar *to_end;
  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
  uint error_count= 0;

  /*
    A zero-length buffer has no room even for the terminating '\0':
    don't form the pointer 'to - 1' and don't write through 'to'.
  */
  if (!to_length)
  {
    *errors= 0;
    return 0;
  }

  /* Reserve the last byte of the buffer for the terminating '\0' */
  to_end= (uchar*) to + to_length - 1;

  while (1)
  {
    /*
      Using 'from + 10' is safe:
      - it is enough to scan a single character in any character set.
      - if remaining string is shorter than 10, then mb_wc will return
        with error because of unexpected '\0' character.
    */
    if ((cnvres= (*mb_wc)(from_cs, &wc,
                          (uchar*) from, (uchar*) from + 10)) > 0)
    {
      if (!wc)
        break;                  // End of source string
      from+= cnvres;
    }
    else if (cnvres == MY_CS_ILSEQ)
    {
      /* Bad byte sequence: skip one byte and output '?' instead */
      error_count++;
      from++;
      wc= '?';
    }
    else
      break; // Impossible char.

outp:

    if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
      to+= cnvres;
    else if (cnvres == MY_CS_ILUNI && wc != '?')
    {
      /* Character doesn't exist in to_cs: retry once with '?' */
      error_count++;
      wc= '?';
      goto outp;
    }
    else
      break;                    // No room left, or '?' can't be stored
  }
  *to= '\0';
  *errors= error_count;
  return (uint) (to - to_start);

}


/*
  Searches for a LEX_STRING in an LEX_STRING array.

  SYNOPSIS
    find_string_in_array()
      haystack  The array
      needle    The string to search for
      cs        Character set; its collation's strnncollsp() is used to
                compare each array element with needle

  NOTE
    The last LEX_STRING in the array should have str member set to NULL

  RETURN VALUES
    -1   Not found
    >=0  Ordinal position of the first element that compares equal
*/

int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle,
                         CHARSET_INFO * const cs)
{
  const LEX_STRING *entry= haystack;

  /* The array is terminated by an element whose str is NULL */
  while (entry->str)
  {
    const int cmp= cs->coll->strnncollsp(cs,
                                         (uchar *) entry->str, entry->length,
                                         (uchar *) needle->str, needle->length,
                                         0);
    if (cmp == 0)
      return (entry - haystack);
    entry++;
  }
  return -1;
}

1 by brian clean slate	1	/* Copyright (C) 2003 MySQL AB
	2
	3	This program is free software; you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation; version 2 of the License.
	6
	7	This program is distributed in the hope that it will be useful,
	8	but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	GNU General Public License for more details.
	11
	12	You should have received a copy of the GNU General Public License
	13	along with this program; if not, write to the Free Software
	14	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
	15
	16	/* Some useful string utility functions used by the MySQL server */
	17
	18	#include "mysql_priv.h"
	19
	20	/*
	21	Return bitmap for strings used in a set
	22
	23	SYNOPSIS
	24	find_set()
	25	lib Strings in set
	26	str Strings of set-strings separated by ','
	27	err_pos If error, set to point to start of wrong set string
	28	err_len If error, set to the length of wrong set string
	29	set_warning Set to 1 if some string in set couldn't be used
	30
	31	NOTE
	32	We delete all end space from str before comparison
	33
	34	RETURN
	35	bitmap of all sets found in x.
	36	set_warning is set to 1 if there was any sets that couldn't be set
	37	*/
	38
	39	static const char field_separator=',';
	40
	41	ulonglong find_set(TYPELIB lib, const char str, uint length, CHARSET_INFO *cs,
	42	char *err_pos, uint err_len, bool *set_warning)
	43	{
	44	CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
	45	const char *end= str + strip->cset->lengthsp(strip, str, length);
	46	ulonglong found= 0;
	47	*err_pos= 0; // No error yet
	48	if (str != end)
	49	{
	50	const char *start= str;
	51	for (;;)
	52	{
	53	const char *pos= start;
	54	uint var_len;
	55	int mblen= 1;
	56
	57	if (cs && cs->mbminlen > 1)
	58	{
	59	for ( ; pos < end; pos+= mblen)
	60	{
	61	my_wc_t wc;
	62	if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos,
	63	(const uchar *) end)) < 1)
	64	mblen= 1; // Not to hang on a wrong multibyte sequence
65	if (wc == (my_wc_t) field_separator)
66	break;
67	}
68	}
69	else
70	for (; pos != end && *pos != field_separator; pos++) ;
71	var_len= (uint) (pos - start);
72	uint find= cs ? find_type2(lib, start, var_len, cs) :
73	find_type(lib, start, var_len, (bool) 0);
74	if (!find)
75	{
76	err_pos= (char) start;
77	*err_len= var_len;
78	*set_warning= 1;
79	}
80	else
81	found\|= ((longlong) 1 << (find - 1));
82	if (pos >= end)
83	break;
84	start= pos + mblen;
85	}
86	}
87	return found;
88	}
89
90
91	/*
92	Function to find a string in a TYPELIB
93	(Same format as mysys/typelib.c)
94
95	SYNOPSIS
96	find_type()
97	lib TYPELIB (struct of pointer to values + count)
98	find String to find
99	length Length of string to find
100	part_match Allow part matching of value
101
102	RETURN
103	0 error
104	> 0 position in TYPELIB->type_names +1
105	*/
106
107	uint find_type(const TYPELIB lib, const char find, uint length,
108	bool part_match)
109	{
110	uint found_count=0, found_pos=0;
111	const char *end= find+length;
112	const char *i;
113	const char *j;
114	for (uint pos=0 ; (j=lib->type_names[pos++]) ; )
115	{
116	for (i=find ; i != end &&
117	my_toupper(system_charset_info,*i) ==
118	my_toupper(system_charset_info,*j) ; i++, j++) ;
119	if (i == end)
120	{
121	if (! *j)
122	return(pos);
123	found_count++;
124	found_pos= pos;
125	}
126	}
127	return(found_count == 1 && part_match ? found_pos : 0);
128	}
129
130
131	/*
132	Find a string in a list of strings according to collation
133
134	SYNOPSIS
135	find_type2()
136	lib TYPELIB (struct of pointer to values + count)
137	x String to find
138	length String length
139	cs Character set + collation to use for comparison
140
141	NOTES
142
143	RETURN
144	0 No matching value
145	>0 Offset+1 in typelib for matched string
146	*/
147
148	uint find_type2(const TYPELIB typelib, const char x, uint length,
149	CHARSET_INFO *cs)
150	{
151	int pos;
152	const char *j;
153	DBUG_ENTER("find_type2");
154	DBUG_PRINT("enter",("x: '%.*s' lib: 0x%lx", length, x, (long) typelib));
155
156	if (!typelib->count)
157	{
158	DBUG_PRINT("exit",("no count"));
159	DBUG_RETURN(0);
160	}
161
162	for (pos=0 ; (j=typelib->type_names[pos]) ; pos++)
163	{
164	if (!my_strnncoll(cs, (const uchar*) x, length,
165	(const uchar*) j, typelib->type_lengths[pos]))
166	DBUG_RETURN(pos+1);
167	}
168	DBUG_PRINT("exit",("Couldn't find type"));
169	DBUG_RETURN(0);
170	} /* find_type */
171
172
173	/*
174	Un-hex all elements in a typelib
175
176	SYNOPSIS
177	unhex_type2()
178	interval TYPELIB (struct of pointer to values + lengths + count)
179
180	NOTES
181
182	RETURN
183	N/A
184	*/
185
186	void unhex_type2(TYPELIB *interval)
187	{
188	for (uint pos= 0; pos < interval->count; pos++)
189	{
190	char from, to;
191	for (from= to= (char) interval->type_names[pos]; from; )
192	{
193	/*
194	Note, hexchar_to_int(*from++) doesn't work
195	one some compilers, e.g. IRIX. Looks like a compiler
196	bug in inline functions in combination with arguments
197	that have a side effect. So, let's use from[0] and from[1]
198	and increment 'from' by two later.
199	*/
200
201	*to++= (char) (hexchar_to_int(from[0]) << 4) +
202	hexchar_to_int(from[1]);
203	from+= 2;
204	}
205	interval->type_lengths[pos] /= 2;
206	}
207	}
208
209
210	/*
211	Check if the first word in a string is one of the ones in TYPELIB
212
213	SYNOPSIS
214	check_word()
215	lib TYPELIB
216	val String to check
217	end End of input
218	end_of_word Store value of last used byte here if we found word
219
220	RETURN
221	0 No matching value
222	> 1 lib->type_names[#-1] matched
223	end_of_word will point to separator character/end in 'val'
224	*/
225
226	uint check_word(TYPELIB lib, const char val, const char *end,
227	const char **end_of_word)
228	{
229	int res;
230	const char *ptr;
231
232	/* Fiend end of word */
233	for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++)
234	;
235	if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0)
236	*end_of_word= ptr;
237	return res;
238	}
239
240
241	/*
242	Converts a string between character sets
243
244	SYNOPSIS
245	strconvert()
246	from_cs source character set
247	from source, a null terminated string
248	to destination buffer
249	to_length destination buffer length
250
251	NOTES
252	'to' is always terminated with a '\0' character.
253	If there is no enough space to convert whole string,
254	only prefix is converted, and terminated with '\0'.
255
256	RETURN VALUES
257	result string length
258	*/
259
260
261	uint strconvert(CHARSET_INFO from_cs, const char from,
262	CHARSET_INFO to_cs, char to, uint to_length, uint *errors)
263	{
264	int cnvres;
265	my_wc_t wc;
266	char *to_start= to;
267	uchar to_end= (uchar) to + to_length - 1;
268	my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
269	my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
270	uint error_count= 0;
271
272	while (1)
273	{
274	/*
275	Using 'from + 10' is safe:
276	- it is enough to scan a single character in any character set.
277	- if remaining string is shorter than 10, then mb_wc will return
278	with error because of unexpected '\0' character.
279	*/
280	if ((cnvres= (*mb_wc)(from_cs, &wc,
281	(uchar) from, (uchar) from + 10)) > 0)
282	{
283	if (!wc)
284	break;
285	from+= cnvres;
286	}
287	else if (cnvres == MY_CS_ILSEQ)
288	{
289	error_count++;
290	from++;
291	wc= '?';
292	}
293	else
294	break; // Impossible char.
295
296	outp:
297
298	if ((cnvres= (wc_mb)(to_cs, wc, (uchar) to, to_end)) > 0)
299	to+= cnvres;
300	else if (cnvres == MY_CS_ILUNI && wc != '?')
301	{
302	error_count++;
303	wc= '?';
304	goto outp;
305	}
306	else
307	break;
308	}
309	*to= '\0';
310	*errors= error_count;
311	return (uint32) (to - to_start);
312
313	}
314
315
316	/*
317	Searches for a LEX_STRING in an LEX_STRING array.
318
319	SYNOPSIS
320	find_string_in_array()
321	heap The array
322	needle The string to search for
323
324	NOTE
325	The last LEX_STRING in the array should have str member set to NULL
326
327	RETURN VALUES
328	-1 Not found
329	>=0 Ordinal position
330	*/
331
332	int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle,
333	CHARSET_INFO * const cs)
334	{
335	const LEX_STRING *pos;
336	for (pos= haystack; pos->str; pos++)
337	if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length,
338	(uchar *) needle->str, needle->length, 0))
339	{
340	return (pos - haystack);
341	}
342	return -1;
343	}