~drizzle-trunk/drizzle/development : contents of drizzled/strfunc.cc at revision 243.1.2

~drizzle-trunk/drizzle/development : (revision 243.1.2)

/* Copyright (C) 2003 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* Some useful string utility functions used by the MySQL server */

#include "mysql_priv.h"

/*
  Return bitmap for strings used in a set

  SYNOPSIS
  find_set()
  lib			Strings in set
  str			Strings of set-strings separated by ','
  length		Length of str in bytes
  cs			Character set used to scan str and to compare the
			elements (via find_type2()). May be NULL: then end
			space is stripped with my_charset_latin1 and the
			elements are looked up with find_type().
  err_pos		If error, set to point to start of wrong set string.
			Set to 0 when no wrong element was seen.
  err_len		If error, set to the length of wrong set string
  set_warning		Set to 1 if some string in set couldn't be used

  NOTE
    We delete all end space from str before comparison.
    If several elements are wrong, err_pos/err_len describe the last one.
    err_len and set_warning are only written when an element is wrong.

  RETURN
    bitmap of all sets found in str (bit N-1 is set for the N-th name in lib).
    set_warning is set to 1 if there was any sets that couldn't be set
*/

static const char field_separator=',';

uint64_t find_set(TYPELIB *lib, const char *str, uint length, CHARSET_INFO *cs,
                   char **err_pos, uint *err_len, bool *set_warning)
{
  CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
  const char *end= str + strip->cset->lengthsp(strip, str, length);
  uint64_t found= 0;
  *err_pos= 0;                  // No error yet
  if (str != end)
  {
    const char *start= str;
    for (;;)
    {
      const char *pos= start;
      uint var_len;
      int mblen= 1;

      if (cs && cs->mbminlen > 1)
      {
        /* Every character is wider than one byte: decode to find ',' */
        for ( ; pos < end; pos+= mblen)
        {
          my_wc_t wc;
          if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos,
                                               (const uchar *) end)) < 1)
          {
            /*
              Decoding failed, so wc may not have been written and must
              not be compared. Step over one byte not to hang on a wrong
              multibyte sequence.
            */
            mblen= 1;
            continue;
          }
          if (wc == (my_wc_t) field_separator)
            break;
        }
      }
      else
        for (; pos != end && *pos != field_separator; pos++) ;
      var_len= (uint) (pos - start);
      uint find= cs ? find_type2(lib, start, var_len, cs) :
                      find_type(lib, start, var_len, (bool) 0);
      if (!find)
      {
        *err_pos= (char*) start;
        *err_len= var_len;
        *set_warning= 1;
      }
      else
      {
        /*
          Shift an unsigned 1: for the 64th element a signed shift would
          move a bit into the sign position.
        */
        found|= ((uint64_t) 1 << (find - 1));
      }
      if (pos >= end)
        break;
      start= pos + mblen;       // Skip the separator character
    }
  }
  return found;
}


/*
  Function to find a string in a TYPELIB
  (Same format as mysys/typelib.c)

  SYNOPSIS
   find_type()
   lib			TYPELIB (struct of pointer to values + count);
			type_names must end with a NULL pointer
   find			String to find (need not be '\0' terminated)
   length		Length of string to find
   part_match		Allow part matching of value

  NOTES
   Characters are compared after my_toupper() with system_charset_info.
   An exact match is returned as soon as it is seen. A prefix match is
   only returned when part_match is set and exactly one name has 'find'
   as a prefix.

 RETURN
  0 error (no match, or prefix match not allowed / not unique)
  > 0 position in TYPELIB->type_names +1
*/

uint find_type(const TYPELIB *lib, const char *find, uint length,
               bool part_match)
{
  uint found_count=0, found_pos=0;
  const char *end= find+length;
  const char *i;
  const char *j;
  for (uint pos=0 ; (j=lib->type_names[pos++]) ; )
  {
    /*
      Stop at the terminator of the candidate name: without the *j test
      a '\0' byte inside 'find' would compare equal to the terminator
      and the loop would keep reading past the end of the name.
    */
    for (i=find ; i != end && *j &&
           my_toupper(system_charset_info,*i) ==
           my_toupper(system_charset_info,*j) ; i++, j++) ;
    if (i == end)
    {
      if (! *j)
        return(pos);            // Exact match; pos is already index + 1
      found_count++;
      found_pos= pos;
    }
  }
  return(found_count == 1 && part_match ? found_pos : 0);
}


/*
  Find a string in a list of strings according to collation

  SYNOPSIS
   find_type2()
   typelib		TYPELIB (struct of pointer to values + lengths + count)
   x			String to find
   length		String length
   cs			Character set + collation to use for comparison

  NOTES
   Names are compared with my_strnncoll() using the lengths stored in
   typelib->type_lengths; the scan stops at the first NULL name.

  RETURN
    0	No matching value (also when typelib->count is 0)
    >0  Offset+1 in typelib for matched string
*/

uint find_type2(const TYPELIB *typelib, const char *x, uint length,
                CHARSET_INFO *cs)
{
  if (!typelib->count)
    return(0);

  int idx= 0;
  const char *name;
  while ((name= typelib->type_names[idx]))
  {
    int diff= my_strnncoll(cs, (const uchar*) x, length,
                               (const uchar*) name,
                               typelib->type_lengths[idx]);
    if (diff == 0)
      return(idx + 1);
    idx++;
  }
  return(0);
} /* find_type2 */


/*
  Un-hex all elements in a typelib

  SYNOPSIS
   unhex_type2()
   interval       TYPELIB (struct of pointer to values + lengths + count)

  NOTES
   Each name is converted in place: every pair of hex digits becomes one
   byte, and the matching entry of type_lengths is halved.
   A trailing unpaired digit (odd-length name) is ignored.
   No new '\0' is written after the converted bytes; type_lengths gives
   the size of the result.

  RETURN
    N/A
*/

void unhex_type2(TYPELIB *interval)
{
  for (uint pos= 0; pos < interval->count; pos++)
  {
    char *from, *to;
    /*
      Require a complete pair of digits. Testing only from[0] would let
      an odd-length name step over its terminator and run out of bounds.
    */
    for (from= to= (char*) interval->type_names[pos]; from[0] && from[1]; )
    {
      /*
        Note, hexchar_to_int(*from++) doesn't work
        on some compilers, e.g. IRIX. Looks like a compiler
        bug in inline functions in combination with arguments
        that have a side effect. So, let's use from[0] and from[1]
        and increment 'from' by two later.
      */

      *to++= (char) ((hexchar_to_int(from[0]) << 4) +
                     hexchar_to_int(from[1]));
      from+= 2;
    }
    interval->type_lengths[pos] /= 2;
  }
}


/*
  Check if the first word in a string is one of the ones in TYPELIB

  SYNOPSIS
    check_word()
    lib		TYPELIB
    val		String to check
    end		End of input
    end_of_word	Store value of last used byte here if we found word

  NOTES
    The word is the leading run of bytes in 'val' for which
    my_isalpha(&my_charset_latin1, ...) is true. It is looked up with
    find_type() with part matching allowed.

  RETURN
    0	 No matching value (end_of_word is left untouched)
    > 0  lib->type_names[#-1] matched
	 end_of_word will point to separator character/end in 'val'
*/

uint check_word(TYPELIB *lib, const char *val, const char *end,
		const char **end_of_word)
{
  /* Find end of word */
  const char *word_end= val;
  while (word_end < end && my_isalpha(&my_charset_latin1, *word_end))
    word_end++;

  int match= find_type(lib, val, (uint) (word_end - val), 1);
  if (match > 0)
    *end_of_word= word_end;
  return match;
}


/*
  Converts a string between character sets

  SYNOPSIS
    strconvert()
    from_cs       source character set
    from          source, a null terminated string
    to_cs         destination character set
    to            destination buffer
    to_length     destination buffer length
    errors        out: number of characters that were replaced with '?'
                  (bad source sequence, or not representable in to_cs)

  NOTES
    'to' is always terminated with a '\0' character, unless to_length
    is 0, in which case nothing is written to 'to'.
    If there is not enough space to convert whole string,
    only prefix is converted, and terminated with '\0'.

  RETURN VALUES
    result string length (in bytes, not counting the terminating '\0')
*/


uint strconvert(CHARSET_INFO *from_cs, const char *from,
                CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors)
{
  int cnvres;
  my_wc_t wc;
  char *to_start= to;
  uint error_count= 0;

  /*
    Without any room there is no place even for the terminator:
    'to + to_length - 1' would point before the buffer and the final
    '\0' would be written outside of it.
  */
  if (to_length == 0)
  {
    *errors= 0;
    return 0;
  }

  uchar *to_end= (uchar*) to + to_length - 1;   // Keep one byte for '\0'
  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;

  while (1)
  {
    /*
      Using 'from + 10' is safe:
      - it is enough to scan a single character in any character set.
      - if remaining string is shorter than 10, then mb_wc will return
        with error because of unexpected '\0' character.
    */
    if ((cnvres= (*mb_wc)(from_cs, &wc,
                          (uchar*) from, (uchar*) from + 10)) > 0)
    {
      if (!wc)
        break;                  // End of source string
      from+= cnvres;
    }
    else if (cnvres == MY_CS_ILSEQ)
    {
      /* Bad byte sequence: skip one byte and emit '?' instead */
      error_count++;
      from++;
      wc= '?';
    }
    else
      break; // Impossible char.

outp:

    if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
      to+= cnvres;
    else if (cnvres == MY_CS_ILUNI && wc != '?')
    {
      /* Not representable in to_cs: retry once with '?' */
      error_count++;
      wc= '?';
      goto outp;
    }
    else
      break;                    // No room left, or '?' itself failed
  }
  *to= '\0';
  *errors= error_count;
  return (uint32_t) (to - to_start);

}


/*
  Searches for a LEX_STRING in an LEX_STRING array.

  SYNOPSIS
    find_string_in_array()
      haystack  The array
      needle    The string to search for
      cs        Character set whose collation (strnncollsp) is used
                for the comparison

  NOTE
    The last LEX_STRING in the array should have str member set to NULL

  RETURN VALUES
    -1   Not found
    >=0  Ordinal position (of the first matching entry)
*/

int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle,
                         CHARSET_INFO * const cs)
{
  const LEX_STRING *entry= haystack;
  while (entry->str)
  {
    int diff= cs->coll->strnncollsp(cs, (uchar *) entry->str, entry->length,
                                    (uchar *) needle->str, needle->length, 0);
    if (diff == 0)
      return (entry - haystack);
    entry++;
  }
  return -1;
}

1 by brian clean slate	1	/* Copyright (C) 2003 MySQL AB
	2
	3	This program is free software; you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation; version 2 of the License.
	6
	7	This program is distributed in the hope that it will be useful,
	8	but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	GNU General Public License for more details.
	11
	12	You should have received a copy of the GNU General Public License
	13	along with this program; if not, write to the Free Software
	14	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
	15
	16	/* Some useful string utility functions used by the MySQL server */
	17
	18	#include "mysql_priv.h"
	19
	20	/*
	21	Return bitmap for strings used in a set
	22
	23	SYNOPSIS
	24	find_set()
	25	lib Strings in set
	26	str Strings of set-strings separated by ','
	27	err_pos If error, set to point to start of wrong set string
	28	err_len If error, set to the length of wrong set string
	29	set_warning Set to 1 if some string in set couldn't be used
	30
	31	NOTE
	32	We delete all end space from str before comparison
	33
	34	RETURN
	35	bitmap of all sets found in x.
	36	set_warning is set to 1 if there was any sets that couldn't be set
	37	*/
	38
	39	static const char field_separator=',';
	40
151 by Brian Aker Ulonglong to uint64_t	41	uint64_t find_set(TYPELIB *lib, const char *str, uint length, CHARSET_INFO *cs,
1 by brian clean slate	42	char **err_pos, uint *err_len, bool *set_warning)
	43	{
	44	CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
	45	const char *end= str + strip->cset->lengthsp(strip, str, length);
151 by Brian Aker Ulonglong to uint64_t	46	uint64_t found= 0;
1 by brian clean slate	47	*err_pos= 0; // No error yet
	48	if (str != end)
	49	{
	50	const char *start= str;
	51	for (;;)
	52	{
	53	const char *pos= start;
	54	uint var_len;
	55	int mblen= 1;
	56
	57	if (cs && cs->mbminlen > 1)
	58	{
	59	for ( ; pos < end; pos+= mblen)
	60	{
	61	my_wc_t wc;
	62	if ((mblen= cs->cset->mb_wc(cs, &wc, (const uchar *) pos,
	63	(const uchar *) end)) < 1)
	64	mblen= 1; // Not to hang on a wrong multibyte sequence
	65	if (wc == (my_wc_t) field_separator)
	66	break;
	67	}
	68	}
	69	else
	70	for (; pos != end && *pos != field_separator; pos++) ;
	71	var_len= (uint) (pos - start);
	72	uint find= cs ? find_type2(lib, start, var_len, cs) :
	73	find_type(lib, start, var_len, (bool) 0);
	74	if (!find)
	75	{
	76	*err_pos= (char*) start;
	77	*err_len= var_len;
	78	*set_warning= 1;
	79	}
	80	else
152 by Brian Aker longlong replacement	81	found|= ((int64_t) 1 << (find - 1));
1 by brian clean slate	82	if (pos >= end)
	83	break;
	84	start= pos + mblen;
	85	}
	86	}
	87	return found;
	88	}
	89
	90
	91	/*
	92	Function to find a string in a TYPELIB
	93	(Same format as mysys/typelib.c)
	94
	95	SYNOPSIS
	96	find_type()
	97	lib TYPELIB (struct of pointer to values + count)
	98	find String to find
	99	length Length of string to find
	100	part_match Allow part matching of value
	101
	102	RETURN
	103	0 error
	104	> 0 position in TYPELIB->type_names +1
	105	*/
	106
	107	uint find_type(const TYPELIB *lib, const char *find, uint length,
	108	bool part_match)
	109	{
	110	uint found_count=0, found_pos=0;
	111	const char *end= find+length;
	112	const char *i;
	113	const char *j;
	114	for (uint pos=0 ; (j=lib->type_names[pos++]) ; )
	115	{
	116	for (i=find ; i != end &&
	117	my_toupper(system_charset_info,*i) ==
	118	my_toupper(system_charset_info,*j) ; i++, j++) ;
	119	if (i == end)
	120	{
	121	if (! *j)
	122	return(pos);
	123	found_count++;
	124	found_pos= pos;
	125	}
	126	}
	127	return(found_count == 1 && part_match ? found_pos : 0);
	128	}
	129
	130
	131	/*
	132	Find a string in a list of strings according to collation
	133
	134	SYNOPSIS
	135	find_type2()
	136	lib TYPELIB (struct of pointer to values + count)
	137	x String to find
	138	length String length
	139	cs Character set + collation to use for comparison
	140
	141	NOTES
	142
	143	RETURN
	144	0 No matching value
	145	>0 Offset+1 in typelib for matched string
146	*/
147
148	uint find_type2(const TYPELIB *typelib, const char *x, uint length,
149	CHARSET_INFO *cs)
150	{
151	int pos;
152	const char *j;
153
154	if (!typelib->count)
155	{
51.1.69 by Jay Pipes Removed/replaced DBUG symbols	156	return(0);
1 by brian clean slate	157	}
	158
	159	for (pos=0 ; (j=typelib->type_names[pos]) ; pos++)
	160	{
	161	if (!my_strnncoll(cs, (const uchar*) x, length,
	162	(const uchar*) j, typelib->type_lengths[pos]))
51.1.69 by Jay Pipes Removed/replaced DBUG symbols	163	return(pos+1);
1 by brian clean slate	164	}
51.1.69 by Jay Pipes Removed/replaced DBUG symbols	165	return(0);
1 by brian clean slate	166	} /* find_type */
	167
	168
	169	/*
	170	Un-hex all elements in a typelib
	171
	172	SYNOPSIS
	173	unhex_type2()
	174	interval TYPELIB (struct of pointer to values + lengths + count)
	175
	176	NOTES
	177
	178	RETURN
	179	N/A
	180	*/
	181
	182	void unhex_type2(TYPELIB *interval)
	183	{
	184	for (uint pos= 0; pos < interval->count; pos++)
	185	{
	186	char *from, *to;
	187	for (from= to= (char*) interval->type_names[pos]; *from; )
	188	{
	189	/*
	190	Note, hexchar_to_int(*from++) doesn't work
	191	one some compilers, e.g. IRIX. Looks like a compiler
	192	bug in inline functions in combination with arguments
	193	that have a side effect. So, let's use from[0] and from[1]
	194	and increment 'from' by two later.
	195	*/
	196
	197	*to++= (char) (hexchar_to_int(from[0]) << 4) +
	198	hexchar_to_int(from[1]);
	199	from+= 2;
	200	}
	201	interval->type_lengths[pos] /= 2;
	202	}
	203	}
	204
	205
	206	/*
	207	Check if the first word in a string is one of the ones in TYPELIB
	208
	209	SYNOPSIS
	210	check_word()
	211	lib TYPELIB
	212	val String to check
	213	end End of input
	214	end_of_word Store value of last used byte here if we found word
	215
	216	RETURN
	217	0 No matching value
	218	> 1 lib->type_names[#-1] matched
	219	end_of_word will point to separator character/end in 'val'
	220	*/
	221
	222	uint check_word(TYPELIB *lib, const char *val, const char *end,
	223	const char **end_of_word)
	224	{
	225	int res;
	226	const char *ptr;
	227
	228	/* Fiend end of word */
	229	for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++)
230	;
231	if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0)
232	*end_of_word= ptr;
233	return res;
234	}
235
236
237	/*
238	Converts a string between character sets
239
240	SYNOPSIS
241	strconvert()
242	from_cs source character set
243	from source, a null terminated string
244	to destination buffer
245	to_length destination buffer length
246
247	NOTES
248	'to' is always terminated with a '\0' character.
249	If there is no enough space to convert whole string,
250	only prefix is converted, and terminated with '\0'.
251
252	RETURN VALUES
253	result string length
254	*/
255
256
257	uint strconvert(CHARSET_INFO *from_cs, const char *from,
258	CHARSET_INFO *to_cs, char *to, uint to_length, uint *errors)
259	{
260	int cnvres;
261	my_wc_t wc;
262	char *to_start= to;
263	uchar *to_end= (uchar*) to + to_length - 1;
264	my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
265	my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
266	uint error_count= 0;
267
268	while (1)
269	{
270	/*
271	Using 'from + 10' is safe:
272	- it is enough to scan a single character in any character set.
273	- if remaining string is shorter than 10, then mb_wc will return
274	with error because of unexpected '\0' character.
275	*/
276	if ((cnvres= (*mb_wc)(from_cs, &wc,
277	(uchar*) from, (uchar*) from + 10)) > 0)
278	{
279	if (!wc)
280	break;
281	from+= cnvres;
282	}
283	else if (cnvres == MY_CS_ILSEQ)
284	{
285	error_count++;
286	from++;
287	wc= '?';
288	}
289	else
290	break; // Impossible char.
291
292	outp:
293
294	if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
295	to+= cnvres;
296	else if (cnvres == MY_CS_ILUNI && wc != '?')
297	{
298	error_count++;
299	wc= '?';
300	goto outp;
301	}
302	else
303	break;
304	}
305	*to= '\0';
306	*errors= error_count;
205 by Brian Aker uint32 -> uin32_t	307	return (uint32_t) (to - to_start);
1 by brian clean slate	308
	309	}
	310
	311
	312	/*
	313	Searches for a LEX_STRING in an LEX_STRING array.
	314
	315	SYNOPSIS
	316	find_string_in_array()
	317	heap The array
	318	needle The string to search for
	319
	320	NOTE
	321	The last LEX_STRING in the array should have str member set to NULL
	322
	323	RETURN VALUES
	324	-1 Not found
	325	>=0 Ordinal position
	326	*/
	327
	328	int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle,
	329	CHARSET_INFO * const cs)
	330	{
	331	const LEX_STRING *pos;
	332	for (pos= haystack; pos->str; pos++)
	333	if (!cs->coll->strnncollsp(cs, (uchar *) pos->str, pos->length,
	334	(uchar *) needle->str, needle->length, 0))
	335	{
	336	return (pos - haystack);
	337	}
	338	return -1;
	339	}