~drizzle-trunk/drizzle/development : contents of drizzled/strfunc.cc at revision 1099.4.55

~drizzle-trunk/drizzle/development : (revision 1099.4.55)

/* Copyright (C) 2003 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* Some useful string utility functions used by the MySQL server */
#include <drizzled/server_includes.h>

/*
  Return bitmap for strings used in a set

  SYNOPSIS
  find_set()
  lib			Strings in set
  str			Strings of set-strings separated by ','
  length		Length of str in bytes
  cs			Character set used to strip end space and to compare
			the members. If NULL, utf8_general_ci is used for the
			stripping and members are looked up with find_type()
			(exact match only) instead of find_type2()
  err_pos		Set to 0 on entry. If error, set to point to start of
			wrong set string
  err_len		If error, set to the length of wrong set string
  set_warning		Set to 1 if some string in set couldn't be used

  NOTE
    We delete all end space from str before comparison.

    err_len and set_warning are only written when a member cannot be
    found, so the caller has to initialize them. If several members are
    wrong, err_pos and err_len describe the last one.

    If str is empty after stripping, no lookup is done and 0 is returned.

  RETURN
    bitmap of all sets found in str: bit (N - 1) is set when the N:th
    string of lib was found.
    set_warning is set to 1 if there was any sets that couldn't be set
*/

static const char field_separator=',';

uint64_t find_set(TYPELIB *lib, const char *str, uint32_t length,
                  const CHARSET_INFO * const cs,
                  char **err_pos, uint32_t *err_len, bool *set_warning)
{
  const CHARSET_INFO * const strip= cs ? cs : &my_charset_utf8_general_ci;
  const char *end= str + strip->cset->lengthsp(strip, str, length);
  uint64_t found= 0;
  *err_pos= 0;                  // No error yet
  if (str == end)
    return found;               // Nothing left after stripping

  const char *start= str;
  for (;;)
  {
    /* Find the end of the current member */
    const char *pos= start;
    while (pos != end && *pos != field_separator)
      pos++;

    const uint32_t var_len= (uint32_t) (pos - start);
    const uint32_t find= cs ? find_type2(lib, start, var_len, cs) :
                              find_type(lib, start, var_len, (bool) 0);
    if (!find)
    {
      /* The interface hands back a non-const pointer into str */
      *err_pos= (char*) start;
      *err_len= var_len;
      *set_warning= 1;
    }
    else
    {
      /*
        Shift an unsigned 1: with a signed 1 the 64:th member would be
        shifted into the sign bit, which is not a defined operation.
        NOTE(review): assumes lib never has more than 64 members, as a
        larger shift count would still be undefined -- confirm in callers.
      */
      found|= ((uint64_t) 1 << (find - 1));
    }
    if (pos >= end)
      break;
    start= pos + 1;             // Skip the one byte separator
  }
  return found;
}


/*
  Function to find a string in a TYPELIB
  (Same format as mysys/typelib.c)

  SYNOPSIS
   find_type()
   lib			TYPELIB (struct of pointer to values + count)
   find			String to find; only 'length' bytes are read, so it
			does not have to be NUL terminated
   length		Length of string to find
   part_match		Allow part matching of value

  NOTE
    Bytes are compared after my_toupper() on system_charset_info.
    lib->type_names is scanned until its terminating NULL entry.

    An exact match is returned at once. If part_match is set and no exact
    match exists, a name that starts with 'find' is returned, but only if
    it is the only such name in lib.

 RETURN
  0 error (not found, or the part match was ambiguous)
  > 0 position in TYPELIB->type_names +1
*/

uint32_t find_type(const TYPELIB *lib, const char *find, uint32_t length,
               bool part_match)
{
  uint32_t found_count=0, found_pos=0;
  const char *end= find+length;
  const char *i;
  const char *j;
  for (uint32_t pos=0 ; (j=lib->type_names[pos++]) ; )
  {
    /*
      Stop at the end of the type name too. Without the *j test a NUL
      byte inside 'find' compares equal to the terminator of the name and
      the loop would continue reading behind the name.
    */
    for (i=find ; i != end && *j &&
           my_toupper(system_charset_info,*i) ==
           my_toupper(system_charset_info,*j) ; i++, j++) ;
    if (i == end)
    {
      if (! *j)
        return(pos);            // Exact match; pos is already index + 1
      /* 'find' is a proper prefix of this name */
      found_count++;
      found_pos= pos;
    }
  }
  return(found_count == 1 && part_match ? found_pos : 0);
}


/*
  Find a string in a list of strings according to collation

  SYNOPSIS
   find_type2()
   typelib		TYPELIB (struct of pointer to values + lengths + count)
   x			String to find
   length               String length
   cs			Character set + collation to use for comparison

  NOTES
    Every name is compared with my_strnncoll() using the length stored in
    typelib->type_lengths. The scan stops at the first NULL entry of
    typelib->type_names; a typelib with count 0 is never scanned.

  RETURN
    0	No matching value
    >0  Offset+1 in typelib for matched string
*/

uint32_t find_type2(const TYPELIB *typelib, const char *x, uint32_t length,
                const CHARSET_INFO * const cs)
{
  /* Nothing to search in an empty typelib */
  if (typelib->count == 0)
    return(0);

  int idx= 0;
  for (;;)
  {
    const char *name= typelib->type_names[idx];
    if (!name)
      break;                    // Reached the end marker, no match
    if (!my_strnncoll(cs, (const unsigned char*) x, length,
                          (const unsigned char*) name,
                          typelib->type_lengths[idx]))
      return(idx + 1);
    idx++;
  }
  return(0);
} /* find_type2 */


/*
  Un-hex all elements in a typelib

  SYNOPSIS
   unhex_type2()
   interval       TYPELIB (struct of pointer to values + lengths + count)

  NOTES
    Each of the first interval->count names is decoded in place: every
    pair of hex digits is replaced by the byte it encodes, and the
    matching entry in type_lengths is halved.

    No terminator is written behind the decoded bytes, so the new length
    in type_lengths must be used to find the end of a decoded name.

    A trailing single digit (odd number of characters) is left undecoded.

  RETURN
    N/A
*/

void unhex_type2(TYPELIB *interval)
{
  for (uint32_t pos= 0; pos < interval->count; pos++)
  {
    /* The names are modified in place, hence the cast to non-const */
    char *from= (char*) interval->type_names[pos];
    char *to= from;

    /*
      Require a complete pair of digits. Testing only from[0] would let
      from+= 2 step over the terminator of an odd-length name and continue
      behind the end of the string.
    */
    while (from[0] && from[1])
    {
      /*
        Note, hexchar_to_int(*from++) doesn't work
        one some compilers, e.g. IRIX. Looks like a compiler
        bug in inline functions in combination with arguments
        that have a side effect. So, let's use from[0] and from[1]
        and increment 'from' by two later.
      */

      /* The cast covers the whole sum; the stored byte is unchanged */
      *to++= (char) ((hexchar_to_int(from[0]) << 4) +
                     hexchar_to_int(from[1]));
      from+= 2;
    }
    interval->type_lengths[pos] /= 2;
  }
}


/*
  Check if the first word in a string is one of the ones in TYPELIB

  SYNOPSIS
    check_word()
    lib		TYPELIB
    val		String to check
    end		End of input
    end_of_word	Set to the first byte after the word if we found word;
		not touched otherwise

  NOTE
    The word is the leading run of bytes in [val, end) for which
    my_isalpha() on my_charset_utf8_general_ci is true. It is looked up
    with find_type() with part matching allowed.

  RETURN
    0	 No matching value
    > 0  lib->type_names[#-1] matched
	 end_of_word will point to separator character/end in 'val'
*/

uint32_t check_word(TYPELIB *lib, const char *val, const char *end,
		const char **end_of_word)
{
  /* Find end of word */
  const char *word_end= val;
  while (word_end < end &&
         my_isalpha(&my_charset_utf8_general_ci, *word_end))
    word_end++;

  const int match= find_type(lib, val, (uint32_t) (word_end - val), 1);
  if (match > 0)
    *end_of_word= word_end;
  return match;
}


/*
  Searches for a LEX_STRING in an LEX_STRING array.

  SYNOPSIS
    find_string_in_array()
      haystack  The array
      needle    The string to search for
      cs        Character set whose collation does the comparison
                (cs->coll->strnncollsp() is called with 0 as last argument)

  NOTE
    The last LEX_STRING in the array should have str member set to NULL

  RETURN VALUES
    -1   Not found
    >=0  Ordinal position of the first element that compares equal
*/

int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle,
                         const CHARSET_INFO * const cs)
{
  const LEX_STRING *cur= haystack;
  while (cur->str)
  {
    if (cs->coll->strnncollsp(cs, (unsigned char *) cur->str, cur->length,
                              (unsigned char *) needle->str, needle->length,
                              0) == 0)
    {
      /* Distance from the start of the array is the ordinal position */
      return (cur - haystack);
    }
    cur++;
  }
  return -1;
}

1 by brian clean slate	1	/* Copyright (C) 2003 MySQL AB
	2
	3	This program is free software; you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation; version 2 of the License.
	6
	7	This program is distributed in the hope that it will be useful,
	8	but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	GNU General Public License for more details.
	11
	12	You should have received a copy of the GNU General Public License
	13	along with this program; if not, write to the Free Software
	14	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
	15
	16	/* Some useful string utility functions used by the MySQL server */
243.1.17 by Jay Pipes FINAL PHASE removal of mysql_priv.h (Bye, bye my friend.)	17	#include <drizzled/server_includes.h>
1 by brian clean slate	18
	19	/*
	20	Return bitmap for strings used in a set
	21
	22	SYNOPSIS
	23	find_set()
	24	lib Strings in set
	25	str Strings of set-strings separated by ','
	26	err_pos If error, set to point to start of wrong set string
	27	err_len If error, set to the length of wrong set string
	28	set_warning Set to 1 if some string in set couldn't be used
	29
	30	NOTE
	31	We delete all end space from str before comparison
	32
	33	RETURN
	34	bitmap of all sets found in x.
	35	set_warning is set to 1 if there was any sets that couldn't be set
	36	*/
	37
	38	static const char field_separator=',';
	39
482 by Brian Aker Remove uint.	40	uint64_t find_set(TYPELIB lib, const char str, uint32_t length,
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	41	const CHARSET_INFO * const cs,
482 by Brian Aker Remove uint.	42	char *err_pos, uint32_t err_len, bool *set_warning)
1 by brian clean slate	43	{
383.1.12 by Brian Aker Much closer toward UTF8 being around all the time...	44	const CHARSET_INFO * const strip= cs ? cs : &my_charset_utf8_general_ci;
1 by brian clean slate	45	const char *end= str + strip->cset->lengthsp(strip, str, length);
151 by Brian Aker Ulonglong to uint64_t	46	uint64_t found= 0;
1 by brian clean slate	47	*err_pos= 0; // No error yet
	48	if (str != end)
	49	{
660.1.3 by Eric Herman removed trailing whitespace with simple script:	50	const char *start= str;
1 by brian clean slate	51	for (;;)
	52	{
	53	const char *pos= start;
482 by Brian Aker Remove uint.	54	uint32_t var_len;
1 by brian clean slate	55	int mblen= 1;
1 by brian clean slate	56
1054.2.12 by Monty Taylor First pass at removing strconvert.	57	for (; pos != end && *pos != field_separator; pos++)
	58	{}
895 by Brian Aker Completion (?) of uint conversion.	59	var_len= (uint32_t) (pos - start);
482 by Brian Aker Remove uint.	60	uint32_t find= cs ? find_type2(lib, start, var_len, cs) :
1 by brian clean slate	61	find_type(lib, start, var_len, (bool) 0);
	62	if (!find)
	63	{
	64	err_pos= (char) start;
	65	*err_len= var_len;
	66	*set_warning= 1;
	67	}
	68	else
152 by Brian Aker longlong replacement	69	found\|= ((int64_t) 1 << (find - 1));
1 by brian clean slate	70	if (pos >= end)
	71	break;
	72	start= pos + mblen;
	73	}
	74	}
	75	return found;
	76	}
	77
	78
	79	/*
	80	Function to find a string in a TYPELIB
	81	(Same format as mysys/typelib.c)
	82
	83	SYNOPSIS
	84	find_type()
	85	lib TYPELIB (struct of pointer to values + count)
	86	find String to find
	87	length Length of string to find
	88	part_match Allow part matching of value
	89
	90	RETURN
	91	0 error
	92	> 0 position in TYPELIB->type_names +1
	93	*/
	94
482 by Brian Aker Remove uint.	95	uint32_t find_type(const TYPELIB lib, const char find, uint32_t length,
1 by brian clean slate	96	bool part_match)
1 by brian clean slate	97	{
482 by Brian Aker Remove uint.	98	uint32_t found_count=0, found_pos=0;
1 by brian clean slate	99	const char *end= find+length;
	100	const char *i;
	101	const char *j;
482 by Brian Aker Remove uint.	102	for (uint32_t pos=0 ; (j=lib->type_names[pos++]) ; )
1 by brian clean slate	103	{
660.1.3 by Eric Herman removed trailing whitespace with simple script:	104	for (i=find ; i != end &&
	105	my_toupper(system_charset_info,*i) ==
1 by brian clean slate	106	my_toupper(system_charset_info,*j) ; i++, j++) ;
	107	if (i == end)
	108	{
	109	if (! *j)
	110	return(pos);
	111	found_count++;
	112	found_pos= pos;
	113	}
	114	}
	115	return(found_count == 1 && part_match ? found_pos : 0);
	116	}
	117
	118
	119	/*
	120	Find a string in a list of strings according to collation
	121
	122	SYNOPSIS
	123	find_type2()
	124	lib TYPELIB (struct of pointer to values + count)
	125	x String to find
	126	length String length
	127	cs Character set + collation to use for comparison
	128
	129	NOTES
	130
	131	RETURN
	132	0 No matching value
	133	>0 Offset+1 in typelib for matched string
	134	*/
	135
482 by Brian Aker Remove uint.	136	uint32_t find_type2(const TYPELIB typelib, const char x, uint32_t length,
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	137	const CHARSET_INFO * const cs)
1 by brian clean slate	138	{
	139	int pos;
	140	const char *j;
	141
	142	if (!typelib->count)
	143	{
51.1.69 by Jay Pipes Removed/replaced DBUG symbols	144	return(0);
1 by brian clean slate	145	}
	146
	147	for (pos=0 ; (j=typelib->type_names[pos]) ; pos++)
	148	{
481 by Brian Aker Remove all of uchar.	149	if (!my_strnncoll(cs, (const unsigned char*) x, length,
481 by Brian Aker Remove all of uchar.	150	(const unsigned char*) j, typelib->type_lengths[pos]))
51.1.69 by Jay Pipes Removed/replaced DBUG symbols	151	return(pos+1);
1 by brian clean slate	152	}
51.1.69 by Jay Pipes Removed/replaced DBUG symbols	153	return(0);
1 by brian clean slate	154	} /* find_type */
	155
	156
	157	/*
	158	Un-hex all elements in a typelib
	159
	160	SYNOPSIS
	161	unhex_type2()
	162	interval TYPELIB (struct of pointer to values + lengths + count)
	163
	164	NOTES
	165
	166	RETURN
	167	N/A
	168	*/
	169
	170	void unhex_type2(TYPELIB *interval)
	171	{
482 by Brian Aker Remove uint.	172	for (uint32_t pos= 0; pos < interval->count; pos++)
1 by brian clean slate	173	{
	174	char from, to;
	175	for (from= to= (char) interval->type_names[pos]; from; )
	176	{
	177	/*
	178	Note, hexchar_to_int(*from++) doesn't work
	179	one some compilers, e.g. IRIX. Looks like a compiler
	180	bug in inline functions in combination with arguments
	181	that have a side effect. So, let's use from[0] and from[1]
	182	and increment 'from' by two later.
	183	*/
	184
	185	*to++= (char) (hexchar_to_int(from[0]) << 4) +
	186	hexchar_to_int(from[1]);
	187	from+= 2;
	188	}
	189	interval->type_lengths[pos] /= 2;
	190	}
	191	}
	192
	193
	194	/*
	195	Check if the first word in a string is one of the ones in TYPELIB
	196
	197	SYNOPSIS
	198	check_word()
	199	lib TYPELIB
	200	val String to check
	201	end End of input
	202	end_of_word Store value of last used byte here if we found word
	203
	204	RETURN
	205	0 No matching value
	206	> 1 lib->type_names[#-1] matched
	207	end_of_word will point to separator character/end in 'val'
	208	*/
	209
482 by Brian Aker Remove uint.	210	uint32_t check_word(TYPELIB lib, const char val, const char *end,
1 by brian clean slate	211	const char **end_of_word)
	212	{
	213	int res;
	214	const char *ptr;
	215
	216	/* Fiend end of word */
383.1.12 by Brian Aker Much closer toward UTF8 being around all the time...	217	for (ptr= val ; ptr < end && my_isalpha(&my_charset_utf8_general_ci, *ptr) ; ptr++)
1 by brian clean slate	218	;
895 by Brian Aker Completion (?) of uint conversion.	219	if ((res=find_type(lib, val, (uint32_t) (ptr - val), 1)) > 0)
1 by brian clean slate	220	*end_of_word= ptr;
	221	return res;
	222	}
	223
	224
	225	/*
	226	Searches for a LEX_STRING in an LEX_STRING array.
	227
	228	SYNOPSIS
	229	find_string_in_array()
	230	heap The array
	231	needle The string to search for
	232
	233	NOTE
	234	The last LEX_STRING in the array should have str member set to NULL
	235
	236	RETURN VALUES
	237	-1 Not found
	238	>=0 Ordinal position
	239	*/
	240
	241	int find_string_in_array(LEX_STRING * const haystack, LEX_STRING * const needle,
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	242	const CHARSET_INFO * const cs)
1 by brian clean slate	243	{
	244	const LEX_STRING *pos;
	245	for (pos= haystack; pos->str; pos++)
481 by Brian Aker Remove all of uchar.	246	if (!cs->coll->strnncollsp(cs, (unsigned char *) pos->str, pos->length,
481 by Brian Aker Remove all of uchar.	247	(unsigned char *) needle->str, needle->length, 0))
1 by brian clean slate	248	{
	249	return (pos - haystack);
	250	}
	251	return -1;
	252	}