~drizzle-trunk/drizzle/development : contents of mysys/charset.c at revision 505

~drizzle-trunk/drizzle/development : (revision 505)

/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#include "mysys_priv.h"
#include "mysys_err.h"
#include <mystrings/m_ctype.h>
#include <mystrings/m_string.h>
#include <my_dir.h>


/*
  The code below implements this functionality:
  
    - Initializing charset related structures
    - Loading dynamic charsets
    - Searching for a proper CHARSET_INFO 
      using charset name, collation name or collation ID
    - Setting server default character set
*/

/*
  Tell whether two collations belong to the same character set.

  Returns true when both pointers are identical, or when their csname
  strings compare equal with strcmp(); false otherwise.
*/
bool my_charset_same(const CHARSET_INFO *cs1, const CHARSET_INFO *cs2)
{
  if (cs1 == cs2)
    return true;

  return strcmp(cs1->csname, cs2->csname) == 0;
}


/*
  Look up a collation by name in all_charsets.

  The comparison is case-insensitive (my_strcasecmp with
  my_charset_utf8_general_ci). Returns the matching entry's number, or 0
  when no entry matches. The last slot of all_charsets is not scanned.
*/
static uint
get_collation_number_internal(const char *name)
{
  const size_t slots= array_elements(all_charsets) - 1;
  size_t idx;

  for (idx= 0; idx < slots; idx++)
  {
    CHARSET_INFO *entry= all_charsets[idx];

    /* Skip empty slots and entries without a collation name */
    if (!entry || !entry->name)
      continue;

    if (my_strcasecmp(&my_charset_utf8_general_ci, entry->name, name) == 0)
      return entry->number;
  }
  return 0;
}


/*
  Build the two 256-entry lexer lookup tables of a character set.

  cs->state_map maps every byte value to a MY_LEX_* state, and
  cs->ident_map flags the byte values that may be part of an identifier.
  Both tables are obtained from my_once_alloc().

  Returns 0 on success and 1 when either allocation fails.
*/
static bool init_state_maps(CHARSET_INFO *cs)
{
  unsigned char *states;
  unsigned char *idents;
  uint32_t ch;

  cs->state_map= (unsigned char*) my_once_alloc(256, MYF(MY_WME));
  if (cs->state_map == NULL)
    return 1;

  cs->ident_map= (unsigned char*) my_once_alloc(256, MYF(MY_WME));
  if (cs->ident_map == NULL)
    return 1;

  states= cs->state_map;
  idents= cs->ident_map;

  /* Default classification of every byte value, so the parser can dispatch fast */
  for (ch= 0; ch < 256; ch++)
  {
    unsigned char state;

    if (my_isalpha(cs, ch))
      state= (unsigned char) MY_LEX_IDENT;
    else if (my_isdigit(cs, ch))
      state= (unsigned char) MY_LEX_NUMBER_IDENT;
#if defined(USE_MB) && defined(USE_MB_IDENT)
    else if (my_mbcharlen(cs, ch) > 1)
      state= (unsigned char) MY_LEX_IDENT;
#endif
    else if (my_isspace(cs, ch))
      state= (unsigned char) MY_LEX_SKIP;
    else
      state= (unsigned char) MY_LEX_CHAR;

    states[ch]= state;
  }

  /* Characters with a dedicated lexer state override the defaults above */
  states[(unsigned char) '_']= (unsigned char) MY_LEX_IDENT;
  states[(unsigned char) '$']= (unsigned char) MY_LEX_IDENT;
  states[(unsigned char) '\'']= (unsigned char) MY_LEX_STRING;
  states[(unsigned char) '.']= (unsigned char) MY_LEX_REAL_OR_POINT;
  states[(unsigned char) '>']= (unsigned char) MY_LEX_CMP_OP;
  states[(unsigned char) '=']= (unsigned char) MY_LEX_CMP_OP;
  states[(unsigned char) '!']= (unsigned char) MY_LEX_CMP_OP;
  states[(unsigned char) '<']= (unsigned char) MY_LEX_LONG_CMP_OP;
  states[(unsigned char) '&']= (unsigned char) MY_LEX_BOOL;
  states[(unsigned char) '|']= (unsigned char) MY_LEX_BOOL;
  states[(unsigned char) '#']= (unsigned char) MY_LEX_COMMENT;
  states[(unsigned char) ';']= (unsigned char) MY_LEX_SEMICOLON;
  states[(unsigned char) ':']= (unsigned char) MY_LEX_SET_VAR;
  states[0]= (unsigned char) MY_LEX_EOL;
  states[(unsigned char) '\\']= (unsigned char) MY_LEX_ESCAPE;
  states[(unsigned char) '/']= (unsigned char) MY_LEX_LONG_COMMENT;
  states[(unsigned char) '*']= (unsigned char) MY_LEX_END_LONG_COMMENT;
  states[(unsigned char) '@']= (unsigned char) MY_LEX_USER_END;
  states[(unsigned char) '`']= (unsigned char) MY_LEX_USER_VARIABLE_DELIMITER;
  states[(unsigned char) '"']= (unsigned char) MY_LEX_STRING_OR_DELIMITER;

  /*
    Second table: 1 for bytes classified as identifier characters.
    It must be filled before the hex/binary override below, so that
    'x', 'X', 'b' and 'B' still count as identifier characters.
  */
  for (ch= 0; ch < 256; ch++)
  {
    idents[ch]= (unsigned char) (states[ch] == MY_LEX_IDENT ||
                                 states[ch] == MY_LEX_NUMBER_IDENT);
  }

  /* Letters that may introduce a hex or binary string literal */
  states[(unsigned char) 'x']= (unsigned char) MY_LEX_IDENT_OR_HEX;
  states[(unsigned char) 'X']= (unsigned char) MY_LEX_IDENT_OR_HEX;
  states[(unsigned char) 'b']= (unsigned char) MY_LEX_IDENT_OR_BIN;
  states[(unsigned char) 'B']= (unsigned char) MY_LEX_IDENT_OR_BIN;
  return 0;
}


/*
  1 MiB. The product is parenthesized so it stays a single operand wherever
  the macro is expanded (for example as a divisor or next to a cast).
*/
#define MY_MAX_ALLOWED_BUF (1024*1024)
/* File name that callers in this file append to the charsets directory path */
#define MY_CHARSET_INDEX "Index.xml"

/* Optional charsets directory override; NULL makes get_charsets_dir() use the built-in default */
const char *charsets_dir= NULL;
/* Non-zero once init_available_charsets() has filled all_charsets */
static int charset_initialized=0;


/*
  Write the charsets directory path into buf.

  When charsets_dir is set, it is copied (at most FN_REFLEN-1 characters).
  Otherwise the path is assembled from SHAREDIR and CHARSET_DIR, prefixed
  with DEFAULT_CHARSET_HOME unless SHAREDIR is a hard path or already
  starts with DEFAULT_CHARSET_HOME.

  Returns the result of convert_dirname(); callers in this file append a
  file name at the returned pointer.
*/
char *get_charsets_dir(char *buf)
{
  const char *sharedir= SHAREDIR;

  if (charsets_dir != NULL)
  {
    strmake(buf, charsets_dir, FN_REFLEN-1);
  }
  else if (test_if_hard_path(sharedir) ||
           is_prefix(sharedir, DEFAULT_CHARSET_HOME))
  {
    strxmov(buf, sharedir, "/", CHARSET_DIR, NULL);
  }
  else
  {
    strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,
            NULL);
  }

  return convert_dirname(buf, buf, NULL);
}

/*
  Table of known collations. add_compiled_collation() stores each entry at
  the index given by its number; unused slots are NULL after
  init_available_charsets() has run.
*/
CHARSET_INFO *all_charsets[256];
/* Collation that get_charset() returns directly when asked for its number */
const CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci;

/*
  Register a compiled-in collation.

  The collation is stored in all_charsets at index cs->number and marked
  MY_CS_AVAILABLE. A collation whose number does not fit in all_charsets
  is ignored instead of being written past the end of the table.
*/
void add_compiled_collation(CHARSET_INFO * cs)
{
  if (cs->number >= array_elements(all_charsets))
    return;

  all_charsets[cs->number]= cs;
  cs->state|= MY_CS_AVAILABLE;
}

/*
  Allocation callback handed to the cset/coll init hooks.
  Forwards to my_once_alloc() with the MY_WME flag.
*/
static void *cs_alloc(size_t size)
{
  void *chunk= my_once_alloc(size, MYF(MY_WME));
  return chunk;
}


/*
  Populate all_charsets once.

  On the first call the table is cleared, init_compiled_charsets() is run
  with myflags, and lexer state maps are built for every registered entry
  that has a ctype table. An entry whose state maps cannot be built is
  removed from the table. Later calls return immediately.

  Always returns false.
*/
static bool init_available_charsets(myf myflags)
{
  CHARSET_INFO **cs;

  /*
    charset_initialized is tested before taking THR_LOCK_charset so that
    calls made after initialization do not contend on the mutex.
    NOTE(review): this first read is not synchronized.
  */
  if (charset_initialized)
    return false;

  /*
    Keep other threads out while all_charsets is being rebuilt, and
    re-test the flag in case another thread finished first.
  */
  pthread_mutex_lock(&THR_LOCK_charset);
  if (!charset_initialized)
  {
    memset(&all_charsets, 0, sizeof(all_charsets));
    init_compiled_charsets(myflags);

    /* Build lexer state maps for the compiled charsets */
    for (cs= all_charsets;
         cs < all_charsets+array_elements(all_charsets)-1 ;
         cs++)
    {
      if (*cs && cs[0]->ctype && init_state_maps(*cs))
        *cs= NULL;
    }

    charset_initialized=1;
  }
  pthread_mutex_unlock(&THR_LOCK_charset);
  return false;
}


/*
  Mark the charset table as uninitialized.

  Only the flag is reset here; no memory is released by this function.
  The next call to init_available_charsets() clears and rebuilds
  all_charsets.
*/
void free_charsets(void)
{
  charset_initialized=0;
}


/*
  Return the number of the collation called name, or 0 when there is no
  such collation. Makes sure the charset table is initialized first.
*/
uint32_t get_collation_number(const char *name)
{
  uint32_t number;

  (void) init_available_charsets(MYF(0));
  number= get_collation_number_internal(name);
  return number;
}


/*
  Find a collation by character set name and state flags.

  Returns the number of the first all_charsets entry whose csname matches
  charset_name (case-insensitively) and whose state has at least one of
  the bits in cs_flags set; 0 when there is none. The last slot of
  all_charsets is not scanned.
*/
uint32_t get_charset_number(const char *charset_name, uint32_t cs_flags)
{
  const size_t slots= array_elements(all_charsets) - 1;
  size_t idx;

  (void) init_available_charsets(MYF(0));

  for (idx= 0; idx < slots; idx++)
  {
    CHARSET_INFO *entry= all_charsets[idx];

    if (!entry || !entry->csname || !(entry->state & cs_flags))
      continue;

    if (my_strcasecmp(&my_charset_utf8_general_ci,
                      entry->csname, charset_name) == 0)
      return entry->number;
  }
  return 0;
}


/*
  Return the collation name stored for charset_number.

  Returns "?" when charset_number lies outside all_charsets, when the slot
  is empty, when the entry's own number differs from charset_number, or
  when the entry has no name.
*/
const char *get_charset_name(uint32_t charset_number)
{
  const CHARSET_INFO *cs;
  init_available_charsets(MYF(0));

  /* The number comes from the caller; never index past the table */
  if (charset_number >= array_elements(all_charsets))
    return "?";

  cs= all_charsets[charset_number];
  if (cs && (cs->number == charset_number) && cs->name)
    return cs->name;

  return "?";   /* this mimics find_type() */
}


/*
  Fetch the all_charsets entry for cs_number and make it ready for use.

  Returns NULL when the slot is empty, when the entry is not marked
  MY_CS_AVAILABLE, or when one of its cset/coll init hooks reports
  failure. On success the entry is marked MY_CS_READY so the hooks are
  not run again. The caller is expected to pass a cs_number that is a
  valid index into all_charsets.
*/
static const CHARSET_INFO *get_internal_charset(uint32_t cs_number)
{
  CHARSET_INFO *entry;

  /*
    Keep other threads out while the entry's state may be changing.
  */
  pthread_mutex_lock(&THR_LOCK_charset);

  entry= all_charsets[cs_number];
  if (entry != NULL)
  {
    /* A registered entry must be either compiled in or loaded */
    if (!(entry->state & (MY_CS_COMPILED | MY_CS_LOADED)))
    {
      assert(0);
    }
    if (!(entry->state & MY_CS_AVAILABLE))
      entry= NULL;
  }

  if (entry != NULL && !(entry->state & MY_CS_READY))
  {
    /* Each hook is optional; a non-zero result means failure */
    int failed= (entry->cset->init && entry->cset->init(entry, cs_alloc)) ||
                (entry->coll->init && entry->coll->init(entry, cs_alloc));

    if (failed)
      entry= NULL;
    else
      entry->state|= MY_CS_READY;
  }

  pthread_mutex_unlock(&THR_LOCK_charset);
  return entry;
}


/*
  Return the collation with number cs_number.

  The default collation is returned directly when its number is requested.
  Returns NULL when cs_number is 0, is outside the usable range of
  all_charsets, or cannot be made ready by get_internal_charset(). When
  the lookup fails and flags contains MY_WME, EE_UNKNOWN_CHARSET is
  reported with "#<number>" and the index file path.
*/
const CHARSET_INFO *get_charset(uint32_t cs_number, myf flags)
{
  const CHARSET_INFO *cs;
  if (cs_number == default_charset_info->number)
    return default_charset_info;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  if (!cs_number || cs_number >= array_elements(all_charsets)-1)
    return NULL;

  cs= get_internal_charset(cs_number);

  if (!cs && (flags & MY_WME))
  {
    char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23];
    my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
    /* Render the number as "#NNN" for the error message */
    cs_string[0]='#';
    int10_to_str(cs_number, cs_string+1, 10);
    my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
  }
  return cs;
}

/*
  Return the collation whose name is cs_name, or NULL when it is unknown
  or cannot be made ready. When the lookup fails and flags contains
  MY_WME, EE_UNKNOWN_COLLATION is reported with the name and the index
  file path.
*/
const CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
{
  const CHARSET_INFO *found= NULL;
  uint32_t number;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  number= get_collation_number(cs_name);
  if (number)
    found= get_internal_charset(number);

  if (found)
    return found;

  if (flags & MY_WME)
  {
    char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
    my_stpcpy(get_charsets_dir(index_file), MY_CHARSET_INDEX);
    my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), cs_name, index_file);
  }

  return NULL;
}


/*
  Return a collation of the character set cs_name whose state matches
  cs_flags (see get_charset_number()), or NULL when there is none or it
  cannot be made ready. When the lookup fails and flags contains MY_WME,
  EE_UNKNOWN_CHARSET is reported with the name and the index file path.
*/
const CHARSET_INFO *get_charset_by_csname(const char *cs_name,
				    uint32_t cs_flags,
				    myf flags)
{
  const CHARSET_INFO *found= NULL;
  uint32_t number;

  (void) init_available_charsets(MYF(0));	/* If it isn't initialized */

  number= get_charset_number(cs_name, cs_flags);
  if (number)
    found= get_internal_charset(number);

  if (found)
    return found;

  if (flags & MY_WME)
  {
    char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
    my_stpcpy(get_charsets_dir(index_file), MY_CHARSET_INDEX);
    my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
  }

  return NULL;
}


/**
  Resolve a character set by its name (utf8, latin1, ...).

  Looks the name up with get_charset_by_csname() using MY_CS_PRIMARY and
  no error reporting. On success the character set found is stored in
  "cs"; otherwise "default_cs" is stored there instead.

  @param[in]  cs_name    Character set name.
  @param[in]  default_cs Fallback used when the name cannot be resolved.
  @param[out] cs         Receives the resolved or fallback character set.

  @return false if the character set was resolved; true if the fallback
  was used.
*/

bool resolve_charset(const char *cs_name,
                     const CHARSET_INFO *default_cs,
                     const CHARSET_INFO **cs)
{
  const CHARSET_INFO *found=
    get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0));
  bool missing= (found == NULL);

  *cs= missing ? default_cs : found;
  return missing;
}


/**
  Resolve a collation by its name (utf8_general_ci, ...).

  Looks the name up with get_charset_by_name() and no error reporting.
  On success the collation found is stored in "cl"; otherwise
  "default_cl" is stored there instead.

  @param[in]  cl_name    Collation name.
  @param[in]  default_cl Fallback used when the name cannot be resolved.
  @param[out] cl         Receives the resolved or fallback collation.

  @return false if the collation was resolved; true if the fallback was
  used.
*/

bool resolve_collation(const char *cl_name,
                       const CHARSET_INFO *default_cl,
                       const CHARSET_INFO **cl)
{
  const CHARSET_INFO *found= get_charset_by_name(cl_name, MYF(0));
  bool missing= (found == NULL);

  *cl= missing ? default_cl : found;
  return missing;
}


#ifdef BACKSLASH_MBTAIL
/* Result of the first fs_character_set() call; NULL until then */
static CHARSET_INFO *fs_cset_cache= NULL;

/*
  Return the character set chosen for the file system, determined once
  from the system default ANSI code page and cached afterwards.
*/
CHARSET_INFO *fs_character_set()
{
  char buf[10]= "cp";

  if (fs_cset_cache)
    return fs_cset_cache;

  /* Append the code page number after the "cp" prefix */
  GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
                buf+2, sizeof(buf)-3);
  /*
    get_charset_by_name() cannot be used here: it relies on the charset
    mutex, and according to the original note this function runs before
    that mutex is initialized. Only cp932 matters at the moment, so a
    plain strcmp() is enough to detect it.
  */
  if (strcmp(buf, "cp932") == 0)
    fs_cset_cache= &my_charset_cp932_japanese_ci;
  else
    fs_cset_cache= &my_charset_bin;

  return fs_cset_cache;
}
#endif

/*
  Escape apostrophes by doubling them

  SYNOPSIS
    escape_quotes_for_drizzle()
    charset_info        Charset of the strings
    to                  Buffer that receives the escaped string
    to_length           Size of the destination buffer, or 0
    from                The string to escape
    length              Number of bytes of "from" to escape

  DESCRIPTION
    Copies "from" to "to", writing every apostrophe twice. With USE_MB
    defined, multi-byte characters are copied as a unit without being
    inspected. The result is always NUL-terminated. This is used when the
    NO_BACKSLASH_ESCAPES SQL_MODE is in effect on the server.

  NOTE
    As with the companion escape-string function, to_length may be 0 to
    mean "big enough"; the buffer must then hold 2*length+1 bytes.

  RETURN VALUES
    UINT32_MAX  The escaped string did not fit in the to buffer
    >=0         The length of the escaped string
*/

size_t escape_quotes_for_drizzle(const CHARSET_INFO *charset_info,
                                 char *to, size_t to_length,
                                 const char *from, size_t length)
{
  char *out= to;
  /* Last position that may be written; it is reserved for the NUL */
  const char *out_limit= to + (to_length ? to_length-1 : 2*length);
  const char *src_end= from + length;
  bool truncated= false;
#ifdef USE_MB
  bool multibyte= use_mb(charset_info);
#endif

  while (from < src_end)
  {
    size_t needed;
#ifdef USE_MB
    int mb_len;
    if (multibyte && (mb_len= my_ismbchar(charset_info, from, src_end)))
    {
      if (out + mb_len > out_limit)
      {
        truncated= true;
        break;
      }
      while (mb_len--)
        *out++= *from++;
      continue;
    }
    /*
      Doubling an apostrophe cannot turn a single-byte character into a
      multi-byte one, because the escaping character is the apostrophe
      itself.
    */
#endif
    /* An apostrophe takes two output bytes, anything else takes one */
    needed= (*from == '\'') ? 2 : 1;
    if (out + needed > out_limit)
    {
      truncated= true;
      break;
    }
    if (needed == 2)
      *out++= '\'';
    *out++= *from++;
  }
  *out= 0;
  return truncated ? UINT32_MAX : (uint32_t) (out - to);
}

1 by brian clean slate	1	/* Copyright (C) 2000 MySQL AB
	2
	3	This program is free software; you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation; version 2 of the License.
	6
	7	This program is distributed in the hope that it will be useful,
	8	but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	GNU General Public License for more details.
	11
	12	You should have received a copy of the GNU General Public License
	13	along with this program; if not, write to the Free Software
	14	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
	15
	16	#include "mysys_priv.h"
	17	#include "mysys_err.h"
212.5.18 by Monty Taylor Moved m_ctype, m_string and my_bitmap. Removed t_ctype.	18	#include <mystrings/m_ctype.h>
	19	#include <mystrings/m_string.h>
1 by brian clean slate	20	#include <my_dir.h>
	21
	22
	23	/*
	24	The code below implements this functionality:
	25
	26	- Initializing charset related structures
	27	- Loading dynamic charsets
	28	- Searching for a proper CHARSET_INFO
	29	using charset name, collation name or collation ID
	30	- Setting server default character set
	31	*/
	32
236.3.9 by Andrey Hristov - Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)	33	bool my_charset_same(const CHARSET_INFO cs1, const CHARSET_INFO cs2)
1 by brian clean slate	34	{
	35	return ((cs1 == cs2) \|\| !strcmp(cs1->csname,cs2->csname));
	36	}
	37
	38
	39	static uint
	40	get_collation_number_internal(const char *name)
	41	{
	42	CHARSET_INFO **cs;
	43	for (cs= all_charsets;
	44	cs < all_charsets+array_elements(all_charsets)-1 ;
	45	cs++)
	46	{
	47	if ( cs[0] && cs[0]->name &&
383.1.12 by Brian Aker Much closer toward UTF8 being around all the time...	48	!my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->name, name))
1 by brian clean slate	49	return cs[0]->number;
	50	}
	51	return 0;
	52	}
	53
	54
146 by Brian Aker my_bool cleanup.	55	static bool init_state_maps(CHARSET_INFO *cs)
1 by brian clean slate	56	{
482 by Brian Aker Remove uint.	57	uint32_t i;
481 by Brian Aker Remove all of uchar.	58	unsigned char *state_map;
481 by Brian Aker Remove all of uchar.	59	unsigned char *ident_map;
1 by brian clean slate	60
481 by Brian Aker Remove all of uchar.	61	if (!(cs->state_map= (unsigned char*) my_once_alloc(256, MYF(MY_WME))))
1 by brian clean slate	62	return 1;
1 by brian clean slate	63
481 by Brian Aker Remove all of uchar.	64	if (!(cs->ident_map= (unsigned char*) my_once_alloc(256, MYF(MY_WME))))
1 by brian clean slate	65	return 1;
	66
	67	state_map= cs->state_map;
	68	ident_map= cs->ident_map;
	69
	70	/* Fill state_map with states to get a faster parser */
	71	for (i=0; i < 256 ; i++)
	72	{
	73	if (my_isalpha(cs,i))
481 by Brian Aker Remove all of uchar.	74	state_map[i]=(unsigned char) MY_LEX_IDENT;
1 by brian clean slate	75	else if (my_isdigit(cs,i))
481 by Brian Aker Remove all of uchar.	76	state_map[i]=(unsigned char) MY_LEX_NUMBER_IDENT;
1 by brian clean slate	77	#if defined(USE_MB) && defined(USE_MB_IDENT)
1 by brian clean slate	78	else if (my_mbcharlen(cs, i)>1)
481 by Brian Aker Remove all of uchar.	79	state_map[i]=(unsigned char) MY_LEX_IDENT;
1 by brian clean slate	80	#endif
1 by brian clean slate	81	else if (my_isspace(cs,i))
481 by Brian Aker Remove all of uchar.	82	state_map[i]=(unsigned char) MY_LEX_SKIP;
1 by brian clean slate	83	else
481 by Brian Aker Remove all of uchar.	84	state_map[i]=(unsigned char) MY_LEX_CHAR;
1 by brian clean slate	85	}
481 by Brian Aker Remove all of uchar.	86	state_map[(unsigned char)'_']=state_map[(unsigned char)'$']=(unsigned char) MY_LEX_IDENT;
	87	state_map[(unsigned char)'\'']=(unsigned char) MY_LEX_STRING;
	88	state_map[(unsigned char)'.']=(unsigned char) MY_LEX_REAL_OR_POINT;
	89	state_map[(unsigned char)'>']=state_map[(unsigned char)'=']=state_map[(unsigned char)'!']= (unsigned char) MY_LEX_CMP_OP;
	90	state_map[(unsigned char)'<']= (unsigned char) MY_LEX_LONG_CMP_OP;
	91	state_map[(unsigned char)'&']=state_map[(unsigned char)'\|']=(unsigned char) MY_LEX_BOOL;
	92	state_map[(unsigned char)'#']=(unsigned char) MY_LEX_COMMENT;
	93	state_map[(unsigned char)';']=(unsigned char) MY_LEX_SEMICOLON;
	94	state_map[(unsigned char)':']=(unsigned char) MY_LEX_SET_VAR;
	95	state_map[0]=(unsigned char) MY_LEX_EOL;
	96	state_map[(unsigned char)'\\']= (unsigned char) MY_LEX_ESCAPE;
	97	state_map[(unsigned char)'/']= (unsigned char) MY_LEX_LONG_COMMENT;
	98	state_map[(unsigned char)'*']= (unsigned char) MY_LEX_END_LONG_COMMENT;
	99	state_map[(unsigned char)'@']= (unsigned char) MY_LEX_USER_END;
	100	state_map[(unsigned char) '`']= (unsigned char) MY_LEX_USER_VARIABLE_DELIMITER;
	101	state_map[(unsigned char)'"']= (unsigned char) MY_LEX_STRING_OR_DELIMITER;
1 by brian clean slate	102
	103	/*
	104	Create a second map to make it faster to find identifiers
	105	*/
	106	for (i=0; i < 256 ; i++)
	107	{
481 by Brian Aker Remove all of uchar.	108	ident_map[i]= (unsigned char) (state_map[i] == MY_LEX_IDENT \|\|
1 by brian clean slate	109	state_map[i] == MY_LEX_NUMBER_IDENT);
	110	}
	111
	112	/* Special handling of hex and binary strings */
481 by Brian Aker Remove all of uchar.	113	state_map[(unsigned char)'x']= state_map[(unsigned char)'X']= (unsigned char) MY_LEX_IDENT_OR_HEX;
481 by Brian Aker Remove all of uchar.	114	state_map[(unsigned char)'b']= state_map[(unsigned char)'B']= (unsigned char) MY_LEX_IDENT_OR_BIN;
1 by brian clean slate	115	return 0;
	116	}
	117
	118
	119	#define MY_MAX_ALLOWED_BUF 1024*1024
	120	#define MY_CHARSET_INDEX "Index.xml"
	121
	122	const char *charsets_dir= NULL;
	123	static int charset_initialized=0;
	124
	125
	126	char get_charsets_dir(char buf)
	127	{
	128	const char *sharedir= SHAREDIR;
	129	char *res;
	130
	131	if (charsets_dir != NULL)
	132	strmake(buf, charsets_dir, FN_REFLEN-1);
	133	else
	134	{
	135	if (test_if_hard_path(sharedir) \|\|
	136	is_prefix(sharedir, DEFAULT_CHARSET_HOME))
461 by Monty Taylor Removed NullS. bu-bye.	137	strxmov(buf, sharedir, "/", CHARSET_DIR, NULL);
1 by brian clean slate	138	else
1 by brian clean slate	139	strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,
461 by Monty Taylor Removed NullS. bu-bye.	140	NULL);
1 by brian clean slate	141	}
461 by Monty Taylor Removed NullS. bu-bye.	142	res= convert_dirname(buf,buf,NULL);
51.3.22 by Jay Pipes Final round of removal of DBUG in mysys/, including Makefile	143	return(res);
1 by brian clean slate	144	}
	145
	146	CHARSET_INFO *all_charsets[256];
383.1.12 by Brian Aker Much closer toward UTF8 being around all the time...	147	const CHARSET_INFO *default_charset_info = &my_charset_utf8_general_ci;
1 by brian clean slate	148
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	149	void add_compiled_collation(CHARSET_INFO * cs)
1 by brian clean slate	150	{
	151	all_charsets[cs->number]= cs;
	152	cs->state\|= MY_CS_AVAILABLE;
	153	}
	154
	155	static void *cs_alloc(size_t size)
	156	{
	157	return my_once_alloc(size, MYF(MY_WME));
	158	}
	159
	160
146 by Brian Aker my_bool cleanup.	161	static bool init_available_charsets(myf myflags)
1 by brian clean slate	162	{
1 by brian clean slate	163	char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
163 by Brian Aker Merge Monty's code.	164	bool error=false;
1 by brian clean slate	165	/*
	166	We have to use charset_initialized to not lock on THR_LOCK_charset
	167	inside get_internal_charset...
	168	*/
	169	if (!charset_initialized)
	170	{
	171	CHARSET_INFO **cs;
	172	/*
	173	To make things thread safe we are not allowing other threads to interfere
	174	while we may changing the cs_info_table
	175	*/
	176	pthread_mutex_lock(&THR_LOCK_charset);
	177	if (!charset_initialized)
	178	{
212.6.1 by Mats Kindahl Replacing all bzero() calls with memset() calls and removing the bzero.c file.	179	memset(&all_charsets, 0, sizeof(all_charsets));
1 by brian clean slate	180	init_compiled_charsets(myflags);
	181
	182	/* Copy compiled charsets */
	183	for (cs=all_charsets;
	184	cs < all_charsets+array_elements(all_charsets)-1 ;
	185	cs++)
	186	{
	187	if (*cs)
	188	{
	189	if (cs[0]->ctype)
	190	if (init_state_maps(*cs))
	191	*cs= NULL;
	192	}
	193	}
	194
411.1.1 by Brian Aker Work on removing GNU specific calls.	195	my_stpcpy(get_charsets_dir(fname), MY_CHARSET_INDEX);
1 by brian clean slate	196	charset_initialized=1;
	197	}
	198	pthread_mutex_unlock(&THR_LOCK_charset);
	199	}
	200	return error;
	201	}
	202
	203
	204	void free_charsets(void)
	205	{
	206	charset_initialized=0;
	207	}
	208
	209
482 by Brian Aker Remove uint.	210	uint32_t get_collation_number(const char *name)
1 by brian clean slate	211	{
	212	init_available_charsets(MYF(0));
	213	return get_collation_number_internal(name);
	214	}
	215
	216
482 by Brian Aker Remove uint.	217	uint32_t get_charset_number(const char *charset_name, uint32_t cs_flags)
1 by brian clean slate	218	{
	219	CHARSET_INFO **cs;
	220	init_available_charsets(MYF(0));
	221
	222	for (cs= all_charsets;
	223	cs < all_charsets+array_elements(all_charsets)-1 ;
	224	cs++)
	225	{
	226	if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) &&
383.1.12 by Brian Aker Much closer toward UTF8 being around all the time...	227	!my_strcasecmp(&my_charset_utf8_general_ci, cs[0]->csname, charset_name))
1 by brian clean slate	228	return cs[0]->number;
	229	}
	230	return 0;
	231	}
	232
	233
482 by Brian Aker Remove uint.	234	const char *get_charset_name(uint32_t charset_number)
1 by brian clean slate	235	{
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	236	const CHARSET_INFO *cs;
1 by brian clean slate	237	init_available_charsets(MYF(0));
	238
	239	cs=all_charsets[charset_number];
	240	if (cs && (cs->number == charset_number) && cs->name )
	241	return (char*) cs->name;
	242
	243	return (char) "?"; / this mimics find_type() */
	244	}
	245
	246
482 by Brian Aker Remove uint.	247	static const CHARSET_INFO *get_internal_charset(uint32_t cs_number)
1 by brian clean slate	248	{
	249	CHARSET_INFO *cs;
	250	/*
	251	To make things thread safe we are not allowing other threads to interfere
	252	while we may changing the cs_info_table
	253	*/
	254	pthread_mutex_lock(&THR_LOCK_charset);
	255	if ((cs= all_charsets[cs_number]))
	256	{
	257	if (!(cs->state & MY_CS_COMPILED) && !(cs->state & MY_CS_LOADED))
	258	{
383.1.7 by Brian Aker Remove homebrew xml parser.	259	assert(0);
1 by brian clean slate	260	}
	261	cs= (cs->state & MY_CS_AVAILABLE) ? cs : NULL;
	262	}
	263	if (cs && !(cs->state & MY_CS_READY))
	264	{
	265	if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) \|\|
	266	(cs->coll->init && cs->coll->init(cs, cs_alloc)))
	267	cs= NULL;
	268	else
	269	cs->state\|= MY_CS_READY;
	270	}
	271	pthread_mutex_unlock(&THR_LOCK_charset);
	272	return cs;
	273	}
	274
	275
482 by Brian Aker Remove uint.	276	const const CHARSET_INFO *get_charset(uint32_t cs_number, myf flags)
1 by brian clean slate	277	{
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	278	const CHARSET_INFO *cs;
1 by brian clean slate	279	if (cs_number == default_charset_info->number)
	280	return default_charset_info;
	281
	282	(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
	283
	284	if (!cs_number \|\| cs_number >= array_elements(all_charsets)-1)
	285	return NULL;
	286
383.1.7 by Brian Aker Remove homebrew xml parser.	287	cs= get_internal_charset(cs_number);
1 by brian clean slate	288
	289	if (!cs && (flags & MY_WME))
	290	{
	291	char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23];
411.1.1 by Brian Aker Work on removing GNU specific calls.	292	my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
1 by brian clean slate	293	cs_string[0]='#';
	294	int10_to_str(cs_number, cs_string+1, 10);
	295	my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
	296	}
	297	return cs;
	298	}
	299
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	300	const CHARSET_INFO get_charset_by_name(const char cs_name, myf flags)
1 by brian clean slate	301	{
482 by Brian Aker Remove uint.	302	uint32_t cs_number;
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	303	const CHARSET_INFO *cs;
1 by brian clean slate	304	(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
	305
	306	cs_number=get_collation_number(cs_name);
383.1.7 by Brian Aker Remove homebrew xml parser.	307	cs= cs_number ? get_internal_charset(cs_number) : NULL;
1 by brian clean slate	308
	309	if (!cs && (flags & MY_WME))
	310	{
	311	char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
411.1.1 by Brian Aker Work on removing GNU specific calls.	312	my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
1 by brian clean slate	313	my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), cs_name, index_file);
	314	}
	315
	316	return cs;
	317	}
	318
	319
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	320	const CHARSET_INFO get_charset_by_csname(const char cs_name,
482 by Brian Aker Remove uint.	321	uint32_t cs_flags,
1 by brian clean slate	322	myf flags)
1 by brian clean slate	323	{
482 by Brian Aker Remove uint.	324	uint32_t cs_number;
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	325	const CHARSET_INFO *cs;
1 by brian clean slate	326
	327	(void) init_available_charsets(MYF(0)); /* If it isn't initialized */
	328
	329	cs_number= get_charset_number(cs_name, cs_flags);
383.1.7 by Brian Aker Remove homebrew xml parser.	330	cs= cs_number ? get_internal_charset(cs_number) : NULL;
1 by brian clean slate	331
	332	if (!cs && (flags & MY_WME))
	333	{
	334	char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
411.1.1 by Brian Aker Work on removing GNU specific calls.	335	my_stpcpy(get_charsets_dir(index_file),MY_CHARSET_INDEX);
1 by brian clean slate	336	my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
	337	}
	338
51.3.22 by Jay Pipes Final round of removal of DBUG in mysys/, including Makefile	339	return(cs);
1 by brian clean slate	340	}
	341
	342
	343	/**
	344	Resolve character set by the character set name (utf8, latin1, ...).
	345
	346	The function tries to resolve character set by the specified name. If
	347	there is character set with the given name, it is assigned to the "cs"
163 by Brian Aker Merge Monty's code.	348	parameter and false is returned. If there is no such character set,
163 by Brian Aker Merge Monty's code.	349	"default_cs" is assigned to the "cs" and true is returned.
1 by brian clean slate	350
	351	@param[in] cs_name Character set name.
	352	@param[in] default_cs Default character set.
	353	@param[out] cs Variable to store character set.
	354
163 by Brian Aker Merge Monty's code.	355	@return false if character set was resolved successfully; true if there
1 by brian clean slate	356	is no character set with given name.
	357	*/
	358
146 by Brian Aker my_bool cleanup.	359	bool resolve_charset(const char *cs_name,
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	360	const CHARSET_INFO *default_cs,
	361	const CHARSET_INFO **cs)
1 by brian clean slate	362	{
	363	*cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0));
	364
	365	if (*cs == NULL)
	366	{
	367	*cs= default_cs;
163 by Brian Aker Merge Monty's code.	368	return true;
1 by brian clean slate	369	}
1 by brian clean slate	370
163 by Brian Aker Merge Monty's code.	371	return false;
1 by brian clean slate	372	}
	373
	374
	375	/**
	376	Resolve collation by the collation name (utf8_general_ci, ...).
	377
	378	The function tries to resolve collation by the specified name. If there
	379	is collation with the given name, it is assigned to the "cl" parameter
163 by Brian Aker Merge Monty's code.	380	and false is returned. If there is no such collation, "default_cl" is
163 by Brian Aker Merge Monty's code.	381	assigned to the "cl" and true is returned.
1 by brian clean slate	382
	383	@param[out] cl Variable to store collation.
	384	@param[in] cl_name Collation name.
	385	@param[in] default_cl Default collation.
	386
163 by Brian Aker Merge Monty's code.	387	@return false if collation was resolved successfully; true if there is no
1 by brian clean slate	388	collation with given name.
	389	*/
	390
146 by Brian Aker my_bool cleanup.	391	bool resolve_collation(const char *cl_name,
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	392	const CHARSET_INFO *default_cl,
	393	const CHARSET_INFO **cl)
1 by brian clean slate	394	{
	395	*cl= get_charset_by_name(cl_name, MYF(0));
	396
	397	if (*cl == NULL)
	398	{
	399	*cl= default_cl;
163 by Brian Aker Merge Monty's code.	400	return true;
1 by brian clean slate	401	}
1 by brian clean slate	402
163 by Brian Aker Merge Monty's code.	403	return false;
1 by brian clean slate	404	}
	405
	406
	407	#ifdef BACKSLASH_MBTAIL
	408	static CHARSET_INFO *fs_cset_cache= NULL;
	409
	410	CHARSET_INFO *fs_character_set()
	411	{
	412	if (!fs_cset_cache)
	413	{
	414	char buf[10]= "cp";
	415	GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
	416	buf+2, sizeof(buf)-3);
	417	/*
	418	We cannot call get_charset_by_name here
	419	because fs_character_set() is executed before
	420	LOCK_THD_charset mutex initialization, which
	421	is used inside get_charset_by_name.
	422	As we're now interested in cp932 only,
	423	let's just detect it using strcmp().
	424	*/
	425	fs_cset_cache= !strcmp(buf, "cp932") ?
	426	&my_charset_cp932_japanese_ci : &my_charset_bin;
	427	}
	428	return fs_cset_cache;
	429	}
	430	#endif
	431
	432	/*
	433	Escape apostrophes by doubling them up
	434
	435	SYNOPSIS
236.3.4 by Andrey Hristov Rename escape_(string\|quotes)_for_mysql to escape_(string\|quotes)_for_drizzle	436	escape_quotes_for_drizzle()
1 by brian clean slate	437	charset_info Charset of the strings
	438	to Buffer for escaped string
	439	to_length Length of destination buffer, or 0
	440	from The string to escape
	441	length The length of the string to escape
	442
	443	DESCRIPTION
	444	This escapes the contents of a string by doubling up any apostrophes that
	445	it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
	446	effect on the server.
	447
	448	NOTE
	449	To be consistent with escape_string_for_mysql(), to_length may be 0 to
	450	mean "big enough"
	451
	452	RETURN VALUES
365.2.9 by Monty Taylor Got rid of all instances of ~0	453	UINT32_MAX The escaped string did not fit in the to buffer
1 by brian clean slate	454	>=0 The length of the escaped string
	455	*/
	456
236.3.9 by Andrey Hristov - Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)	457	size_t escape_quotes_for_drizzle(const CHARSET_INFO *charset_info,
236.3.4 by Andrey Hristov Rename escape_(string\|quotes)_for_mysql to escape_(string\|quotes)_for_drizzle	458	char *to, size_t to_length,
	459	const char *from, size_t length)
1 by brian clean slate	460	{
	461	const char *to_start= to;
	462	const char end, to_end=to_start + (to_length ? to_length-1 : 2*length);
163 by Brian Aker Merge Monty's code.	463	bool overflow= false;
1 by brian clean slate	464	#ifdef USE_MB
146 by Brian Aker my_bool cleanup.	465	bool use_mb_flag= use_mb(charset_info);
1 by brian clean slate	466	#endif
	467	for (end= from + length; from < end; from++)
	468	{
	469	#ifdef USE_MB
	470	int tmp_length;
	471	if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
	472	{
	473	if (to + tmp_length > to_end)
	474	{
163 by Brian Aker Merge Monty's code.	475	overflow= true;
1 by brian clean slate	476	break;
	477	}
	478	while (tmp_length--)
	479	to++= from++;
	480	from--;
	481	continue;
	482	}
	483	/*
	484	We don't have the same issue here with a non-multi-byte character being
	485	turned into a multi-byte character by the addition of an escaping
	486	character, because we are only escaping the ' character with itself.
	487	*/
	488	#endif
	489	if (*from == '\'')
	490	{
	491	if (to + 2 > to_end)
	492	{
163 by Brian Aker Merge Monty's code.	493	overflow= true;
1 by brian clean slate	494	break;
	495	}
	496	*to++= '\'';
	497	*to++= '\'';
	498	}
	499	else
	500	{
	501	if (to + 1 > to_end)
	502	{
163 by Brian Aker Merge Monty's code.	503	overflow= true;
1 by brian clean slate	504	break;
	505	}
	506	to++= from;
	507	}
	508	}
	509	*to= 0;
365.2.9 by Monty Taylor Got rid of all instances of ~0	510	return overflow ? UINT32_MAX : (uint32_t) (to - to_start);
1 by brian clean slate	511	}