~drizzle-trunk/drizzle/development : contents of mystrings/ctype.c at revision 319

~drizzle-trunk/drizzle/development : (revision 319)

/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#include <m_string.h>
#include <m_ctype.h>
#include <my_xml.h>


/*

  This file implements routines which parse XML-based
  character set and collation description files.
  
  Unicode collations are encoded according to
  
    Unicode Technical Standard #35
    Locale Data Markup Language (LDML)
    http://www.unicode.org/reports/tr35/
  
  and converted into ICU string according to
  
    Collation Customization
    http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
  
*/

/*
  Bounded string copy.

  Copies min(l1, l2) bytes from src into str and appends a terminating
  NUL, so str must have room for min(l1, l2) + 1 bytes.  Callers pass
  the source length as l1 and (destination size - 1) as l2.

  Returns str.
*/
static char *mstr(char *str,const char *src,uint l1,uint l2)
{
  uint ncopy= l2;

  if (l1 < l2)
    ncopy= l1;

  memcpy(str, src, ncopy);
  *(str + ncopy)= '\0';
  return str;
}

/*
  One row of the "XML element path -> parser state" lookup table.
*/
struct my_cs_file_section_st
{
  int        state;   /* one of the _CS_* codes below; 0 in the terminator */
  const char *str;    /* '/'-separated element path; NULL in the terminator */
};

/*
  Parser state codes.  _CS_RESET, _CS_DIFF1, _CS_DIFF2 and _CS_DIFF3 must
  stay consecutive and in this order: cs_value() uses (state - _CS_RESET)
  as an index into its table of ICU operators.
*/
#define _CS_MISC	1
#define _CS_ID		2
#define _CS_CSNAME	3
#define _CS_FAMILY	4
#define _CS_ORDER	5
#define _CS_COLNAME	6
#define _CS_FLAG	7
#define _CS_CHARSET	8
#define _CS_COLLATION	9
#define _CS_UPPERMAP	10
#define _CS_LOWERMAP	11
#define _CS_UNIMAP	12
#define _CS_COLLMAP	13
#define _CS_CTYPEMAP	14
#define _CS_PRIMARY_ID	15
#define _CS_BINARY_ID	16
#define _CS_CSDESCRIPT	17
#define _CS_RESET	18
#define _CS_DIFF1	19
#define _CS_DIFF2	20
#define _CS_DIFF3	21


/*
  Known element/attribute paths and the state each one selects.
  The table is terminated by an entry whose str is NULL.
*/
static struct my_cs_file_section_st sec[] =
{
  {_CS_MISC,		"xml"},
  {_CS_MISC,		"xml/version"},
  {_CS_MISC,		"xml/encoding"},
  {_CS_MISC,		"charsets"},
  {_CS_MISC,		"charsets/max-id"},
  {_CS_CHARSET,		"charsets/charset"},
  {_CS_PRIMARY_ID,	"charsets/charset/primary-id"},
  {_CS_BINARY_ID,	"charsets/charset/binary-id"},
  {_CS_CSNAME,		"charsets/charset/name"},
  {_CS_FAMILY,		"charsets/charset/family"},
  {_CS_CSDESCRIPT,	"charsets/charset/description"},
  {_CS_MISC,		"charsets/charset/alias"},
  {_CS_MISC,		"charsets/charset/ctype"},
  {_CS_CTYPEMAP,	"charsets/charset/ctype/map"},
  {_CS_MISC,		"charsets/charset/upper"},
  {_CS_UPPERMAP,	"charsets/charset/upper/map"},
  {_CS_MISC,		"charsets/charset/lower"},
  {_CS_LOWERMAP,	"charsets/charset/lower/map"},
  {_CS_MISC,		"charsets/charset/unicode"},
  {_CS_UNIMAP,		"charsets/charset/unicode/map"},
  {_CS_COLLATION,	"charsets/charset/collation"},
  {_CS_COLNAME,		"charsets/charset/collation/name"},
  {_CS_ID,		"charsets/charset/collation/id"},
  {_CS_ORDER,		"charsets/charset/collation/order"},
  {_CS_FLAG,		"charsets/charset/collation/flag"},
  {_CS_COLLMAP,		"charsets/charset/collation/map"},
  {_CS_RESET,		"charsets/charset/collation/rules/reset"},
  {_CS_DIFF1,		"charsets/charset/collation/rules/p"},
  {_CS_DIFF2,		"charsets/charset/collation/rules/s"},
  {_CS_DIFF3,		"charsets/charset/collation/rules/t"},
  {0,	NULL}
};

/*
  Look up the table entry whose path is exactly the len bytes at attr.

  attr does not need to be NUL-terminated at attr[len]; only the first
  len bytes are examined.

  The match must be exact.  Comparing with strncmp(attr, path, len) alone
  would accept any prefix of a known path (for example "x" for "xml", or
  an empty string for the first row), so the path length is checked first.

  Returns a pointer to the matching entry, or NULL if the path is unknown.
*/
static struct my_cs_file_section_st * cs_file_sec(const char *attr, size_t len)
{
  struct my_cs_file_section_st *s;
  for (s=sec; s->str; s++)
  {
    if (strlen(s->str) == len && !memcmp(attr, s->str, len))
      return s;
  }
  return NULL;
}

/* Size in bytes of the buffer holding a character set description. */
#define MY_CS_CSDESCR_SIZE	64
/* Size in bytes of the buffer accumulating collation tailoring rules. */
#define MY_CS_TAILORING_SIZE	1024

/*
  Working state shared by the XML handlers (cs_enter, cs_value, cs_leave)
  through the parser's user_data pointer.

  The arrays are backing storage: when a value is parsed, cs_value() fills
  the matching array and points the corresponding member of `cs` at it.
*/
typedef struct my_cs_file_info
{
  /* Storage for the character set name (cs.csname points here). */
  char   csname[MY_CS_NAME_SIZE];
  /* Storage for the collation name (cs.name points here). */
  char   name[MY_CS_NAME_SIZE];
  /* Tables filled from the ctype, lower, upper and collation <map> values. */
  uchar  ctype[MY_CS_CTYPE_TABLE_SIZE];
  uchar  to_lower[MY_CS_TO_LOWER_TABLE_SIZE];
  uchar  to_upper[MY_CS_TO_UPPER_TABLE_SIZE];
  uchar  sort_order[MY_CS_SORT_ORDER_TABLE_SIZE];
  /* Table filled from the unicode <map> value. */
  uint16_t tab_to_uni[MY_CS_TO_UNI_TABLE_SIZE];
  /* Storage for the description text (cs.comment points here). */
  char   comment[MY_CS_CSDESCR_SIZE];
  /* Tailoring rules text built up by cs_value() (cs.tailoring points here). */
  char   tailoring[MY_CS_TAILORING_SIZE];
  /* Bytes of `tailoring` used so far; reset to 0 when a collation is entered. */
  size_t tailoring_length;
  /* Character set being assembled; zeroed when a charset element is entered. */
  CHARSET_INFO cs;
  /* Callback invoked with &cs when a collation element is left; may be NULL. */
  int (*add_collation)(CHARSET_INFO *cs);
} MY_CHARSET_LOADER;



/*
  Parse a whitespace-separated list of hexadecimal numbers into a byte table.

  a     destination table
  size  number of elements available in a
  str   text to parse (len bytes are scanned for token boundaries)
  len   length of str

  Tokens are separated by space, tab, CR or LF.  Because strchr() also
  matches the terminating NUL of its search set, a NUL byte in the input
  acts as a separator as well.  Parsing stops at the end of the input or
  once `size` values have been stored; surplus tokens are ignored so that
  nothing is written past the end of the table.

  Each token is converted with strtoul(..., 16) and truncated to uchar.

  Always returns 0.
*/
static int fill_uchar(uchar *a,uint size,const char *str, uint len)
{
  uint i= 0;
  const char *s, *b, *e=str+len;

  for (s=str ; s < e ; i++)
  {
    /* Skip separators in front of the next token. */
    for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ;
    b=s;
    /* Advance to the end of the token. */
    for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ;
    /* Stop on an empty token (end of input) or when the table is full. */
    if (s == b || i >= size)
      break;
    a[i]= (uchar) strtoul(b,NULL,16);
  }
  return 0;
}

/*
  Parse a whitespace-separated list of hexadecimal numbers into a table
  of 16-bit values.

  a     destination table
  size  number of elements available in a
  str   text to parse (len bytes are scanned for token boundaries)
  len   length of str

  Tokens are separated by space, tab, CR or LF.  Because strchr() also
  matches the terminating NUL of its search set, a NUL byte in the input
  acts as a separator as well.  Parsing stops at the end of the input or
  once `size` values have been stored; surplus tokens are ignored so that
  nothing is written past the end of the table.

  Each token is converted with strtol(..., 16) and truncated to uint16_t.

  Always returns 0.
*/
static int fill_uint16(uint16_t *a,uint size,const char *str, size_t len)
{
  uint i= 0;

  const char *s, *b, *e=str+len;
  for (s=str ; s < e ; i++)
  {
    /* Skip separators in front of the next token. */
    for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ;
    b=s;
    /* Advance to the end of the token. */
    for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ;
    /* Stop on an empty token (end of input) or when the table is full. */
    if (s == b || i >= size)
      break;
    a[i]= (uint16_t) strtol(b,NULL,16);
  }
  return 0;
}


/*
  XML "enter element" handler.

  st    parser; st->user_data points to the struct my_cs_file_info state
  attr  path of the element being entered
  len   length of attr

  Entering a charset element clears the CHARSET_INFO being assembled;
  entering a collation element discards any accumulated tailoring text.
  Every other element (known or unknown) is ignored.

  Always returns MY_XML_OK.
*/
static int cs_enter(MY_XML_PARSER *st,const char *attr, size_t len)
{
  struct my_cs_file_info *info= (struct my_cs_file_info *) st->user_data;
  const struct my_cs_file_section_st *section= cs_file_sec(attr, len);

  if (!section)
    return MY_XML_OK;

  switch (section->state) {
  case _CS_CHARSET:
    memset(&info->cs, 0, sizeof(info->cs));
    break;
  case _CS_COLLATION:
    info->tailoring_length= 0;
    break;
  default:
    break;
  }

  return MY_XML_OK;
}


/*
  XML "leave element" handler.

  st    parser; st->user_data points to the struct my_cs_file_info state
  attr  path of the element being left
  len   length of attr

  When a collation element is closed and an add_collation callback was
  supplied, the callback is invoked with the assembled CHARSET_INFO and
  its result is returned.  In every other case MY_XML_OK is returned.
*/
static int cs_leave(MY_XML_PARSER *st,const char *attr, size_t len)
{
  struct my_cs_file_info *info= (struct my_cs_file_info *) st->user_data;
  const struct my_cs_file_section_st *section= cs_file_sec(attr, len);

  if (section && section->state == _CS_COLLATION && info->add_collation)
    return info->add_collation(&info->cs);

  return MY_XML_OK;
}


/*
  Return non-zero when the len bytes at attr spell exactly `word`.
  attr does not have to be NUL-terminated.
*/
static int cs_flag_is(const char *attr, size_t len, const char *word)
{
  return strlen(word) == len && !memcmp(attr, word, len);
}


/*
  XML "value" handler: stores a text or attribute value in the loader state.

  st    parser; st->attr is the NUL-terminated path of the current
        element and st->user_data points to the struct my_cs_file_info
  attr  the value text (despite the name); len bytes long
  len   length of the value

  The current path selects what the value means: a numeric id, a name,
  a description, a flag, one of the mapping tables, or one collation
  tailoring rule.  Values under unknown paths are ignored.

  Always returns MY_XML_OK.
*/
static int cs_value(MY_XML_PARSER *st,const char *attr, size_t len)
{
  struct my_cs_file_info *i= (struct my_cs_file_info *)st->user_data;
  struct my_cs_file_section_st *s= cs_file_sec(st->attr, strlen(st->attr));
  int    state= s ? s->state : 0;

  switch (state) {
  case _CS_ID:
    i->cs.number= strtol(attr,(char**)NULL,10);
    break;
  case _CS_BINARY_ID:
    i->cs.binary_number= strtol(attr,(char**)NULL,10);
    break;
  case _CS_PRIMARY_ID:
    i->cs.primary_number= strtol(attr,(char**)NULL,10);
    break;
  case _CS_COLNAME:
    i->cs.name=mstr(i->name,attr,len,MY_CS_NAME_SIZE-1);
    break;
  case _CS_CSNAME:
    i->cs.csname=mstr(i->csname,attr,len,MY_CS_NAME_SIZE-1);
    break;
  case _CS_CSDESCRIPT:
    i->cs.comment=mstr(i->comment,attr,len,MY_CS_CSDESCR_SIZE-1);
    break;
  case _CS_FLAG:
    /*
      Flags must match in full: a bare strncmp(word, attr, len) would
      accept any prefix of the word, including a zero-length value.
    */
    if (cs_flag_is(attr, len, "primary"))
      i->cs.state|= MY_CS_PRIMARY;
    else if (cs_flag_is(attr, len, "binary"))
      i->cs.state|= MY_CS_BINSORT;
    else if (cs_flag_is(attr, len, "compiled"))
      i->cs.state|= MY_CS_COMPILED;
    break;
  case _CS_UPPERMAP:
    fill_uchar(i->to_upper,MY_CS_TO_UPPER_TABLE_SIZE,attr,len);
    i->cs.to_upper=i->to_upper;
    break;
  case _CS_LOWERMAP:
    fill_uchar(i->to_lower,MY_CS_TO_LOWER_TABLE_SIZE,attr,len);
    i->cs.to_lower=i->to_lower;
    break;
  case _CS_UNIMAP:
    fill_uint16(i->tab_to_uni,MY_CS_TO_UNI_TABLE_SIZE,attr,len);
    i->cs.tab_to_uni=i->tab_to_uni;
    break;
  case _CS_COLLMAP:
    fill_uchar(i->sort_order,MY_CS_SORT_ORDER_TABLE_SIZE,attr,len);
    i->cs.sort_order=i->sort_order;
    break;
  case _CS_CTYPEMAP:
    fill_uchar(i->ctype,MY_CS_CTYPE_TABLE_SIZE,attr,len);
    i->cs.ctype=i->ctype;
    break;
  case _CS_RESET:
  case _CS_DIFF1:
  case _CS_DIFF2:
  case _CS_DIFF3:
    {
      /*
        Convert collation description from
        Locale Data Markup Language (LDML)
        into ICU Collation Customization expression.
      */
      char arg[16];
      /* Indexed by (state - _CS_RESET): reset, primary, secondary, tertiary. */
      const char *cmd[]= {"&","<","<<","<<<"};
      i->cs.tailoring= i->tailoring;
      mstr(arg,attr,len,sizeof(arg)-1);
      /*
        One rule is at most 1 + 3 + 1 + 15 = 20 characters plus the NUL,
        so this check guarantees that sprintf() stays inside the buffer.
        Rules that no longer fit are dropped.
      */
      if (i->tailoring_length + 20 < sizeof(i->tailoring))
      {
        char *dst= i->tailoring_length + i->tailoring;
        i->tailoring_length+= sprintf(dst," %s %s",cmd[state-_CS_RESET],arg);
      }
    }
    break;
  default:
    /* Unknown path or a section that carries no value of interest. */
    break;
  }
  return MY_XML_OK;
}


/*
  Parse an XML character set description held in memory.

  buf            XML text
  len            length of buf
  add_collation  callback invoked for each collation element once it
                 has been read completely; may be NULL

  Returns false if the XML parser reports MY_XML_OK, true otherwise.
*/
bool my_parse_charset_xml(const char *buf, size_t len,
                             int (*add_collation)(CHARSET_INFO *cs))
{
  MY_XML_PARSER parser;
  struct my_cs_file_info loader;
  bool failed;

  loader.add_collation= add_collation;

  my_xml_parser_create(&parser);
  my_xml_set_enter_handler(&parser, cs_enter);
  my_xml_set_value_handler(&parser, cs_value);
  my_xml_set_leave_handler(&parser, cs_leave);
  my_xml_set_user_data(&parser, (void*) &loader);

  if (my_xml_parse(&parser, buf, len) == MY_XML_OK)
    failed= false;
  else
    failed= true;

  my_xml_parser_free(&parser);
  return failed;
}


/*
  Determine the repertoire of a string: pure ASCII or not.

  cs      character set of the string
  str     string to inspect
  length  length of str in bytes

  For character sets with mbminlen == 1 the bytes are inspected directly.
  Otherwise the string is decoded with the character set's mb_wc function
  until it returns a non-positive value, and the decoded code points are
  inspected.

  Returns MY_REPERTOIRE_UNICODE30 as soon as a byte or code point above
  0x7F is seen, MY_REPERTOIRE_ASCII otherwise.
*/
uint
my_string_repertoire(const CHARSET_INFO * const cs, const char *str, ulong length)
{
  const char *cur= str;
  const char * const end= str + length;

  if (cs->mbminlen != 1)
  {
    my_wc_t wc;
    for (;;)
    {
      int chlen= cs->cset->mb_wc(cs, &wc, (uchar *) cur, (uchar *) end);
      if (chlen <= 0)
        break;
      if (wc > 0x7F)
        return MY_REPERTOIRE_UNICODE30;
      cur+= chlen;
    }
    return MY_REPERTOIRE_ASCII;
  }

  while (cur < end)
  {
    if (((uchar) *cur) > 0x7F)
      return MY_REPERTOIRE_UNICODE30;
    cur++;
  }
  return MY_REPERTOIRE_ASCII;
}


/*
  Detect whether a character set is ASCII based.

  Returns true for:

  - every 8bit character set (mbmaxlen == 1) that has a Unicode mapping
    table in which 0x7B maps to '{'
    (this excludes swe7, which maps 0x7B to
    "LATIN LETTER A WITH DIAERESIS")

  - every multi-byte character set with mbminlen == 1
    (this excludes ucs2, whose mbminlen is 2)

  TODO:

  When merging to 5.2, this function should be changed
  to check the flag MY_CS_NONASCII instead:

     return (cs->flag & MY_CS_NONASCII) ? 0 : 1;

  That flag was added to 5.2 under the terms of WL#3759
  "Optimize identifier conversion in client-server protocol",
  specifically to mark character sets not compatible with ASCII.

  The flag will not be backported to 5.0 or 5.1.
  This function is fine for 5.0 and 5.1, because no new tricky
  character sets are going to be introduced between 5.0 and 5.2.
*/
bool
my_charset_is_ascii_based(const CHARSET_INFO * const cs)
{
  if (cs->mbmaxlen == 1)
  {
    /* 8bit character set: decided by the Unicode mapping of '{'. */
    return cs->tab_to_uni && cs->tab_to_uni['{'] == '{';
  }

  /* Otherwise only multi-byte sets with single-byte minimum qualify. */
  return cs->mbminlen == 1 && cs->mbmaxlen > 1;
}


/*
  Detect whether a character set is 8bit and pure ASCII, i.e. none of
  its 256 codes maps to a character outside U+0000..U+007F.

  A character set without a Unicode mapping table is reported as
  not pure ASCII.

  This function is shared between "conf_to_src"
  and the dynamic charsets loader in "mysqld".
*/
bool
my_charset_is_8bit_pure_ascii(const CHARSET_INFO * const cs)
{
  size_t idx= 0;

  if (cs->tab_to_uni)
  {
    /* Walk forward until a non-ASCII mapping or the end of the table. */
    while (idx < 256 && cs->tab_to_uni[idx] <= 0x7F)
      idx++;
    return idx == 256;
  }
  return 0;
}


/*
  Check whether an 8bit character set is compatible with ASCII on the
  range 0x00..0x7F, i.e. each of those codes maps to the Unicode code
  point of the same value.

  A character set without a Unicode mapping table is reported as
  compatible.

  Shared between conf_to_src and mysys.
*/
bool
my_charset_is_ascii_compatible(const CHARSET_INFO * const cs)
{
  uint code= 0;

  if (cs->tab_to_uni)
  {
    while (code < 128)
    {
      if (cs->tab_to_uni[code] != code)
        return 0;
      code++;
    }
  }
  return 1;
}

1 by brian clean slate	1	/* Copyright (C) 2000 MySQL AB
	2
	3	This program is free software; you can redistribute it and/or modify
	4	it under the terms of the GNU General Public License as published by
	5	the Free Software Foundation; version 2 of the License.
	6
	7	This program is distributed in the hope that it will be useful,
	8	but WITHOUT ANY WARRANTY; without even the implied warranty of
	9	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	10	GNU General Public License for more details.
	11
	12	You should have received a copy of the GNU General Public License
	13	along with this program; if not, write to the Free Software
	14	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
	15
212.5.39 by Monty Taylor Phew. Moved my_base and my_global.	16	#include <m_string.h>
1 by brian clean slate	17	#include <m_ctype.h>
	18	#include <my_xml.h>
	19
	20
	21	/*
	22
	23	This files implements routines which parse XML based
	24	character set and collation description files.
	25
	26	Unicode collations are encoded according to
	27
	28	Unicode Technical Standard #35
	29	Locale Data Markup Language (LDML)
	30	http://www.unicode.org/reports/tr35/
	31
	32	and converted into ICU string according to
	33
	34	Collation Customization
	35	http://oss.software.ibm.com/icu/userguide/Collate_Customization.html
	36
	37	*/
	38
	39	static char mstr(char str,const char *src,uint l1,uint l2)
	40	{
	41	l1= l1<l2 ? l1 : l2;
	42	memcpy(str,src,l1);
	43	str[l1]='\0';
	44	return str;
	45	}
	46
	47	struct my_cs_file_section_st
	48	{
	49	int state;
	50	const char *str;
	51	};
	52
	53	#define _CS_MISC 1
	54	#define _CS_ID 2
	55	#define _CS_CSNAME 3
	56	#define _CS_FAMILY 4
	57	#define _CS_ORDER 5
	58	#define _CS_COLNAME 6
	59	#define _CS_FLAG 7
	60	#define _CS_CHARSET 8
	61	#define _CS_COLLATION 9
	62	#define _CS_UPPERMAP 10
	63	#define _CS_LOWERMAP 11
	64	#define _CS_UNIMAP 12
	65	#define _CS_COLLMAP 13
	66	#define _CS_CTYPEMAP 14
	67	#define _CS_PRIMARY_ID 15
	68	#define _CS_BINARY_ID 16
	69	#define _CS_CSDESCRIPT 17
	70	#define _CS_RESET 18
	71	#define _CS_DIFF1 19
	72	#define _CS_DIFF2 20
	73	#define _CS_DIFF3 21
	74
	75
	76	static struct my_cs_file_section_st sec[] =
	77	{
	78	{_CS_MISC, "xml"},
	79	{_CS_MISC, "xml/version"},
	80	{_CS_MISC, "xml/encoding"},
81	{_CS_MISC, "charsets"},
82	{_CS_MISC, "charsets/max-id"},
83	{_CS_CHARSET, "charsets/charset"},
84	{_CS_PRIMARY_ID, "charsets/charset/primary-id"},
85	{_CS_BINARY_ID, "charsets/charset/binary-id"},
86	{_CS_CSNAME, "charsets/charset/name"},
87	{_CS_FAMILY, "charsets/charset/family"},
88	{_CS_CSDESCRIPT, "charsets/charset/description"},
89	{_CS_MISC, "charsets/charset/alias"},
90	{_CS_MISC, "charsets/charset/ctype"},
91	{_CS_CTYPEMAP, "charsets/charset/ctype/map"},
92	{_CS_MISC, "charsets/charset/upper"},
93	{_CS_UPPERMAP, "charsets/charset/upper/map"},
94	{_CS_MISC, "charsets/charset/lower"},
95	{_CS_LOWERMAP, "charsets/charset/lower/map"},
96	{_CS_MISC, "charsets/charset/unicode"},
97	{_CS_UNIMAP, "charsets/charset/unicode/map"},
98	{_CS_COLLATION, "charsets/charset/collation"},
99	{_CS_COLNAME, "charsets/charset/collation/name"},
100	{_CS_ID, "charsets/charset/collation/id"},
101	{_CS_ORDER, "charsets/charset/collation/order"},
102	{_CS_FLAG, "charsets/charset/collation/flag"},
103	{_CS_COLLMAP, "charsets/charset/collation/map"},
104	{_CS_RESET, "charsets/charset/collation/rules/reset"},
105	{_CS_DIFF1, "charsets/charset/collation/rules/p"},
106	{_CS_DIFF2, "charsets/charset/collation/rules/s"},
107	{_CS_DIFF3, "charsets/charset/collation/rules/t"},
108	{0, NULL}
109	};
110
111	static struct my_cs_file_section_st * cs_file_sec(const char *attr, size_t len)
112	{
113	struct my_cs_file_section_st *s;
114	for (s=sec; s->str; s++)
115	{
116	if (!strncmp(attr,s->str,len))
117	return s;
118	}
119	return NULL;
120	}
121
122	#define MY_CS_CSDESCR_SIZE 64
123	#define MY_CS_TAILORING_SIZE 1024
124
125	typedef struct my_cs_file_info
126	{
127	char csname[MY_CS_NAME_SIZE];
128	char name[MY_CS_NAME_SIZE];
129	uchar ctype[MY_CS_CTYPE_TABLE_SIZE];
130	uchar to_lower[MY_CS_TO_LOWER_TABLE_SIZE];
131	uchar to_upper[MY_CS_TO_UPPER_TABLE_SIZE];
132	uchar sort_order[MY_CS_SORT_ORDER_TABLE_SIZE];
206 by Brian Aker Removed final uint dead types.	133	uint16_t tab_to_uni[MY_CS_TO_UNI_TABLE_SIZE];
1 by brian clean slate	134	char comment[MY_CS_CSDESCR_SIZE];
	135	char tailoring[MY_CS_TAILORING_SIZE];
	136	size_t tailoring_length;
	137	CHARSET_INFO cs;
	138	int (add_collation)(CHARSET_INFO cs);
	139	} MY_CHARSET_LOADER;
	140
	141
	142
	143	static int fill_uchar(uchar a,uint size,const char str, uint len)
	144	{
	145	uint i= 0;
	146	const char s, b, *e=str+len;
	147
	148	for (s=str ; s < e ; i++)
	149	{
	150	for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ;
	151	b=s;
	152	for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ;
	153	if (s == b \|\| i > size)
	154	break;
	155	a[i]= (uchar) strtoul(b,NULL,16);
	156	}
	157	return 0;
	158	}
	159
206 by Brian Aker Removed final uint dead types.	160	static int fill_uint16(uint16_t a,uint size,const char str, size_t len)
1 by brian clean slate	161	{
	162	uint i= 0;
	163
	164	const char s, b, *e=str+len;
	165	for (s=str ; s < e ; i++)
	166	{
	167	for ( ; (s < e) && strchr(" \t\r\n",s[0]); s++) ;
	168	b=s;
	169	for ( ; (s < e) && !strchr(" \t\r\n",s[0]); s++) ;
	170	if (s == b \|\| i > size)
	171	break;
206 by Brian Aker Removed final uint dead types.	172	a[i]= (uint16_t) strtol(b,NULL,16);
1 by brian clean slate	173	}
	174	return 0;
	175	}
	176
	177
	178	static int cs_enter(MY_XML_PARSER st,const char attr, size_t len)
	179	{
	180	struct my_cs_file_info i= (struct my_cs_file_info )st->user_data;
	181	struct my_cs_file_section_st *s= cs_file_sec(attr,len);
	182
	183	if ( s && (s->state == _CS_CHARSET))
212.6.1 by Mats Kindahl Replacing all bzero() calls with memset() calls and removing the bzero.c file.	184	memset(&i->cs, 0, sizeof(i->cs));
1 by brian clean slate	185
	186	if (s && (s->state == _CS_COLLATION))
	187	i->tailoring_length= 0;
	188
	189	return MY_XML_OK;
	190	}
	191
	192
	193	static int cs_leave(MY_XML_PARSER st,const char attr, size_t len)
	194	{
	195	struct my_cs_file_info i= (struct my_cs_file_info )st->user_data;
	196	struct my_cs_file_section_st *s= cs_file_sec(attr,len);
	197	int state= s ? s->state : 0;
	198	int rc;
	199
	200	switch(state){
	201	case _CS_COLLATION:
	202	rc= i->add_collation ? i->add_collation(&i->cs) : MY_XML_OK;
	203	break;
	204	default:
	205	rc=MY_XML_OK;
	206	}
	207	return rc;
	208	}
	209
	210
	211	static int cs_value(MY_XML_PARSER st,const char attr, size_t len)
	212	{
	213	struct my_cs_file_info i= (struct my_cs_file_info )st->user_data;
	214	struct my_cs_file_section_st *s;
	215	int state= (int)((s=cs_file_sec(st->attr, strlen(st->attr))) ? s->state :
	216	0);
	217
	218	switch (state) {
	219	case _CS_ID:
	220	i->cs.number= strtol(attr,(char**)NULL,10);
	221	break;
	222	case _CS_BINARY_ID:
	223	i->cs.binary_number= strtol(attr,(char**)NULL,10);
	224	break;
	225	case _CS_PRIMARY_ID:
	226	i->cs.primary_number= strtol(attr,(char**)NULL,10);
	227	break;
	228	case _CS_COLNAME:
	229	i->cs.name=mstr(i->name,attr,len,MY_CS_NAME_SIZE-1);
	230	break;
	231	case _CS_CSNAME:
	232	i->cs.csname=mstr(i->csname,attr,len,MY_CS_NAME_SIZE-1);
	233	break;
	234	case _CS_CSDESCRIPT:
	235	i->cs.comment=mstr(i->comment,attr,len,MY_CS_CSDESCR_SIZE-1);
	236	break;
	237	case _CS_FLAG:
	238	if (!strncmp("primary",attr,len))
	239	i->cs.state\|= MY_CS_PRIMARY;
	240	else if (!strncmp("binary",attr,len))
	241	i->cs.state\|= MY_CS_BINSORT;
	242	else if (!strncmp("compiled",attr,len))
	243	i->cs.state\|= MY_CS_COMPILED;
	244	break;
	245	case _CS_UPPERMAP:
	246	fill_uchar(i->to_upper,MY_CS_TO_UPPER_TABLE_SIZE,attr,len);
	247	i->cs.to_upper=i->to_upper;
	248	break;
249	case _CS_LOWERMAP:
250	fill_uchar(i->to_lower,MY_CS_TO_LOWER_TABLE_SIZE,attr,len);
251	i->cs.to_lower=i->to_lower;
252	break;
253	case _CS_UNIMAP:
254	fill_uint16(i->tab_to_uni,MY_CS_TO_UNI_TABLE_SIZE,attr,len);
255	i->cs.tab_to_uni=i->tab_to_uni;
256	break;
257	case _CS_COLLMAP:
258	fill_uchar(i->sort_order,MY_CS_SORT_ORDER_TABLE_SIZE,attr,len);
259	i->cs.sort_order=i->sort_order;
260	break;
261	case _CS_CTYPEMAP:
262	fill_uchar(i->ctype,MY_CS_CTYPE_TABLE_SIZE,attr,len);
263	i->cs.ctype=i->ctype;
264	break;
265	case _CS_RESET:
266	case _CS_DIFF1:
267	case _CS_DIFF2:
268	case _CS_DIFF3:
269	{
270	/*
271	Convert collation description from
272	Locale Data Markup Language (LDML)
273	into ICU Collation Customization expression.
274	*/
275	char arg[16];
276	const char *cmd[]= {"&","<","<<","<<<"};
277	i->cs.tailoring= i->tailoring;
278	mstr(arg,attr,len,sizeof(arg)-1);
279	if (i->tailoring_length + 20 < sizeof(i->tailoring))
280	{
281	char *dst= i->tailoring_length + i->tailoring;
282	i->tailoring_length+= sprintf(dst," %s %s",cmd[state-_CS_RESET],arg);
283	}
284	}
285	}
286	return MY_XML_OK;
287	}
288
289
276 by Brian Aker Cleaned out my_bool from strings.	290	bool my_parse_charset_xml(const char *buf, size_t len,
1 by brian clean slate	291	int (add_collation)(CHARSET_INFO cs))
	292	{
	293	MY_XML_PARSER p;
	294	struct my_cs_file_info i;
276 by Brian Aker Cleaned out my_bool from strings.	295	bool rc;
1 by brian clean slate	296
	297	my_xml_parser_create(&p);
	298	my_xml_set_enter_handler(&p,cs_enter);
	299	my_xml_set_value_handler(&p,cs_value);
	300	my_xml_set_leave_handler(&p,cs_leave);
	301	i.add_collation= add_collation;
	302	my_xml_set_user_data(&p,(void*)&i);
163 by Brian Aker Merge Monty's code.	303	rc= (my_xml_parse(&p,buf,len) == MY_XML_OK) ? false : true;
1 by brian clean slate	304	my_xml_parser_free(&p);
	305	return rc;
	306	}
	307
	308
	309	/*
	310	Check repertoire: detect pure ascii strings
	311	*/
	312	uint
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	313	my_string_repertoire(const CHARSET_INFO * const cs, const char *str, ulong length)
1 by brian clean slate	314	{
	315	const char *strend= str + length;
	316	if (cs->mbminlen == 1)
	317	{
	318	for ( ; str < strend; str++)
	319	{
	320	if (((uchar) *str) > 0x7F)
	321	return MY_REPERTOIRE_UNICODE30;
	322	}
	323	}
	324	else
	325	{
	326	my_wc_t wc;
	327	int chlen;
53.2.11 by Monty Taylor Added cast for a type-signedness problem.	328	for (; (chlen= cs->cset->mb_wc(cs, &wc, (uchar )str, (uchar )strend)) > 0; str+= chlen)
1 by brian clean slate	329	{
	330	if (wc > 0x7F)
	331	return MY_REPERTOIRE_UNICODE30;
	332	}
	333	}
	334	return MY_REPERTOIRE_ASCII;
	335	}
	336
	337
	338	/*
	339	Detect whether a character set is ASCII compatible.
	340
163 by Brian Aker Merge Monty's code.	341	Returns true for:
1 by brian clean slate	342
	343	- all 8bit character sets whose Unicode mapping of 0x7B is '{'
	344	(ignores swe7 which maps 0x7B to "LATIN LETTER A WITH DIAERESIS")
	345
	346	- all multi-byte character sets having mbminlen == 1
	347	(ignores ucs2 whose mbminlen is 2)
	348
	349	TODO:
	350
	351	When merging to 5.2, this function should be changed
	352	to check a new flag MY_CS_NONASCII,
	353
	354	return (cs->flag & MY_CS_NONASCII) ? 0 : 1;
	355
	356	This flag was previously added into 5.2 under terms
	357	of WL#3759 "Optimize identifier conversion in client-server protocol"
	358	especially to mark character sets not compatible with ASCII.
	359
	360	We won't backport this flag to 5.0 or 5.1.
	361	This function is Ok for 5.0 and 5.1, because we're not going
	362	to introduce new tricky character sets between 5.0 and 5.2.
	363	*/
276 by Brian Aker Cleaned out my_bool from strings.	364	bool
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	365	my_charset_is_ascii_based(const CHARSET_INFO * const cs)
1 by brian clean slate	366	{
	367	return
	368	(cs->mbmaxlen == 1 && cs->tab_to_uni && cs->tab_to_uni['{'] == '{') \|\|
	369	(cs->mbminlen == 1 && cs->mbmaxlen > 1);
	370	}
	371
	372
	373	/*
	374	Detect if a character set is 8bit,
	375	and it is pure ascii, i.e. doesn't have
	376	characters outside U+0000..U+007F
	377	This functions is shared between "conf_to_src"
	378	and dynamic charsets loader in "mysqld".
	379	*/
276 by Brian Aker Cleaned out my_bool from strings.	380	bool
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	381	my_charset_is_8bit_pure_ascii(const CHARSET_INFO * const cs)
1 by brian clean slate	382	{
	383	size_t code;
	384	if (!cs->tab_to_uni)
	385	return 0;
	386	for (code= 0; code < 256; code++)
	387	{
	388	if (cs->tab_to_uni[code] > 0x7F)
	389	return 0;
	390	}
	391	return 1;
	392	}
	393
	394
	395	/*
	396	Shared function between conf_to_src and mysys.
	397	Check if a 8bit character set is compatible with
	398	ascii on the range 0x00..0x7F.
	399	*/
276 by Brian Aker Cleaned out my_bool from strings.	400	bool
264.2.6 by Andrey Hristov Constify the usage of CHARSET_INFO almost to the last place in the code.	401	my_charset_is_ascii_compatible(const CHARSET_INFO * const cs)
1 by brian clean slate	402	{
	403	uint i;
	404	if (!cs->tab_to_uni)
	405	return 1;
	406	for (i= 0; i < 128; i++)
	407	{
	408	if (cs->tab_to_uni[i] != i)
	409	return 0;
	410	}
	411	return 1;
	412	}