~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
/*
17
        File strings/ctype-czech.c for MySQL.
18
19
	This file implements the Czech sorting for the MySQL database
20
	server (www.mysql.com). Due to some complicated rules the
21
	Czech language has for sorting strings, a more complex
22
	solution was needed than the one-to-one conversion table. To
23
	note a few, here is an example of a Czech sorting sequence:
24
25
		co < hlaska < hláska < hlava < chlapec < krtek
26
27
	This is because some of the rules are: double char 'ch' is sorted
28
	between 'h' and 'i'. Accented character 'á' (a with acute) is
29
	sorted after 'a' and before 'b', but only if the word is
30
	otherwise the same. However, because 's' is sorted before 'v'
31
	in hlava, the accentness of 'á' is overridden. There are many
32
	more rules.
33
34
	This file defines functions my_strxfrm and my_strcoll for
35
	C-like zero terminated strings and my_strnxfrm and my_strnncoll
36
	for strings where the length comes as a parameter. Also
37
	defined here you will find function my_like_range that returns
38
	index range strings for LIKE expression and the
39
	MY_STRXFRM_MULTIPLY set to value 4 -- this is the ratio the
40
	strings grows during my_strxfrm. The algorithm has four
41
	passes, that's why we need four times more space for expanded
42
	string.
43
44
	This file also contains the ISO-Latin-2 definitions of
45
	characters.
46
47
	Author: (c) 1997--1998 Jan Pazdziora, adelton@fi.muni.cz
48
	Jan Pazdziora has a shared copyright for this code
49
50
	The original of this file can also be found at
51
	http://www.fi.muni.cz/~adelton/l10n/
52
53
	Bug reports and suggestions are always welcome.
54
*/
55
56
/*
57
 * This comment is parsed by configure to create ctype.c,
58
 * so don't change it unless you know what you are doing.
59
 *
60
 * .configure. strxfrm_multiply_czech=4
61
 */
62
63
#include "m_string.h"
64
#include "m_ctype.h"
65
66
#ifdef HAVE_CHARSET_latin2
67
68
/*
69
	These are four tables for four passes of the algorithm. Please see
70
	below for what are the "special values"
71
*/
72
73
static const uchar *CZ_SORT_TABLE[]=
74
{
75
  (const uchar*)
76
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x41\x42\x43\x44\x45\x00\x00"
77
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
78
  "\x47\x58\x5C\x6A\x77\x6B\x69\x5B\x5E\x5F\x66\x6E\x55\x54\x5A\x67"
79
  "\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x57\x56\x71\x72\x73\x59"
80
  "\x65\x82\x83\xFF\x86\x87\x88\x89\x8A\x8C\x8D\x8E\x8F\x90\x91\x92"
81
  "\x94\x95\x96\x98\x9A\x9B\x9D\x9E\x9F\xA0\xA1\x60\x68\x61\x4B\x52"
82
  "\x49\x82\x83\xFF\x86\x87\x88\x89\x8A\x8C\x8D\x8E\x8F\x90\x91\x92"
83
  "\x94\x95\x96\x98\x9A\x9B\x9D\x9E\x9F\xA0\xA1\x62\x74\x63\x75\x00"
84
  "\x00\x00\x00\x00\x00\x46\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
85
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
86
  "\x48\x82\x4C\x8F\x76\x8F\x98\x64\x4E\x99\x98\x9A\xA1\x53\xA2\xA1"
87
  "\x6D\x82\x51\x8F\x4A\x8F\x98\x6C\x50\x99\x98\x9A\xA1\x4F\xA2\xA1"
88
  "\x96\x82\x82\x82\x82\x8F\x84\x84\x85\x87\x87\x87\x87\x8C\x8C\x86"
89
  "\x86\x91\x91\x92\x92\x92\x92\x70\x97\x9B\x9B\x9B\x9B\xA0\x9A\x98"
90
  "\x96\x82\x82\x82\x82\x8F\x84\x84\x85\x87\x87\x87\x87\x8C\x8C\x86"
91
  "\x86\x91\x91\x92\x92\x92\x92\x6F\x97\x9B\x9B\x9B\x9B\xA0\x9A\x4D",
92
93
  (const uchar*)
94
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x20\x20\x20\x20\x00\x00"
95
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
96
  "\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20"
97
  "\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20"
98
  "\x20\x20\x20\xFF\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20"
99
  "\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20"
100
  "\x20\x20\x20\xFF\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20"
101
  "\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x00"
102
  "\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
103
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
104
  "\x20\x2B\x20\x2C\x20\x25\x22\x20\x20\x25\x2A\x25\x22\x20\x25\x29"
105
  "\x20\x2B\x20\x2C\x20\x25\x22\x20\x20\x25\x2A\x25\x22\x20\x25\x29"
106
  "\x22\x22\x24\x23\x27\x22\x22\x2A\x25\x22\x2B\x47\x25\x22\x24\x25"
107
  "\x2C\x22\x25\x22\x24\x28\x27\x20\x25\x26\x22\x28\x27\x22\x2A\x21"
108
  "\x22\x22\x24\x23\x27\x22\x22\x2A\x25\x22\x2B\x47\x25\x22\x24\x25"
109
  "\x2C\x22\x25\x22\x24\x28\x27\x20\x25\x26\x22\x28\x27\x22\x2A\x20",
110
111
112
  (const uchar*)
113
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x03\x03\x03\x03\x00\x00"
114
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
115
  "\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03"
116
  "\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03"
117
  "\x03\x05\x05\xFF\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05"
118
  "\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x03\x03\x03\x03\x03"
119
  "\x03\x03\x03\xFF\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03"
120
  "\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x00"
121
  "\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
122
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
123
  "\x1B\x05\x03\x05\x03\x05\x05\x03\x03\x05\x05\x05\x05\x03\x05\x05"
124
  "\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03"
125
  "\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05"
126
  "\x05\x05\x05\x05\x05\x05\x05\x03\x05\x05\x05\x05\x05\x05\x05\x03"
127
  "\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03"
128
  "\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03",
129
130
  (const uchar*)
131
  "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
132
  "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
133
  "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
134
  "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F"
135
  "\x40\x41\x42\xFF\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F"
136
  "\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F"
137
  "\x60\x61\x62\xFF\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F"
138
  "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F"
139
  "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
140
  "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
141
  "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
142
  "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
143
  "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"
144
  "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
145
  "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
146
  "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"
147
};
148
149
/*
150
  These define the values for the double chars that need to be
151
  sorted as they were single characters -- in Czech these are
152
  'ch', 'Ch' and 'CH'.
153
*/
154
155
struct wordvalue
156
{
236.3.9 by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)
157
  const char *word;
1 by brian
clean slate
158
  const uchar *outvalue;
159
};
160
161
static struct wordvalue doubles[]=
162
{
163
  { "ch", (const uchar*) "\x8B\x20\x03\x63" },
164
  { "Ch", (const uchar*) "\x8B\x20\x04\x43" },
165
  { "CH", (const uchar*) "\x8B\x20\x05\x43" },
166
  { "c",  (const uchar*) "\x84\x20\x03\x63" },
167
  { "C",  (const uchar*) "\x84\x20\x05\x43" },
168
};
169
170
171
/*
172
  Define "auto" space character,
173
  which is used while processing "PAD SPACE" rule,
174
  when one string is shorter than another string.
175
  "Auto" space character is lower than a real space
176
  character on the third level.
177
*/
236.3.9 by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)
178
/*
  Four weights, indexed by level. Levels 0, 1 and 3 match the table
  entries of a real space (0x47, 0x20, 0x20); level 2 is 0x02, one
  below the real space's 0x03.
*/
static const uchar *virtual_space= (const uchar*)"\x47\x20\x02\x20";
1 by brian
clean slate
179
180
/*
181
        Original comments from the contributor:
182
        
183
	Informal description of the algorithm:
184
185
	We walk the string left to right.
186
187
	The end of the string is either passed as parameter, or is
188
	*p == 0. This is hidden in the IS_END macro.
189
190
	In the first two passes, we compare word by word. So we make
191
	first and second pass on the first word, first and second pass
192
	on the second word, etc. If we come to the end of the string
193
	during the first pass, we need to jump to the last word of the
194
	second pass.
195
196
	End of pass is marked with value 1 on the output.
197
198
	For each character, we read its value from the table.
199
200
	If the value is ignore (0), we go straight to the next character.
201
202
	If the value is space/end of word (2) and we are in the first
203
	or second pass, we skip all characters having value 0 -- 2 and
204
	switch the pass.
205
206
	If it's the compose character (255), we check if the double
207
	exists behind it, find its value.
208
209
	We append 0 to the end.
210
211
	Neformální popis algoritmu:
212
213
	procházíme řetězec zleva doprava
	konec řetězce poznáme podle *p == 0
	pokud jsme došli na konec řetězce při průchodu 0, nejdeme na
		začátek, ale na uloženou pozici, protože první a druhý
		průchod běží současně
	konec vstupu (průchodu) označíme na výstupu hodnotou 1

	načteme hodnotu z třídící tabulky
	jde-li o hodnotu ignorovat (0), skočíme na další průchod
	jde-li o hodnotu konec slova (2) a je to průchod 0 nebo 1,
		přeskočíme všechny další 0 -- 2 a prohodíme
		průchody
	jde-li o kompozitní znak (255), otestujeme, zda následuje
		správný do dvojice, dohledáme správnou hodnotu

	na konci připojíme znak 0
229
*/
230
231
/*
232
  In March 2007 latin2_czech_cs was reworked by Alexander Barkov,
233
  to suit other MySQL collations better, and to be Falcon compatible.
234
  
235
  Changes:
236
  - Discarded word-by-word comparison on the primary and the secondary level.
237
    Comparison is now strictly done level-by-level
238
    (like the Unicode Collation Algorithm (UCA) does).
239
    
240
  - Character weights were derived from Unicode 5.0.0 standard.
241
    This is to make order of punctuation characters and digits
242
    more consistent with all other MySQL collations and UCA.
243
    
244
    The order is now:
245
    
246
      Controls, spaces, punctuations, digits, letters.
247
    
248
    It previously used to be:
249
    
250
      Punctuations, controls, some more punctuations, letters, digits.
251
    
252
    NOTE:
253
    
254
    A minor difference between this implementation and the UCA:
255
    
256
    German "LATIN SMALL LETTER SHARP S" does not expand to "ss".
257
    It is instead considered as secondary greater than "LATIN LETTER S",
258
    and thus sorted between "LATIN LETTER S" and "LATIN LETTER S WITH ACUTE".
259
    This allows to reduce *twice* disk space required for un-indexed
260
    ORDER BY (using the filesort method).
261
    
262
    As neither the original version of latin2_czech_cs 
263
    expanded "SHARP S" to "ss", nor "SHARP S" is a part of Czech alphabet,
264
    this behavior should be ok.
265
    
266
  - Collation is now "PAD SPACE" like all other MySQL collations.
267
    It ignores trailing spaces on primary and secondary level.
268
    
269
  - SPACE and TAB characters are not ignorable anymore.
270
    Also, they have different weights on primary level,
271
    like in all other MySQL collations:
272
    
273
    SELECT 'a\t' < 'a ' -- returns true
274
    SELECT 'a\t' < 'a'  -- returns true
275
    
276
  - Some other punctuation characters are not ignorable anymore,
277
    for better compatibility with UCA and other MySQL collations.
278
279
*/
280
281
282
/*
  Store `value` at dest[totlen] if that index is below `len`, then
  advance `totlen` unconditionally, so that `totlen` ends up holding
  the length the full result would have needed.

  Expands to a single statement, which makes it safe inside
  unbraced if/else. The invocation must be followed by a semicolon.
  Note that `totlen` is evaluated more than once.
*/
#define ADD_TO_RESULT(dest, len, totlen, value)			\
do								\
{								\
  if ((totlen) < (len))						\
    (dest)[totlen]= (value);					\
  (totlen)++;							\
} while (0)
284
/*
  True when `p` is at least `len` bytes past `src`, i.e. the input is
  exhausted. Arguments are parenthesized so that expressions such as
  `q + 1` are cast as a whole, and constness is preserved by the casts.
*/
#define IS_END(p, src, len)					\
  (((const char *) (p) - (const char *) (src)) >= (len))
285
286
/*
  NEXT_CMP_VALUE(src, p, pass, value, len, ml)

  Fetch the next weight of the string [src, src + len) on level `pass`
  into `value` and advance the cursor `p`.

  src    - start of the string
  p      - current position (lvalue, updated)
  pass   - current level 0..3 (lvalue, may be incremented when ml is set)
  value  - receives the weight (lvalue)
  len    - length of the string in bytes
  ml     - "multi-level" flag: when non-zero, reaching the end of the
           string on a level below 3 yields 1 and automatically
           rewinds `p` to `src` on the next level (only if len > 0);
           when zero, the end of the string always yields -1 and the
           level is left alone.

  On the end of the string on level 3 the value is always -1.
  Bytes whose table weight is 0 are skipped on levels 0..2.
  A table weight of 255 triggers a lookup in doubles[]; the first
  matching sequence supplies the weight and is consumed as a whole.
*/

#define NEXT_CMP_VALUE(src, p, pass, value, len, ml)	\
for (;;)						\
{							\
  if (IS_END(p, src, len))				\
  {							\
    /* end of input: 1 = end of a level, -1 = done */	\
    value= (pass < 3 && ml) ? 1 : -1;			\
    if (pass != 3 && ml && len > 0)			\
    {							\
      /* restart from the beginning, one level up */	\
      p= src;						\
      pass++;						\
    }							\
    break;						\
  }							\
  /* regular byte: look its weight up */		\
  value= CZ_SORT_TABLE[pass][*p];			\
  if (value == 0 && pass < 3)				\
  {							\
    /* ignorable on levels 0,1,2 */			\
    p++;						\
    continue;						\
  }							\
  if (value == 255)					\
  {							\
    int dbl_idx;					\
    for (dbl_idx= 0;					\
         dbl_idx < (int) array_elements(doubles);	\
         dbl_idx++)					\
    {							\
      const char *dbl_pat= doubles[dbl_idx].word;	\
      const char *dbl_cur= (const char *) p;		\
      int dbl_pos;					\
      for (dbl_pos= 0; dbl_pat[dbl_pos];		\
           dbl_pos++, dbl_cur++)			\
      {							\
        if (IS_END(dbl_cur, src, len) ||		\
            (*dbl_cur != dbl_pat[dbl_pos]))		\
          break;					\
      }							\
      if (!(dbl_pat[dbl_pos]))				\
      {							\
        /* whole pattern matched: take its weight */	\
        value= (int) (doubles[dbl_idx].outvalue[pass]);	\
        p= (const uchar *) dbl_cur - 1;			\
        break;						\
      }							\
    }							\
  }							\
  p++;							\
  break;						\
}
337
338
/*
339
  Function strnncoll, actually strcoll, with Czech sorting, which expect
340
  the length of the strings being specified
341
*/
342
236.3.9 by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)
343
static int my_strnncoll_czech(const CHARSET_INFO *cs __attribute__((unused)),
1 by brian
clean slate
344
			      const uchar *s1, size_t len1, 
345
			      const uchar *s2, size_t len2,
346
                              my_bool s2_is_prefix)
347
{
348
  int v1, v2;
349
  const uchar * p1, * p2;
350
  int pass1= 0, pass2= 0;
351
352
  if (s2_is_prefix && len1 > len2)
353
    len1=len2;
354
355
  p1= s1;
356
  p2= s2;
357
358
  do
359
  {
360
    int diff;
361
    NEXT_CMP_VALUE(s1, p1, pass1, v1, (int)len1, 1);
362
    NEXT_CMP_VALUE(s2, p2, pass2, v2, (int)len2, 1);
363
    if ((diff = v1 - v2))
364
      return diff;
365
  }
366
  while (v1);
367
  return 0;
368
}
369
370
371
372
/*
373
  Compare strings, ignore trailing spaces
374
*/
375
376
static int
236.3.9 by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)
377
my_strnncollsp_czech(const CHARSET_INFO * cs __attribute__((unused)),
1 by brian
clean slate
378
                     const uchar *s, size_t slen,
379
                     const uchar *t, size_t tlen,
380
                     my_bool diff_if_only_endspace_difference
381
                     __attribute__((unused)))
382
{
383
  int level;
384
385
  for (level= 0; level <= 3; level++)
386
  {
387
    const uchar *s1= s;
388
    const uchar *t1= t;
389
390
    for (;;)
391
    {
392
      int sval, tval, diff;
393
      NEXT_CMP_VALUE(s, s1, level, sval, (int) slen, 0);
394
      NEXT_CMP_VALUE(t, t1, level, tval, (int) tlen, 0);
395
      if (sval < 0)
396
      {
397
        sval= virtual_space[level];
398
        for (; tval >= 0 ;)
399
        {
400
          if ((diff= sval - tval))
401
            return diff;
402
          NEXT_CMP_VALUE(t, t1, level, tval, (int) tlen, 0);
403
        }
404
        break;
405
      }
406
      else if (tval < 0)
407
      {
408
        tval= virtual_space[level];
409
        for (; sval >= 0 ;)
410
        {
411
          if ((diff= sval - tval))
412
            return diff;
413
          NEXT_CMP_VALUE(s, s1, level, sval, (int) slen, 0);
414
        }
415
        break;
416
      }
417
418
      if ((diff= sval - tval))
419
        return diff;
420
    }
421
  }
422
  return 0;
423
}
424
425
426
/*
427
  Returns the number of bytes required for strnxfrm().
428
*/
429
static size_t
236.3.9 by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)
430
my_strnxfrmlen_czech(const CHARSET_INFO *cs __attribute__((unused)), size_t len)
1 by brian
clean slate
431
{
432
  return len * 4 + 4;
433
}
434
435
436
/*
437
  Function strnxfrm, actually strxfrm, with Czech sorting, which expect
438
  the length of the strings being specified
439
*/
440
static size_t
236.3.9 by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)
441
my_strnxfrm_czech(const CHARSET_INFO * cs  __attribute__((unused)),
1 by brian
clean slate
442
                  uchar *dst, size_t dstlen, uint nweights_arg,
443
                  const uchar *src, size_t srclen, uint flags)
444
{
445
  uint level;
446
  uchar *dst0= dst;
447
  uchar *de= dst + dstlen;
448
449
  if (!(flags & 0x0F)) /* All levels by default */
450
    flags|= 0x0F;
451
452
  for (level= 0; level <= 3; level++)
453
  {
454
    if (flags & (1 << level))
455
    {
456
      uint nweights= nweights_arg;
457
      const uchar *p= src;
458
      int value;
459
      uchar *dstl= dst;
460
      
461
      for (; dst < de && nweights; nweights--)
462
      {
463
        NEXT_CMP_VALUE(src, p, level, value, (int) srclen, 0);
464
        if (value < 0)
465
          break;
466
        *dst++= value;
467
      }
468
      
469
      if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
470
      {
471
        uint pad_length= de - dst;
472
        set_if_smaller(pad_length, nweights);
473
        /* fill with weight for space character */
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
474
        memset(dst, virtual_space[level], pad_length);
1 by brian
clean slate
475
        dst+= pad_length;
476
      }
477
      
478
      my_strxfrm_desc_and_reverse(dstl, dst, flags, level);
479
      
480
      /* Add level delimiter */
481
      if (dst < de)
482
        *dst++= level < 3 ? 1 : 0;
483
    }
484
  }
485
486
  return dst - dst0;
487
}
488
489
490
/* IS_END is not used below this point. */
#undef IS_END
491
492
493
/*
494
 */
495
496
497
/*
498
** Calculate min_str and max_str that ranges a LIKE string.
499
** Arguments:
500
** ptr		Pointer to LIKE string.
501
** ptr_length	Length of LIKE string.
502
** escape	Escape character in LIKE.  (Normally '\').
503
**		All escape characters should be removed from min_str and max_str
504
** res_length   Length of min_str and max_str.
505
** min_str      Smallest case sensitive string that ranges LIKE.
506
**		Should be space padded to res_length.
507
** max_str	Largest case sensitive string that ranges LIKE.
508
**		Normally padded with the biggest character sort value.
509
**
510
** The function should return 0 if ok and 1 if the LIKE string can't be
511
** optimized !
512
*/
513
514
/* Byte used by my_like_range_czech to pad the remainder of min_str. */
#define min_sort_char 0x00
515
/*
  Byte used by my_like_range_czech to pad the remainder of max_str.
  Its level-0 weight in CZ_SORT_TABLE is 0xA2, the largest weight in
  that table apart from the special value 0xFF.
*/
#define max_sort_char 0xAE
516
517
236.3.9 by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)
518
static my_bool my_like_range_czech(const CHARSET_INFO *cs __attribute__((unused)),
1 by brian
clean slate
519
				   const char *ptr,size_t ptr_length,
236.3.9 by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)
520
				   char escape, char w_one, char w_many,
1 by brian
clean slate
521
				   size_t res_length, char *min_str,
522
				   char *max_str,
523
				   size_t *min_length,size_t *max_length)
524
{
525
  uchar value;
526
  const char *end=ptr+ptr_length;
527
  char *min_org=min_str;
528
  char *min_end=min_str+res_length;
529
530
  for (; ptr != end && min_str != min_end ; ptr++)
531
  {
532
    if (*ptr == w_one)		/* '_' in SQL */
533
    { break; }
534
    if (*ptr == w_many)		/* '%' in SQL */
535
    { break; }
536
537
    if (*ptr == escape && ptr+1 != end)
538
    { ptr++; }			/* Skip escape */
539
540
    value = CZ_SORT_TABLE[0][(int) (uchar) *ptr];
541
542
    if (value == 0)			/* Ignore in the first pass */
543
    { continue; }
544
    if (value <= 2)			/* End of pass or end of string */
545
    { break; }
546
    if (value == 255)		/* Double char too compicated */
547
    { break; }
548
549
    *min_str++= *max_str++ = *ptr;
550
  }
551
552
  if (cs->state & MY_CS_BINSORT)
553
    *min_length= (size_t) (min_str - min_org);
554
  else
555
  {
556
    /* 'a\0\0... is the smallest possible string */
557
    *min_length= res_length;
558
  }
559
  /* a\ff\ff... is the biggest possible string */
560
  *max_length= res_length;
561
562
  while (min_str != min_end)
563
  {
564
    *min_str++ = min_sort_char;	/* Because of key compression */
236.3.9 by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)
565
    *max_str++ = (char) max_sort_char;
1 by brian
clean slate
566
  }
567
  return 0;
568
}
569
570
571
/*
572
 * File generated by cset
573
 * (C) Abandoned 1997 Zarko Mocnik <zarko.mocnik@dem.si>
574
 *
575
 * definition table reworked by Jaromir Dolecek <dolecek@ics.muni.cz>
576
 */
577
578
/*
  Character classification table: 257 entries, a leading 0 followed
  by sixteen rows of sixteen values.
  NOTE(review): presumably entry [c + 1] classifies byte c, as in the
  other MySQL ctype tables -- confirm against m_ctype.h.
*/
static uchar ctype_czech[257]=
{
  0,
 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
 16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1,
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,
 16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,
 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, 32,
 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 72,
  1, 16,  1, 16,  1,  1, 16,  0,  0,  1,  1,  1,  1, 16,  1,  1,
 16,  2, 16,  2, 16,  2,  2, 16, 16,  2,  2,  2,  2, 16,  2,  2,
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
 16,  1,  1,  1,  1,  1,  1, 16,  1,  1,  1,  1,  1,  1,  1, 16,
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
  2,  2,  2,  2,  2,  2,  2, 16,  2,  2,  2,  2,  2,  2,  2, 16,
};
597
598
/* Lower-case mapping, indexed by byte value (256 entries). */
static uchar to_lower_czech[]=
{
  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
177,161,179,163,181,182,166,167,168,185,186,187,188,173,190,191,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
208,241,242,243,244,245,246,215,248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
};
616
617
/* Upper-case mapping, indexed by byte value (256 entries). */
static uchar to_upper_czech[]=
{
  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
176,160,178,162,180,164,165,183,184,169,170,171,172,189,174,175,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
240,209,210,211,212,213,214,247,216,217,218,219,220,221,222,255,
};
635
636
/*
  Single-level sort order, indexed by byte value (256 entries).
  It is installed as the sort_order member of the CHARSET_INFO below;
  the comparison functions in this file use CZ_SORT_TABLE instead.
*/
static uchar sort_order_czech[]=
{
  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
 64, 65, 71, 72, 76, 78, 83, 84, 85, 86, 90, 91, 92, 96, 97,100,
105,106,107,110,114,117,122,123,124,125,127,131,132,133,134,135,
136, 65, 71, 72, 76, 78, 83, 84, 85, 86, 90, 91, 92, 96, 97,100,
105,106,107,110,114,117,122,123,124,125,127,137,138,139,140,  0,
  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,255,
 66,255, 93,255, 94,111,255,255,255,112,113,115,128,255,129,130,
255, 66,255, 93,255, 94,111,255,255,112,113,115,128,255,129,130,
108, 67, 68, 69, 70, 95, 73, 75, 74, 79, 81, 82, 80, 89, 87, 77,
255, 98, 99,101,102,103,104,255,109,119,118,120,121,126,116,255,
108, 67, 68, 69, 70, 95, 73, 75, 74, 79, 81, 82, 80, 89, 88, 77,
255, 98, 99,101,102,103,104,255,109,119,118,120,121,126,116,255,
};
654
236.3.9 by Andrey Hristov
- Fix build of exotic, mostly non-western, charsets (--with-extra-charsets)
655
/*
  Byte -> Unicode code point map (256 entries, eight per row).
  A zero entry other than at index 0 marks a byte with no mapping in
  this table (0x7F..0x9F).
*/
static uint16_t tab_8859_2_uni[256]=
{
     0,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
     0,     0,     0,     0,     0,     0,     0,     0,
0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7,
0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B,
0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7,
0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C,
0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7,
0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E,
0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7,
0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF,
0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,
0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,
0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,
0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
};
689
690
691
/* 0000-00FD , 254 chars */
692
/* Unicode -> byte map for the range listed first in idx_uni_8859_2. */
static uchar tab_uni_8859_2_plane00[]=
{
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0xA0,0x00,0x00,0x00,0xA4,0x00,0x00,0xA7,0xA8,0x00,0x00,0x00,0x00,0xAD,0x00,0x00,
0xB0,0x00,0x00,0x00,0xB4,0x00,0x00,0x00,0xB8,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0xC1,0xC2,0x00,0xC4,0x00,0x00,0xC7,0x00,0xC9,0x00,0xCB,0x00,0xCD,0xCE,0x00,
0x00,0x00,0x00,0xD3,0xD4,0x00,0xD6,0xD7,0x00,0x00,0xDA,0x00,0xDC,0xDD,0x00,0xDF,
0x00,0xE1,0xE2,0x00,0xE4,0x00,0x00,0xE7,0x00,0xE9,0x00,0xEB,0x00,0xED,0xEE,0x00,
0x00,0x00,0x00,0xF3,0xF4,0x00,0xF6,0xF7,0x00,0x00,0xFA,0x00,0xFC,0xFD
};
709
710
/* 0102-017E , 125 chars */
711
/* Unicode -> byte map for the range listed second in idx_uni_8859_2. */
static uchar tab_uni_8859_2_plane01[]=
{
0xC3,0xE3,0xA1,0xB1,0xC6,0xE6,0x00,0x00,0x00,0x00,0xC8,0xE8,0xCF,0xEF,0xD0,0xF0,
0x00,0x00,0x00,0x00,0x00,0x00,0xCA,0xEA,0xCC,0xEC,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xC5,0xE5,0x00,0x00,0xA5,0xB5,0x00,0x00,0xA3,
0xB3,0xD1,0xF1,0x00,0x00,0xD2,0xF2,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xD5,0xF5,
0x00,0x00,0xC0,0xE0,0x00,0x00,0xD8,0xF8,0xA6,0xB6,0x00,0x00,0xAA,0xBA,0xA9,0xB9,
0xDE,0xFE,0xAB,0xBB,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xD9,0xF9,0xDB,0xFB,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xAC,0xBC,0xAF,0xBF,0xAE,0xBE
};
720
721
/* 02C7-02DD ,  23 chars */
722
/* Unicode -> byte map for the range listed third in idx_uni_8859_2. */
static uchar tab_uni_8859_2_plane02[]=
{
0xB7,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0xA2,0xFF,0x00,0xB2,0x00,0xBD
};
725
726
static MY_UNI_IDX idx_uni_8859_2[]={
727
  {0x0000,0x00FD,tab_uni_8859_2_plane00},
728
  {0x0102,0x017E,tab_uni_8859_2_plane01},
729
  {0x02C7,0x02DD,tab_uni_8859_2_plane02},
730
  {0,0,NULL}
731
};
732
733
734
static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
735
{
736
  NULL,			/* init */
737
  my_strnncoll_czech,
738
  my_strnncollsp_czech,
739
  my_strnxfrm_czech,
740
  my_strnxfrmlen_czech,
741
  my_like_range_czech,
742
  my_wildcmp_bin,
743
  my_strcasecmp_8bit,
744
  my_instr_simple,
745
  my_hash_sort_simple,
746
  my_propagate_simple
747
};
748
749
CHARSET_INFO my_charset_latin2_czech_ci =
750
{
751
    2,0,0,                                      /* number    */
752
    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state     */
753
    "latin2",                                   /* cs name   */
754
    "latin2_czech_cs",                          /* name      */
755
    "",                                         /* comment   */
756
    NULL,                                       /* tailoring */
757
    ctype_czech,
758
    to_lower_czech,
759
    to_upper_czech,
760
    sort_order_czech,
761
    NULL,		/* contractions */
762
    NULL,		/* sort_order_big*/
763
    tab_8859_2_uni,	/* tab_to_uni   */
764
    idx_uni_8859_2,	/* tab_from_uni */
765
    my_unicase_default, /* caseinfo     */
766
    NULL,		/* state_map    */
767
    NULL,		/* ident_map    */
768
    4,			/* strxfrm_multiply */
769
    1,                  /* caseup_multiply  */
770
    1,                  /* casedn_multiply  */
771
    1,			/* mbminlen   */
772
    1,			/* mbmaxlen  */
773
    0,			/* min_sort_char */
774
    0,			/* max_sort_char */
775
    ' ',                /* pad char      */
776
    0,                  /* escape_with_backslash_is_dangerous */
777
    4,                  /* levels_for_compare */
778
    4,                  /* levels_for_order   */
779
    &my_charset_8bit_handler,
780
    &my_collation_latin2_czech_ci_handler
781
};
782
783
#endif