~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
/*
17
        File strings/ctype-czech.c for MySQL.
18
19
	This file implements the Czech sorting for the MySQL database
20
	server (www.mysql.com). Due to some complicated rules the
21
	Czech language has for sorting strings, a more complex
22
	solution was needed than the one-to-one conversion table. To
23
	note a few, here is an example of a Czech sorting sequence:
24
25
		co < hlaska < hláska < hlava < chlapec < krtek
26
27
	This is because some of the rules are: the double char 'ch' is sorted
28
	between 'h' and 'i'. Accented character 'á' (a with acute) is
29
	sorted after 'a' and before 'b', but only if the word is
30
	otherwise the same. However, because 's' is sorted before 'v'
31
	in hlava, the accentness of 'á' is overridden. There are many
32
	more rules.
33
34
	This file defines functions my_strxfrm and my_strcoll for
35
	C-like zero terminated strings and my_strnxfrm and my_strnncoll
36
	for strings where the length comes as a parameter. Also
37
	defined here you will find function my_like_range that returns
38
	index range strings for LIKE expression and the
39
	MY_STRXFRM_MULTIPLY set to value 4 -- this is the ratio the
40
	strings grows during my_strxfrm. The algorithm has four
41
	passes, that's why we need four times more space for expanded
42
	string.
43
44
	This file also contains the ISO-Latin-2 definitions of
45
	characters.
46
47
	Author: (c) 1997--1998 Jan Pazdziora, adelton@fi.muni.cz
48
	Jan Pazdziora has a shared copyright for this code
49
50
	The original of this file can also be found at
51
	http://www.fi.muni.cz/~adelton/l10n/
52
53
	Bug reports and suggestions are always welcome.
54
*/
55
56
/*
57
 * This comment is parsed by configure to create ctype.c,
58
 * so don't change it unless you know what you are doing.
59
 *
60
 * .configure. strxfrm_multiply_czech=4
61
 */
62
63
#include <my_global.h>
64
#include "m_string.h"
65
#include "m_ctype.h"
66
67
#ifdef HAVE_CHARSET_latin2
68
69
/*
70
	These are four tables for four passes of the algorithm. Please see
71
	below for what are the "special values"
72
*/
73
74
/*
  Weight tables, one per comparison level (pass); each table is indexed
  by the input byte and holds 256 entries.

  Special values, as interpreted by NEXT_CMP_VALUE:
    0x00  the byte is skipped on levels 0, 1 and 2 (on level 3 it is an
          ordinary weight);
    0xFF  the weight has to be looked up in "doubles".  In all four
          tables this marker sits at 0x43 ('C') and 0x63 ('c'); in the
          level 3 table it is also the ordinary identity weight of
          byte 0xFF.
*/
static const uchar *CZ_SORT_TABLE[]=
75
{
76
  /* level 0 */
  (const uchar*)
77
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x41\x42\x43\x44\x45\x00\x00"
78
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
79
  "\x47\x58\x5C\x6A\x77\x6B\x69\x5B\x5E\x5F\x66\x6E\x55\x54\x5A\x67"
80
  "\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F\x80\x81\x57\x56\x71\x72\x73\x59"
81
  "\x65\x82\x83\xFF\x86\x87\x88\x89\x8A\x8C\x8D\x8E\x8F\x90\x91\x92"
82
  "\x94\x95\x96\x98\x9A\x9B\x9D\x9E\x9F\xA0\xA1\x60\x68\x61\x4B\x52"
83
  "\x49\x82\x83\xFF\x86\x87\x88\x89\x8A\x8C\x8D\x8E\x8F\x90\x91\x92"
84
  "\x94\x95\x96\x98\x9A\x9B\x9D\x9E\x9F\xA0\xA1\x62\x74\x63\x75\x00"
85
  "\x00\x00\x00\x00\x00\x46\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
86
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
87
  "\x48\x82\x4C\x8F\x76\x8F\x98\x64\x4E\x99\x98\x9A\xA1\x53\xA2\xA1"
88
  "\x6D\x82\x51\x8F\x4A\x8F\x98\x6C\x50\x99\x98\x9A\xA1\x4F\xA2\xA1"
89
  "\x96\x82\x82\x82\x82\x8F\x84\x84\x85\x87\x87\x87\x87\x8C\x8C\x86"
90
  "\x86\x91\x91\x92\x92\x92\x92\x70\x97\x9B\x9B\x9B\x9B\xA0\x9A\x98"
91
  "\x96\x82\x82\x82\x82\x8F\x84\x84\x85\x87\x87\x87\x87\x8C\x8C\x86"
92
  "\x86\x91\x91\x92\x92\x92\x92\x6F\x97\x9B\x9B\x9B\x9B\xA0\x9A\x4D",
93
94
  /* level 1 */
  (const uchar*)
95
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x20\x20\x20\x20\x00\x00"
96
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
97
  "\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20"
98
  "\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20"
99
  "\x20\x20\x20\xFF\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20"
100
  "\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20"
101
  "\x20\x20\x20\xFF\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20"
102
  "\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x00"
103
  "\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
104
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
105
  "\x20\x2B\x20\x2C\x20\x25\x22\x20\x20\x25\x2A\x25\x22\x20\x25\x29"
106
  "\x20\x2B\x20\x2C\x20\x25\x22\x20\x20\x25\x2A\x25\x22\x20\x25\x29"
107
  "\x22\x22\x24\x23\x27\x22\x22\x2A\x25\x22\x2B\x47\x25\x22\x24\x25"
108
  "\x2C\x22\x25\x22\x24\x28\x27\x20\x25\x26\x22\x28\x27\x22\x2A\x21"
109
  "\x22\x22\x24\x23\x27\x22\x22\x2A\x25\x22\x2B\x47\x25\x22\x24\x25"
110
  "\x2C\x22\x25\x22\x24\x28\x27\x20\x25\x26\x22\x28\x27\x22\x2A\x20",
111
112
113
  /* level 2 */
  (const uchar*)
114
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x03\x03\x03\x03\x00\x00"
115
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
116
  "\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03"
117
  "\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03"
118
  "\x03\x05\x05\xFF\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05"
119
  "\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x03\x03\x03\x03\x03"
120
  "\x03\x03\x03\xFF\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03"
121
  "\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x00"
122
  "\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
123
  "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
124
  "\x1B\x05\x03\x05\x03\x05\x05\x03\x03\x05\x05\x05\x05\x03\x05\x05"
125
  "\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03"
126
  "\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05"
127
  "\x05\x05\x05\x05\x05\x05\x05\x03\x05\x05\x05\x05\x05\x05\x05\x03"
128
  "\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03"
129
  "\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03\x03",
130
131
  /* level 3: identity mapping, except for the 0xFF marker at 'C' and 'c' */
  (const uchar*)
132
  "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
133
  "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
134
  "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
135
  "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F"
136
  "\x40\x41\x42\xFF\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F"
137
  "\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x5B\x5C\x5D\x5E\x5F"
138
  "\x60\x61\x62\xFF\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F"
139
  "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F"
140
  "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
141
  "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
142
  "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
143
  "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
144
  "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"
145
  "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
146
  "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
147
  "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"
148
};
149
150
/*
151
  These define the values for the double chars that need to be
152
  sorted as they were single characters -- in Czech these are
153
  'ch', 'Ch' and 'CH'.
154
*/
155
156
struct wordvalue
157
{
158
  const uchar *word;
159
  const uchar *outvalue;
160
};
161
162
static struct wordvalue doubles[]=
163
{
164
  { "ch", (const uchar*) "\x8B\x20\x03\x63" },
165
  { "Ch", (const uchar*) "\x8B\x20\x04\x43" },
166
  { "CH", (const uchar*) "\x8B\x20\x05\x43" },
167
  { "c",  (const uchar*) "\x84\x20\x03\x63" },
168
  { "C",  (const uchar*) "\x84\x20\x05\x43" },
169
};
170
171
172
/*
173
  Define "auto" space character,
174
  which is used while processing "PAD SPACE" rule,
175
  when one string is shorter than another string.
176
  "Auto" space character is lower than a real space
177
  character on the third level.
178
*/
179
static const uchar *virtual_space= "\x47\x20\x02\x20";
180
181
/*
182
        Original comments from the contributor:
183
        
184
	Informal description of the algorithm:
185
186
	We walk the string left to right.
187
188
	The end of the string is either passed as parameter, or is
189
	*p == 0. This is hidden in the IS_END macro.
190
191
	In the first two passes, we compare word by word. So we make
192
	first and second pass on the first word, first and second pass
193
	on the second word, etc. If we come to the end of the string
194
	during the first pass, we need to jump to the last word of the
195
	second pass.
196
197
	End of pass is marked with value 1 on the output.
198
199
	For each character, we read its value from the table.
200
201
	If the value is ignore (0), we go straight to the next character.
202
203
	If the value is space/end of word (2) and we are in the first
204
	or second pass, we skip all characters having value 0 -- 2 and
205
	switch the pass.
206
207
	If it's the compose character (255), we check if the double
208
	exists behind it, find its value.
209
210
	We append 0 to the end.
211
212
	Neformální popis algoritmu:
213
214
	procházíme řetězec zleva doprava
215
	konec řetězce poznáme podle *p == 0
216
	pokud jsme došli na konec řetězce při průchodu 0, nejdeme na
217
		začátek, ale na uloženou pozici, protože první a druhý
218
		průchod běží současně
219
	konec vstupu (průchodu) označíme na výstupu hodnotou 1
220
221
	načteme hodnotu z třídící tabulky
222
	jde-li o hodnotu ignorovat (0), skočíme na další průchod
223
	jde-li o hodnotu konec slova (2) a je to průchod 0 nebo 1,
224
		přeskočíme všechny další 0 -- 2 a prohodíme
225
		průchody
226
	jde-li o kompozitní znak (255), otestujeme, zda následuje
227
		správný do dvojice, dohledáme správnou hodnotu
228
229
	na konci připojíme znak 0
230
*/
231
232
/*
233
  In March 2007 latin2_czech_cs was reworked by Alexander Barkov,
234
  to suit other MySQL collations better, and to be Falcon compatible.
235
  
236
  Changes:
237
  - Discarded word-by-word comparison on the primary and the secondary level.
238
    Comparison is now strictly done level-by-level
239
    (like the Unicode Collation Algorithm (UCA) does).
240
    
241
  - Character weights were derived from Unicode 5.0.0 standard.
242
    This is to make order of punctuation characters and digits
243
    more consistent with all other MySQL collations and UCA.
244
    
245
    The order is now:
246
    
247
      Controls, spaces, punctuations, digits, letters.
248
    
249
    It previously used to be:
250
    
251
      Punctuations, controls, some more punctuations, letters, digits.
252
    
253
    NOTE:
254
    
255
    A minor difference between this implementation and the UCA:
256
    
257
    German "LATIN SMALL LETTER SHARP S" does not expand to "ss".
258
    It is instead considered as secondary greater than "LATIN LETTER S",
259
    and thus sorted between "LATIN LETTER S" and "LATIN LETTER S WITH ACUTE".
260
    This allows to reduce *twice* disk space required for un-indexed
261
    ORDER BY (using the filesort method).
262
    
263
    As neither the original version of latin2_czech_cs 
264
    expanded "SHARP S" to "ss", nor "SHARP S" is a part of Czech alphabet,
265
    this behavior should be ok.
266
    
267
  - Collation is now "PAD SPACE" like all other MySQL collations.
268
    It ignores trailing spaces on primary and secondary level.
269
    
270
  - SPACE and TAB characters are not ignorable anymore.
271
    Also, they have different weights on primary level,
272
    like in all other MySQL collations:
273
    
274
    SELECT 'a\t' < 'a ' -- returns true
275
    SELECT 'a\t' < 'a'  -- returns true
276
    
277
  - Some other punctuation characters are not ignorable anymore,
278
    for better compatibility with UCA and other MySQL collations.
279
280
*/
281
282
283
/*
  Store "value" at dest[totlen] if that position still lies inside a
  buffer of "len" elements, then advance totlen unconditionally so the
  caller learns how much room the full result would have needed.

  Wrapped in do { } while (0) so that the macro behaves as a single
  statement (safe in an unbraced if/else); invoke it with a trailing
  semicolon.  Note that "totlen" is evaluated more than once.
*/
#define ADD_TO_RESULT(dest, len, totlen, value)                 \
  do                                                            \
  {                                                             \
    if ((totlen) < (len))                                       \
      (dest)[totlen]= (value);                                  \
    (totlen)++;                                                 \
  } while (0)

/* True once the cursor p has advanced len or more bytes past src. */
#define IS_END(p, src, len)                                     \
  (((const char *) (p) - (const char *) (src)) >= (len))
286
287
/*
  NEXT_CMP_VALUE(src, p, pass, value, len, ml)

  Fetch into "value" the next weight of the string src[0 .. len) on
  level "pass" and advance the cursor "p".

    src    start of the string
    p      read cursor (lvalue, updated)
    pass   current level 0..3 (lvalue, may be incremented)
    value  receives the weight (int lvalue)
    len    length of the string as an int
    ml     "multi-level" flag: non-zero to switch to the next level
           automatically, zero to stop at the end of the current one

  At the end of the input:
    - with ml set and pass < 3, value is 1 (level delimiter); the cursor
      is rewound to src and pass is incremented, but only when len > 0.
      For a zero-length string the level is never advanced, so every
      call keeps returning 1;
    - otherwise value is -1 and nothing else changes, so every further
      call returns -1 again.

  Weights of 0 are skipped on levels 0..2.  A table value of 255 is
  resolved through "doubles": the first row whose word matches at the
  cursor supplies the weight and the whole word is consumed.  If no row
  matches, 255 is returned as is.

  The expansion declares locals named i, j, q and pattern, so callers
  must not pass arguments using those names.  "pattern" is obtained
  through an explicit cast, which keeps the expansion free of implicit
  char/uchar pointer conversions.
*/

#define NEXT_CMP_VALUE(src, p, pass, value, len, ml)            \
while (1)                                                       \
{                                                               \
  if (IS_END(p, src, len))                                      \
  {                                                             \
    /* end of input: 1 = end of level, -1 = end of string */    \
    value= pass < 3 && ml ? 1 : -1;                             \
    if (pass != 3 && ml && len > 0)                             \
    {                                                           \
      /* rewind and continue with the next level */             \
      p= src;                                                   \
      pass++;                                                   \
    }                                                           \
    break;                                                      \
  }                                                             \
  /* not at end of string */                                    \
  value= CZ_SORT_TABLE[pass][*p];                               \
  if (value == 0 && pass < 3)                                   \
  { p++; continue; } /* ignore value on levels 0,1,2 */         \
  if (value == 255)                                             \
  {                                                             \
    /* marker: take the weight from the first matching double */\
    int i;                                                      \
    for (i= 0; i < (int) array_elements(doubles); i++)          \
    {                                                           \
      const char *pattern= (const char *) doubles[i].word;      \
      const char *q= (const char *) p;                          \
      int j= 0;                                                 \
      while (pattern[j])                                        \
      {                                                         \
        if (IS_END(q, src, len) || (*q != pattern[j]))          \
          break;                                                \
        j++; q++;                                               \
      }                                                         \
      if (!(pattern[j]))                                        \
      {                                                         \
        /* full match: leave p on the last matched byte */      \
        value= (int) (doubles[i].outvalue[pass]);               \
        p= (const uchar *) q - 1;                               \
        break;                                                  \
      }                                                         \
    }                                                           \
  }                                                             \
  p++;                                                          \
  break;                                                        \
}
338
339
/*
340
  Function strnncoll, actually strcoll, with Czech sorting, which expect
341
  the length of the strings being specified
342
*/
343
344
static int my_strnncoll_czech(CHARSET_INFO *cs __attribute__((unused)),
345
			      const uchar *s1, size_t len1, 
346
			      const uchar *s2, size_t len2,
347
                              my_bool s2_is_prefix)
348
{
349
  int v1, v2;
350
  const uchar * p1, * p2;
351
  int pass1= 0, pass2= 0;
352
353
  if (s2_is_prefix && len1 > len2)
354
    len1=len2;
355
356
  p1= s1;
357
  p2= s2;
358
359
  do
360
  {
361
    int diff;
362
    NEXT_CMP_VALUE(s1, p1, pass1, v1, (int)len1, 1);
363
    NEXT_CMP_VALUE(s2, p2, pass2, v2, (int)len2, 1);
364
    if ((diff = v1 - v2))
365
      return diff;
366
  }
367
  while (v1);
368
  return 0;
369
}
370
371
372
373
/*
374
  Compare strings, ignore trailing spaces
375
*/
376
377
static int
378
my_strnncollsp_czech(CHARSET_INFO * cs __attribute__((unused)),
379
                     const uchar *s, size_t slen,
380
                     const uchar *t, size_t tlen,
381
                     my_bool diff_if_only_endspace_difference
382
                     __attribute__((unused)))
383
{
384
  int level;
385
386
  for (level= 0; level <= 3; level++)
387
  {
388
    const uchar *s1= s;
389
    const uchar *t1= t;
390
391
    for (;;)
392
    {
393
      int sval, tval, diff;
394
      NEXT_CMP_VALUE(s, s1, level, sval, (int) slen, 0);
395
      NEXT_CMP_VALUE(t, t1, level, tval, (int) tlen, 0);
396
      if (sval < 0)
397
      {
398
        sval= virtual_space[level];
399
        for (; tval >= 0 ;)
400
        {
401
          if ((diff= sval - tval))
402
            return diff;
403
          NEXT_CMP_VALUE(t, t1, level, tval, (int) tlen, 0);
404
        }
405
        break;
406
      }
407
      else if (tval < 0)
408
      {
409
        tval= virtual_space[level];
410
        for (; sval >= 0 ;)
411
        {
412
          if ((diff= sval - tval))
413
            return diff;
414
          NEXT_CMP_VALUE(s, s1, level, sval, (int) slen, 0);
415
        }
416
        break;
417
      }
418
419
      if ((diff= sval - tval))
420
        return diff;
421
    }
422
  }
423
  return 0;
424
}
425
426
427
/*
428
  Returns the number of bytes required for strnxfrm().
429
*/
430
static size_t
431
my_strnxfrmlen_czech(CHARSET_INFO *cs __attribute__((unused)), size_t len)
432
{
433
  return len * 4 + 4;
434
}
435
436
437
/*
438
  Function strnxfrm, actually strxfrm, with Czech sorting, which expect
439
  the length of the strings being specified
440
*/
441
static size_t
442
my_strnxfrm_czech(CHARSET_INFO * cs  __attribute__((unused)),
443
                  uchar *dst, size_t dstlen, uint nweights_arg,
444
                  const uchar *src, size_t srclen, uint flags)
445
{
446
  uint level;
447
  uchar *dst0= dst;
448
  uchar *de= dst + dstlen;
449
450
  if (!(flags & 0x0F)) /* All levels by default */
451
    flags|= 0x0F;
452
453
  for (level= 0; level <= 3; level++)
454
  {
455
    if (flags & (1 << level))
456
    {
457
      uint nweights= nweights_arg;
458
      const uchar *p= src;
459
      int value;
460
      uchar *dstl= dst;
461
      
462
      for (; dst < de && nweights; nweights--)
463
      {
464
        NEXT_CMP_VALUE(src, p, level, value, (int) srclen, 0);
465
        if (value < 0)
466
          break;
467
        *dst++= value;
468
      }
469
      
470
      if (dst < de && nweights && (flags & MY_STRXFRM_PAD_WITH_SPACE))
471
      {
472
        uint pad_length= de - dst;
473
        set_if_smaller(pad_length, nweights);
474
        /* fill with weight for space character */
475
        bfill(dst, pad_length, virtual_space[level]);
476
        dst+= pad_length;
477
      }
478
      
479
      my_strxfrm_desc_and_reverse(dstl, dst, flags, level);
480
      
481
      /* Add level delimiter */
482
      if (dst < de)
483
        *dst++= level < 3 ? 1 : 0;
484
    }
485
  }
486
487
  return dst - dst0;
488
}
489
490
491
#undef IS_END
492
493
494
/*
495
 */
496
497
498
/*
499
** Calculate min_str and max_str that ranges a LIKE string.
500
** Arguments:
501
** ptr		Pointer to LIKE string.
502
** ptr_length	Length of LIKE string.
503
** escape	Escape character in LIKE.  (Normally '\').
504
**		All escape characters should be removed from min_str and max_str
505
** res_length   Length of min_str and max_str.
506
** min_str      Smallest case sensitive string that ranges LIKE.
507
**		Should be space padded to res_length.
508
** max_str	Largest case sensitive string that ranges LIKE.
509
**		Normally padded with the biggest character sort value.
510
**
511
** The function should return 0 if ok and 1 if the LIKE string can't be
512
** optimized !
513
*/
514
515
#define min_sort_char 0x00
516
#define max_sort_char 0xAE
517
518
519
static my_bool my_like_range_czech(CHARSET_INFO *cs __attribute__((unused)),
520
				   const char *ptr,size_t ptr_length,
521
				   pbool escape, pbool w_one, pbool w_many,
522
				   size_t res_length, char *min_str,
523
				   char *max_str,
524
				   size_t *min_length,size_t *max_length)
525
{
526
  uchar value;
527
  const char *end=ptr+ptr_length;
528
  char *min_org=min_str;
529
  char *min_end=min_str+res_length;
530
531
  for (; ptr != end && min_str != min_end ; ptr++)
532
  {
533
    if (*ptr == w_one)		/* '_' in SQL */
534
    { break; }
535
    if (*ptr == w_many)		/* '%' in SQL */
536
    { break; }
537
538
    if (*ptr == escape && ptr+1 != end)
539
    { ptr++; }			/* Skip escape */
540
541
    value = CZ_SORT_TABLE[0][(int) (uchar) *ptr];
542
543
    if (value == 0)			/* Ignore in the first pass */
544
    { continue; }
545
    if (value <= 2)			/* End of pass or end of string */
546
    { break; }
547
    if (value == 255)		/* Double char too compicated */
548
    { break; }
549
550
    *min_str++= *max_str++ = *ptr;
551
  }
552
553
  if (cs->state & MY_CS_BINSORT)
554
    *min_length= (size_t) (min_str - min_org);
555
  else
556
  {
557
    /* 'a\0\0... is the smallest possible string */
558
    *min_length= res_length;
559
  }
560
  /* a\ff\ff... is the biggest possible string */
561
  *max_length= res_length;
562
563
  while (min_str != min_end)
564
  {
565
    *min_str++ = min_sort_char;	/* Because of key compression */
566
    *max_str++ = max_sort_char;
567
  }
568
  return 0;
569
}
570
571
572
/*
573
 * File generated by cset
574
 * (C) Abandoned 1997 Zarko Mocnik <zarko.mocnik@dem.si>
575
 *
576
 * definition table reworked by Jaromir Dolecek <dolecek@ics.muni.cz>
577
 */
578
579
/*
  Character classification flags: 257 entries, a leading 0 followed by
  one flag byte for each of the 256 byte values.
  NOTE(review): the meaning of the individual flag bits is defined
  outside this file -- presumably the ctype bits of m_ctype.h; confirm.
*/
static uchar ctype_czech[257] = {
580
0,
581
 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
582
 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
583
 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
584
132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
585
 16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1,
586
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,
587
 16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,
588
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,
589
 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, 32,
590
 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 72,
591
  1, 16,  1, 16,  1,  1, 16,  0,  0,  1,  1,  1,  1, 16,  1,  1,
592
 16,  2, 16,  2, 16,  2,  2, 16, 16,  2,  2,  2,  2, 16,  2,  2,
593
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
594
 16,  1,  1,  1,  1,  1,  1, 16,  1,  1,  1,  1,  1,  1,  1, 16,
595
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
596
  2,  2,  2,  2,  2,  2,  2, 16,  2,  2,  2,  2,  2,  2,  2, 16,
597
};
598
599
/*
  Lower-case mapping, indexed by byte value (256 entries).

  The entries for 0xA0..0xA6 used to be shifted by one position
  (0xA0 -> 0xB1, 0xA1 -> 0xA1, 0xA2 -> 0xB3, ...), so the capital
  letters at 0xA1, 0xA3, 0xA5 and 0xA6 were never folded while the
  non-letters at 0xA0, 0xA2 and 0xA4 were turned into letters.
  tab_8859_2_uni in this file shows the real pairs: 0xA1/0xB1
  (U+0104/U+0105), 0xA3/0xB3 (U+0141/U+0142), 0xA5/0xB5 (U+013D/U+013E)
  and 0xA6/0xB6 (U+015A/U+015B).

  NOTE(review): 0xD0 (U+0110) still maps to itself although 0xF0 is
  U+0111; left unchanged here -- confirm whether that is intended.
*/
static uchar to_lower_czech[] = {
  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,177,162,179,164,181,182,167,168,185,186,187,188,173,190,191,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
208,241,242,243,244,245,246,215,248,249,250,251,252,253,254,223,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
};
617
618
/*
  Upper-case mapping, indexed by byte value (256 entries).

  The entries for 0xB1, 0xB3, 0xB5 and 0xB6 used to point one position
  too low (0xA0, 0xA2, 0xA4, 0xA5), i.e. at non-letters or at the wrong
  letter.  tab_8859_2_uni in this file shows the real pairs: 0xB1/0xA1
  (U+0105/U+0104), 0xB3/0xA3 (U+0142/U+0141), 0xB5/0xA5 (U+013E/U+013D)
  and 0xB6/0xA6 (U+015B/U+015A).

  NOTE(review): 0xF0 (U+0111) still maps to itself although 0xD0 is
  U+0110; left unchanged here -- confirm whether that is intended.
*/
static uchar to_upper_czech[] = {
  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
176,161,178,163,180,165,166,183,184,169,170,171,172,189,174,175,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
240,209,210,211,212,213,214,247,216,217,218,219,220,221,222,255,
};
636
637
/*
  Single-byte sort order table, indexed by byte value (256 entries);
  it is installed in the sort_order slot of my_charset_latin2_czech_ci.
  The comparison functions in this file do not read it -- they use
  CZ_SORT_TABLE instead.
*/
static uchar sort_order_czech[] = {
638
  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
639
 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
640
 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
641
 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
642
 64, 65, 71, 72, 76, 78, 83, 84, 85, 86, 90, 91, 92, 96, 97,100,
643
105,106,107,110,114,117,122,123,124,125,127,131,132,133,134,135,
644
136, 65, 71, 72, 76, 78, 83, 84, 85, 86, 90, 91, 92, 96, 97,100,
645
105,106,107,110,114,117,122,123,124,125,127,137,138,139,140,  0,
646
  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
647
 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,255,
648
 66,255, 93,255, 94,111,255,255,255,112,113,115,128,255,129,130,
649
255, 66,255, 93,255, 94,111,255,255,112,113,115,128,255,129,130,
650
108, 67, 68, 69, 70, 95, 73, 75, 74, 79, 81, 82, 80, 89, 87, 77,
651
255, 98, 99,101,102,103,104,255,109,119,118,120,121,126,116,255,
652
108, 67, 68, 69, 70, 95, 73, 75, 74, 79, 81, 82, 80, 89, 88, 77,
653
255, 98, 99,101,102,103,104,255,109,119,118,120,121,126,116,255,
654
};
655
656
/*
  Byte value -> 16-bit Unicode code point for ISO 8859-2 (256 entries).
  Bytes 0x7F..0x9F are entered as 0, as is byte 0x00 itself.
*/
static uint16 tab_8859_2_uni[256]={
657
     0,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
658
0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
659
0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
660
0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
661
0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
662
0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
663
0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
664
0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
665
0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
666
0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
667
0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
668
0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
669
0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
670
0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
671
0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
672
0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,     0,
673
     0,     0,     0,     0,     0,     0,     0,     0,
674
     0,     0,     0,     0,     0,     0,     0,     0,
675
     0,     0,     0,     0,     0,     0,     0,     0,
676
     0,     0,     0,     0,     0,     0,     0,     0,
677
0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7,
678
0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B,
679
0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7,
680
0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C,
681
0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7,
682
0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E,
683
0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7,
684
0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF,
685
0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,
686
0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,
687
0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,
688
0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
689
};
690
691
692
/* 0000-00FD , 254 chars */
693
/*
  Reverse mapping for the range registered as 0x0000..0x00FD in
  idx_uni_8859_2: entry i holds the ISO 8859-2 byte listed for code
  point i, and 0x00 where none is listed.
*/
static uchar tab_uni_8859_2_plane00[]={
694
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
695
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
696
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
697
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
698
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
699
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
700
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
701
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x00,
702
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
703
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
704
0xA0,0x00,0x00,0x00,0xA4,0x00,0x00,0xA7,0xA8,0x00,0x00,0x00,0x00,0xAD,0x00,0x00,
705
0xB0,0x00,0x00,0x00,0xB4,0x00,0x00,0x00,0xB8,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
706
0x00,0xC1,0xC2,0x00,0xC4,0x00,0x00,0xC7,0x00,0xC9,0x00,0xCB,0x00,0xCD,0xCE,0x00,
707
0x00,0x00,0x00,0xD3,0xD4,0x00,0xD6,0xD7,0x00,0x00,0xDA,0x00,0xDC,0xDD,0x00,0xDF,
708
0x00,0xE1,0xE2,0x00,0xE4,0x00,0x00,0xE7,0x00,0xE9,0x00,0xEB,0x00,0xED,0xEE,0x00,
709
0x00,0x00,0x00,0xF3,0xF4,0x00,0xF6,0xF7,0x00,0x00,0xFA,0x00,0xFC,0xFD};
710
711
/* 0102-017E , 125 chars */
712
/*
  Reverse mapping for the range registered as 0x0102..0x017E in
  idx_uni_8859_2: entry i holds the ISO 8859-2 byte listed for code
  point 0x0102 + i, and 0x00 where none is listed.
*/
static uchar tab_uni_8859_2_plane01[]={
713
0xC3,0xE3,0xA1,0xB1,0xC6,0xE6,0x00,0x00,0x00,0x00,0xC8,0xE8,0xCF,0xEF,0xD0,0xF0,
714
0x00,0x00,0x00,0x00,0x00,0x00,0xCA,0xEA,0xCC,0xEC,0x00,0x00,0x00,0x00,0x00,0x00,
715
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
716
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xC5,0xE5,0x00,0x00,0xA5,0xB5,0x00,0x00,0xA3,
717
0xB3,0xD1,0xF1,0x00,0x00,0xD2,0xF2,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xD5,0xF5,
718
0x00,0x00,0xC0,0xE0,0x00,0x00,0xD8,0xF8,0xA6,0xB6,0x00,0x00,0xAA,0xBA,0xA9,0xB9,
719
0xDE,0xFE,0xAB,0xBB,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xD9,0xF9,0xDB,0xFB,
720
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xAC,0xBC,0xAF,0xBF,0xAE,0xBE};
721
722
/* 02C7-02DD ,  23 chars */
723
/*
  Reverse mapping for the range registered as 0x02C7..0x02DD in
  idx_uni_8859_2: entry i holds the ISO 8859-2 byte listed for code
  point 0x02C7 + i, and 0x00 where none is listed.
*/
static uchar tab_uni_8859_2_plane02[]={
724
0xB7,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
725
0x00,0xA2,0xFF,0x00,0xB2,0x00,0xBD};
726
727
/*
  Index of the Unicode -> ISO 8859-2 tables above, closed by an
  all-zero entry.
  NOTE(review): each row presumably reads {first code point, last code
  point, table}; MY_UNI_IDX is declared outside this file -- confirm.
*/
static MY_UNI_IDX idx_uni_8859_2[]={
728
  {0x0000,0x00FD,tab_uni_8859_2_plane00},
729
  {0x0102,0x017E,tab_uni_8859_2_plane01},
730
  {0x02C7,0x02DD,tab_uni_8859_2_plane02},
731
  {0,0,NULL}
732
};
733
734
735
/*
  Collation handler for the Czech collation.  The compare, transform and
  LIKE-range entries point at the functions defined in this file; the
  remaining entries (my_wildcmp_bin, my_strcasecmp_8bit, my_instr_simple,
  my_hash_sort_simple, my_propagate_simple) are helpers defined
  elsewhere.
  NOTE(review): the identifier says "_ci" although the collation is
  registered below under the name "latin2_czech_cs".
*/
static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
736
{
737
  NULL,			/* init */
738
  my_strnncoll_czech,
739
  my_strnncollsp_czech,
740
  my_strnxfrm_czech,
741
  my_strnxfrmlen_czech,
742
  my_like_range_czech,
743
  my_wildcmp_bin,
744
  my_strcasecmp_8bit,
745
  my_instr_simple,
746
  my_hash_sort_simple,
747
  my_propagate_simple
748
};
749
750
/*
  Descriptor of the latin2 Czech collation (number 2).  It ties together
  the ctype, case-mapping, sort-order and Unicode conversion tables
  defined in this file with the Czech collation handler and the generic
  8-bit character set handler.
  NOTE(review): the symbol is named "_ci" while the registered name is
  "latin2_czech_cs" and the state carries MY_CS_CSSORT.
  NOTE(review): the min/max sort char slots are 0 here, whereas
  my_like_range_czech pads with its own 0x00/0xAE constants -- confirm
  that this is intended.
*/
CHARSET_INFO my_charset_latin2_czech_ci =
751
{
752
    2,0,0,                                      /* number    */
753
    MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state     */
754
    "latin2",                                   /* cs name   */
755
    "latin2_czech_cs",                          /* name      */
756
    "",                                         /* comment   */
757
    NULL,                                       /* tailoring */
758
    ctype_czech,
759
    to_lower_czech,
760
    to_upper_czech,
761
    sort_order_czech,
762
    NULL,		/* contractions */
763
    NULL,		/* sort_order_big*/
764
    tab_8859_2_uni,	/* tab_to_uni   */
765
    idx_uni_8859_2,	/* tab_from_uni */
766
    my_unicase_default, /* caseinfo     */
767
    NULL,		/* state_map    */
768
    NULL,		/* ident_map    */
769
    4,			/* strxfrm_multiply */
770
    1,                  /* caseup_multiply  */
771
    1,                  /* casedn_multiply  */
772
    1,			/* mbminlen   */
773
    1,			/* mbmaxlen  */
774
    0,			/* min_sort_char */
775
    0,			/* max_sort_char */
776
    ' ',                /* pad char      */
777
    0,                  /* escape_with_backslash_is_dangerous */
778
    4,                  /* levels_for_compare */
779
    4,                  /* levels_for_order   */
780
    &my_charset_8bit_handler,
781
    &my_collation_latin2_czech_ci_handler
782
};
783
784
#endif