~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
212.5.39 by Monty Taylor
Phew. Moved my_base and my_global.
16
#include "m_string.h"
1 by brian
clean slate
17
#include "m_ctype.h"
18
19
#ifdef USE_MB
20
21
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
22
size_t my_caseup_str_mb(const CHARSET_INFO * const  cs, char *str)
1 by brian
clean slate
23
{
205 by Brian Aker
uint32 -> uin32_t
24
  register uint32_t l;
481 by Brian Aker
Remove all of uchar.
25
  register unsigned char *map= cs->to_upper;
1 by brian
clean slate
26
  char *str_orig= str;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
27
1 by brian
clean slate
28
  while (*str)
29
  {
30
    /* Pointing after the '\0' is safe here. */
31
    if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
32
      str+= l;
33
    else
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
34
    {
481 by Brian Aker
Remove all of uchar.
35
      *str= (char) map[(unsigned char)*str];
1 by brian
clean slate
36
      str++;
37
    }
38
  }
39
  return (size_t) (str - str_orig);
40
}
41
42
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
43
size_t my_casedn_str_mb(const CHARSET_INFO * const  cs, char *str)
1 by brian
clean slate
44
{
205 by Brian Aker
uint32 -> uin32_t
45
  register uint32_t l;
481 by Brian Aker
Remove all of uchar.
46
  register unsigned char *map= cs->to_lower;
1 by brian
clean slate
47
  char *str_orig= str;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
48
1 by brian
clean slate
49
  while (*str)
50
  {
51
    /* Pointing after the '\0' is safe here. */
52
    if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
53
      str+= l;
54
    else
55
    {
481 by Brian Aker
Remove all of uchar.
56
      *str= (char) map[(unsigned char)*str];
1 by brian
clean slate
57
      str++;
58
    }
59
  }
60
  return (size_t) (str - str_orig);
61
}
62
63
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
64
size_t my_caseup_mb(const CHARSET_INFO * const  cs, char *src, size_t srclen,
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
65
                    char *dst, size_t dstlen)
1 by brian
clean slate
66
{
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
67
#ifdef NDEBUG
68
  (void)dst;
69
  (void)dstlen;
70
#endif
205 by Brian Aker
uint32 -> uin32_t
71
  register uint32_t l;
1 by brian
clean slate
72
  register char *srcend= src + srclen;
481 by Brian Aker
Remove all of uchar.
73
  register unsigned char *map= cs->to_upper;
1 by brian
clean slate
74
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
75
  assert(src == dst && srclen == dstlen);
1 by brian
clean slate
76
  while (src < srcend)
77
  {
78
    if ((l=my_ismbchar(cs, src, srcend)))
79
      src+= l;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
80
    else
1 by brian
clean slate
81
    {
481 by Brian Aker
Remove all of uchar.
82
      *src=(char) map[(unsigned char) *src];
1 by brian
clean slate
83
      src++;
84
    }
85
  }
86
  return srclen;
87
}
88
89
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
90
size_t my_casedn_mb(const CHARSET_INFO * const  cs, char *src, size_t srclen,
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
91
                    char *dst, size_t dstlen)
1 by brian
clean slate
92
{
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
93
#ifdef NDEBUG
94
  (void)dst;
95
  (void)dstlen;
96
#endif
205 by Brian Aker
uint32 -> uin32_t
97
  register uint32_t l;
1 by brian
clean slate
98
  register char *srcend= src + srclen;
481 by Brian Aker
Remove all of uchar.
99
  register unsigned char *map=cs->to_lower;
1 by brian
clean slate
100
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
101
  assert(src == dst && srclen == dstlen);
1 by brian
clean slate
102
  while (src < srcend)
103
  {
104
    if ((l= my_ismbchar(cs, src, srcend)))
105
      src+= l;
106
    else
107
    {
481 by Brian Aker
Remove all of uchar.
108
      *src= (char) map[(unsigned char)*src];
1 by brian
clean slate
109
      src++;
110
    }
111
  }
112
  return srclen;
113
}
114
115
116
/*
117
  my_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise.
118
 */
119
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
120
int my_strcasecmp_mb(const CHARSET_INFO * const  cs,const char *s, const char *t)
1 by brian
clean slate
121
{
205 by Brian Aker
uint32 -> uin32_t
122
  register uint32_t l;
481 by Brian Aker
Remove all of uchar.
123
  register unsigned char *map=cs->to_upper;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
124
1 by brian
clean slate
125
  while (*s && *t)
126
  {
127
    /* Pointing after the '\0' is safe here. */
128
    if ((l=my_ismbchar(cs, s, s + cs->mbmaxlen)))
129
    {
130
      while (l--)
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
131
        if (*s++ != *t++)
1 by brian
clean slate
132
          return 1;
133
    }
134
    else if (my_mbcharlen(cs, *t) > 1)
135
      return 1;
481 by Brian Aker
Remove all of uchar.
136
    else if (map[(unsigned char) *s++] != map[(unsigned char) *t++])
1 by brian
clean slate
137
      return 1;
138
  }
139
  /* At least one of '*s' and '*t' is zero here. */
140
  return (*t != *s);
141
}
142
143
144
/*
** Compare string against string with wildcard
**	 0 if matched
**	-1 if not matched with wildcard
**	 1 if not matched
*/
150
151
/*
  INC_PTR(cs, A, B): advance pointer A by one character of charset 'cs',
  where B is the end of the buffer.  A moves by the length reported by
  my_ismbchar(), or by one byte when my_ismbchar() returns 0.
  A is evaluated more than once, so it must be a side-effect free lvalue.
*/
#define INC_PTR(cs,A,B) \
  ((A)+= (my_ismbchar((cs),(A),(B)) ? my_ismbchar((cs),(A),(B)) : 1))

/*
  likeconv(s, A): sort weight of the single byte A, taken from
  s->sort_order.  The whole expansion is parenthesized so it can be used
  safely inside larger expressions.
*/
#define likeconv(s,A) ((unsigned char) (s)->sort_order[(unsigned char) (A)])
1 by brian
clean slate
154
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
155
int my_wildcmp_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
156
		  const char *str,const char *str_end,
157
		  const char *wildstr,const char *wildend,
158
		  int escape, int w_one, int w_many)
159
{
160
  int result= -1;				/* Not found, using wildcards */
161
162
  while (wildstr != wildend)
163
  {
164
    while (*wildstr != w_many && *wildstr != w_one)
165
    {
166
      int l;
167
      if (*wildstr == escape && wildstr+1 != wildend)
168
	wildstr++;
169
      if ((l = my_ismbchar(cs, wildstr, wildend)))
170
      {
171
	  if (str+l > str_end || memcmp(str, wildstr, l) != 0)
172
	      return 1;
173
	  str += l;
174
	  wildstr += l;
175
      }
176
      else
177
      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
178
	return(1);				/* No match */
179
      if (wildstr == wildend)
180
	return (str != str_end);		/* Match if both are at end */
181
      result=1;					/* Found an anchor char */
182
    }
183
    if (*wildstr == w_one)
184
    {
185
      do
186
      {
187
	if (str == str_end)			/* Skip one char if possible */
188
	  return (result);
189
	INC_PTR(cs,str,str_end);
190
      } while (++wildstr < wildend && *wildstr == w_one);
191
      if (wildstr == wildend)
192
	break;
193
    }
194
    if (*wildstr == w_many)
195
    {						/* Found w_many */
481 by Brian Aker
Remove all of uchar.
196
      unsigned char cmp;
1 by brian
clean slate
197
      const char* mb = wildstr;
198
      int mb_len=0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
199
1 by brian
clean slate
200
      wildstr++;
201
      /* Remove any '%' and '_' from the wild search string */
202
      for (; wildstr != wildend ; wildstr++)
203
      {
204
	if (*wildstr == w_many)
205
	  continue;
206
	if (*wildstr == w_one)
207
	{
208
	  if (str == str_end)
209
	    return (-1);
210
	  INC_PTR(cs,str,str_end);
211
	  continue;
212
	}
213
	break;					/* Not a wild character */
214
      }
215
      if (wildstr == wildend)
216
	return(0);				/* Ok if w_many is last */
217
      if (str == str_end)
218
	return -1;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
219
1 by brian
clean slate
220
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
221
	cmp= *++wildstr;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
222
1 by brian
clean slate
223
      mb=wildstr;
224
      mb_len= my_ismbchar(cs, wildstr, wildend);
225
      INC_PTR(cs,wildstr,wildend);		/* This is compared trough cmp */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
226
      cmp=likeconv(cs,cmp);
1 by brian
clean slate
227
      do
228
      {
229
        for (;;)
230
        {
231
          if (str >= str_end)
232
            return -1;
233
          if (mb_len)
234
          {
235
            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
236
            {
237
              str += mb_len;
238
              break;
239
            }
240
          }
241
          else if (!my_ismbchar(cs, str, str_end) &&
242
                   likeconv(cs,*str) == cmp)
243
          {
244
            str++;
245
            break;
246
          }
247
          INC_PTR(cs,str, str_end);
248
        }
249
	{
250
	  int tmp=my_wildcmp_mb(cs,str,str_end,wildstr,wildend,escape,w_one,
251
                                w_many);
252
	  if (tmp <= 0)
253
	    return (tmp);
254
	}
255
      } while (str != str_end && wildstr[0] != w_many);
256
      return(-1);
257
    }
258
  }
259
  return (str != str_end ? 1 : 0);
260
}
261
262
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
263
size_t my_numchars_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
264
		      const char *pos, const char *end)
265
{
266
  register size_t count= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
267
  while (pos < end)
1 by brian
clean slate
268
  {
482 by Brian Aker
Remove uint.
269
    uint32_t mb_len;
1 by brian
clean slate
270
    pos+= (mb_len= my_ismbchar(cs,pos,end)) ? mb_len : 1;
271
    count++;
272
  }
273
  return count;
274
}
275
276
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
277
size_t my_charpos_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
278
		     const char *pos, const char *end, size_t length)
279
{
280
  const char *start= pos;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
281
1 by brian
clean slate
282
  while (length && pos < end)
283
  {
482 by Brian Aker
Remove uint.
284
    uint32_t mb_len;
1 by brian
clean slate
285
    pos+= (mb_len= my_ismbchar(cs, pos, end)) ? mb_len : 1;
286
    length--;
287
  }
288
  return (size_t) (length ? end+2-start : pos-start);
289
}
290
291
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
292
size_t my_well_formed_len_mb(const CHARSET_INFO * const cs, const char *b, const char *e,
1 by brian
clean slate
293
                             size_t pos, int *error)
294
{
295
  const char *b_start= b;
296
  *error= 0;
297
  while (pos)
298
  {
299
    my_wc_t wc;
300
    int mb_len;
301
481 by Brian Aker
Remove all of uchar.
302
    if ((mb_len= cs->cset->mb_wc(cs, &wc, (const unsigned char*) b, (const unsigned char*) e)) <= 0)
1 by brian
clean slate
303
    {
304
      *error= b < e ? 1 : 0;
305
      break;
306
    }
307
    b+= mb_len;
308
    pos--;
309
  }
310
  return (size_t) (b - b_start);
311
}
312
313
482 by Brian Aker
Remove uint.
314
uint32_t my_instr_mb(const CHARSET_INFO * const cs,
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
315
                 const char *b, size_t b_length,
1 by brian
clean slate
316
                 const char *s, size_t s_length,
482 by Brian Aker
Remove uint.
317
                 my_match_t *match, uint32_t nmatch)
1 by brian
clean slate
318
{
319
  register const char *end, *b0;
320
  int res= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
321
1 by brian
clean slate
322
  if (s_length <= b_length)
323
  {
324
    if (!s_length)
325
    {
326
      if (nmatch)
327
      {
328
        match->beg= 0;
329
        match->end= 0;
330
        match->mb_len= 0;
331
      }
332
      return 1;		/* Empty string is always found */
333
    }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
334
1 by brian
clean slate
335
    b0= b;
336
    end= b+b_length-s_length+1;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
337
1 by brian
clean slate
338
    while (b < end)
339
    {
340
      int mb_len;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
341
342
      if (!cs->coll->strnncoll(cs, (const unsigned char*) b, s_length,
481 by Brian Aker
Remove all of uchar.
343
                                   (const unsigned char*) s, s_length, 0))
1 by brian
clean slate
344
      {
345
        if (nmatch)
346
        {
347
          match[0].beg= 0;
348
          match[0].end= (size_t) (b-b0);
349
          match[0].mb_len= res;
350
          if (nmatch > 1)
351
          {
352
            match[1].beg= match[0].end;
353
            match[1].end= match[0].end+s_length;
354
            match[1].mb_len= 0;	/* Not computed */
355
          }
356
        }
357
        return 2;
358
      }
359
      mb_len= (mb_len= my_ismbchar(cs, b, end)) ? mb_len : 1;
360
      b+= mb_len;
361
      b_length-= mb_len;
362
      res++;
363
    }
364
  }
365
  return 0;
366
}
367
368
369
/* BINARY collations handlers for MB charsets */
370
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
371
int my_strnncoll_mb_bin(const CHARSET_INFO * const,
481 by Brian Aker
Remove all of uchar.
372
                        const unsigned char *s, size_t slen,
373
                        const unsigned char *t, size_t tlen,
276 by Brian Aker
Cleaned out my_bool from strings.
374
                        bool t_is_prefix)
1 by brian
clean slate
375
{
398.1.4 by Monty Taylor
Renamed max/min.
376
  size_t len=cmin(slen,tlen);
1 by brian
clean slate
377
  int cmp= memcmp(s,t,len);
378
  return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen);
379
}
380
381
382
/*
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
383
  Compare two strings.
384
1 by brian
clean slate
385
  SYNOPSIS
386
    my_strnncollsp_mb_bin()
387
    cs			Chararacter set
388
    s			String to compare
389
    slen		Length of 's'
390
    t			String to compare
391
    tlen		Length of 't'
392
    diff_if_only_endspace_difference
393
		        Set to 1 if the strings should be regarded as different
394
                        if they only difference in end space
395
396
  NOTE
397
   This function is used for character strings with binary collations.
398
   The shorter string is extended with end space to be as long as the longer
399
   one.
400
401
  RETURN
402
    A negative number if s < t
403
    A positive number if s > t
404
    0 if strings are equal
405
*/
406
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
407
int my_strnncollsp_mb_bin(const CHARSET_INFO * const,
481 by Brian Aker
Remove all of uchar.
408
                          const unsigned char *a, size_t a_length,
409
                          const unsigned char *b, size_t b_length,
276 by Brian Aker
Cleaned out my_bool from strings.
410
                          bool diff_if_only_endspace_difference)
1 by brian
clean slate
411
{
481 by Brian Aker
Remove all of uchar.
412
  const unsigned char *end;
1 by brian
clean slate
413
  size_t length;
414
  int res;
415
416
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
417
  diff_if_only_endspace_difference= 0;
418
#endif
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
419
398.1.4 by Monty Taylor
Renamed max/min.
420
  end= a + (length= cmin(a_length, b_length));
1 by brian
clean slate
421
  while (a < end)
422
  {
423
    if (*a++ != *b++)
424
      return ((int) a[-1] - (int) b[-1]);
425
  }
426
  res= 0;
427
  if (a_length != b_length)
428
  {
429
    int swap= 1;
430
    if (diff_if_only_endspace_difference)
431
      res= 1;                                   /* Assume 'a' is bigger */
432
    /*
433
      Check the next not space character of the longer key. If it's < ' ',
434
      then it's smaller than the other key.
435
    */
436
    if (a_length < b_length)
437
    {
438
      /* put shorter key in s */
439
      a_length= b_length;
440
      a= b;
441
      swap= -1;					/* swap sign of result */
442
      res= -res;
443
    }
444
    for (end= a + a_length-length; a < end ; a++)
445
    {
446
      if (*a != ' ')
447
	return (*a < ' ') ? -swap : swap;
448
    }
449
  }
450
  return res;
451
}
452
453
454
/*
  Copy one non-ASCII character from "src" to "dst", advancing both.
  "dst" must have enough room for the character.

  The number of bytes copied is chosen from the value returned by
  cs->cset->ismbchar(): 4, 3 or 2 bytes for a multi-byte character, one
  byte when it returns 0.  Any other return value copies nothing.

  Note, we don't use sort_order[] in this macro.
  This is correct even for case insensitive collations:
  - basic Latin letters are processed outside this macro;
  - for other characters sort_order[x] is equal to x.

  The body is wrapped in do { } while (0) so that the macro behaves as a
  single statement (e.g. as the unbraced branch of an if/else).
  "dst" and "src" must be modifiable pointer lvalues.
*/
#define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se)                  \
do                                                                       \
{                                                                        \
  switch ((cs)->cset->ismbchar((cs), (const char*) (src),                \
                               (const char*) (se))) {                    \
  case 4:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 3:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 2:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 0:                                                                \
    *(dst)++= *(src)++; /* byte in range 0x80..0xFF, not an MB head */   \
    break;                                                               \
  default:                                                               \
    break;              /* unexpected length: copy nothing */            \
  }                                                                      \
} while (0)
478
479
480
/*
481
  For character sets with two or three byte multi-byte
482
  characters having multibyte weights *equal* to their codes:
483
  cp932, euckr, gb2312, sjis, eucjpms, ujis.
484
*/
485
size_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
486
my_strnxfrm_mb(const CHARSET_INFO * const cs,
482 by Brian Aker
Remove uint.
487
               unsigned char *dst, size_t dstlen, uint32_t nweights,
488
               const unsigned char *src, size_t srclen, uint32_t flags)
1 by brian
clean slate
489
{
481 by Brian Aker
Remove all of uchar.
490
  unsigned char *d0= dst;
491
  unsigned char *de= dst + dstlen;
492
  const unsigned char *se= src + srclen;
493
  const unsigned char *sort_order= cs->sort_order;
1 by brian
clean slate
494
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
495
  assert(cs->mbmaxlen <= 4);
1 by brian
clean slate
496
497
  /*
498
    If "srclen" is smaller than both "dstlen" and "nweights"
499
    then we can run a simplified loop -
500
    without checking "nweights" and "de".
501
  */
502
  if (dstlen >= srclen && nweights >= srclen)
503
  {
504
    if (sort_order)
505
    {
506
      /* Optimized version for a case insensitive collation */
507
      for (; src < se; nweights--)
508
      {
509
        if (*src < 128) /* quickly catch ASCII characters */
510
          *dst++= sort_order[*src++];
511
        else
512
          my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
513
      }
514
    }
515
    else
516
    {
517
      /* Optimized version for a case sensitive collation (no sort_order) */
518
      for (; src < se; nweights--)
519
      {
520
        if (*src < 128) /* quickly catch ASCII characters */
521
          *dst++= *src++;
522
        else
523
          my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
524
      }
525
    }
526
    goto pad;
527
  }
528
529
  /*
530
    A thourough loop, checking all possible limits:
531
    "se", "nweights" and "de".
532
  */
533
  for (; src < se && nweights; nweights--)
534
  {
535
    int chlen;
536
    if (*src < 128 ||
537
        !(chlen= cs->cset->ismbchar(cs, (const char*) src, (const char*) se)))
538
    {
539
      /* Single byte character */
540
      if (dst >= de)
541
        break;
542
      *dst++= sort_order ? sort_order[*src++] : *src++;
543
    }
544
    else
545
    {
546
      /* Multi-byte character */
547
      if (dst + chlen > de)
548
        break;
549
      *dst++= *src++;
550
      *dst++= *src++;
551
      if (chlen == 3)
552
        *dst++= *src++;
553
    }
554
  }
555
556
pad:
557
  return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
558
}
559
560
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
561
/*
  Binary collations compare bytes as they are, so this is a plain
  strcmp() of the two NUL-terminated strings; the charset is unused.
*/
int my_strcasecmp_mb_bin(const CHARSET_INFO * const,
                         const char *s, const char *t)
{
  int order= strcmp(s, t);

  return order;
}
566
567
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
568
void my_hash_sort_mb_bin(const CHARSET_INFO * const,
569
                         const unsigned char *key, size_t len,
570
                         uint32_t *nr1, uint32_t *nr2)
1 by brian
clean slate
571
{
481 by Brian Aker
Remove all of uchar.
572
  const unsigned char *pos = key;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
573
1 by brian
clean slate
574
  /*
575
     Remove trailing spaces. We have to do this to be able to compare
576
    'A ' and 'A' as identical
577
  */
578
  key= skip_trailing_space(key, len);
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
579
481 by Brian Aker
Remove all of uchar.
580
  for (; pos < (const unsigned char*) key ; pos++)
1 by brian
clean slate
581
  {
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
582
    nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) *
1 by brian
clean slate
583
	     ((uint)*pos)) + (nr1[0] << 8);
584
    nr2[0]+=3;
585
  }
586
}
587
588
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
589
/*
1 by brian
clean slate
590
  Fill the given buffer with 'maximum character' for given charset
591
  SYNOPSIS
592
      pad_max_char()
593
      cs   Character set
594
      str  Start of buffer to fill
595
      end  End of buffer to fill
596
597
  DESCRIPTION
598
      Write max key:
599
      - for non-Unicode character sets:
600
        just set to 255.
601
      - for Unicode character set (utf-8):
602
        create a buffer with multibyte representation of the max_sort_char
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
603
        character, and copy it into max_str in a loop.
1 by brian
clean slate
604
*/
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
605
static void pad_max_char(const CHARSET_INFO * const cs, char *str, char *end)
1 by brian
clean slate
606
{
607
  char buf[10];
608
  char buflen;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
609
1 by brian
clean slate
610
  if (!(cs->state & MY_CS_UNICODE))
611
  {
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
612
    memset(str, 255, end - str);
1 by brian
clean slate
613
    return;
614
  }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
615
481 by Brian Aker
Remove all of uchar.
616
  buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (unsigned char*) buf,
617
                          (unsigned char*) buf + sizeof(buf));
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
618
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
619
  assert(buflen > 0);
1 by brian
clean slate
620
  do
621
  {
622
    if ((str + buflen) < end)
623
    {
624
      /* Enough space for the characer */
625
      memcpy(str, buf, buflen);
626
      str+= buflen;
627
    }
628
    else
629
    {
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
630
      /*
1 by brian
clean slate
631
        There is no space for whole multibyte
632
        character, then add trailing spaces.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
633
      */
1 by brian
clean slate
634
      *str++= ' ';
635
    }
636
  } while (str < end);
637
}
638
639
/*
640
** Calculate min_str and max_str that ranges a LIKE string.
641
** Arguments:
642
** ptr		Pointer to LIKE string.
643
** ptr_length	Length of LIKE string.
644
** escape	Escape character in LIKE.  (Normally '\').
77.1.95 by Monty Taylor
Fixed silly my_bool==char nonsense.
645
**		All escape characters should be removed from
646
**              min_str and max_str
647
** w_one        Single char matching char in LIKE (Normally '_')
648
** w_many       Multiple char matching char in LIKE (Normally '%')
1 by brian
clean slate
649
** res_length	Length of min_str and max_str.
650
** min_str	Smallest case sensitive string that ranges LIKE.
651
**		Should be space padded to res_length.
652
** max_str	Largest case sensitive string that ranges LIKE.
653
**		Normally padded with the biggest character sort value.
654
**
655
** The function should return 0 if ok and 1 if the LIKE string can't be
656
** optimized !
657
*/
658
276 by Brian Aker
Cleaned out my_bool from strings.
659
bool my_like_range_mb(const CHARSET_INFO * const cs,
77.1.95 by Monty Taylor
Fixed silly my_bool==char nonsense.
660
                         const char *ptr,size_t ptr_length,
661
                         char escape, char w_one, char w_many,
662
                         size_t res_length,
663
                         char *min_str,char *max_str,
664
                         size_t *min_length,size_t *max_length)
1 by brian
clean slate
665
{
482 by Brian Aker
Remove uint.
666
  uint32_t mb_len;
1 by brian
clean slate
667
  const char *end= ptr + ptr_length;
668
  char *min_org= min_str;
669
  char *min_end= min_str + res_length;
670
  char *max_end= max_str + res_length;
671
  size_t maxcharlen= res_length / cs->mbmaxlen;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
672
  const char *contraction_flags= cs->contractions ?
1 by brian
clean slate
673
              ((const char*) cs->contractions) + 0x40*0x40 : NULL;
674
675
  for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
676
  {
677
    /* We assume here that escape, w_any, w_namy are one-byte characters */
678
    if (*ptr == escape && ptr+1 != end)
679
      ptr++;                                    /* Skip escape */
680
    else if (*ptr == w_one || *ptr == w_many)   /* '_' and '%' in SQL */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
681
    {
1 by brian
clean slate
682
fill_max_and_min:
683
      /*
684
        Calculate length of keys:
685
        'a\0\0... is the smallest possible string when we have space expand
686
        a\ff\ff... is the biggest possible string
687
      */
688
      *min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
689
                    res_length);
690
      *max_length= res_length;
691
      /* Create min key  */
692
      do
693
      {
694
	*min_str++= (char) cs->min_sort_char;
695
      } while (min_str != min_end);
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
696
697
      /*
1 by brian
clean slate
698
        Write max key: create a buffer with multibyte
699
        representation of the max_sort_char character,
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
700
        and copy it into max_str in a loop.
1 by brian
clean slate
701
      */
702
      *max_length= res_length;
703
      pad_max_char(cs, max_str, max_end);
704
      return 0;
705
    }
706
    if ((mb_len= my_ismbchar(cs, ptr, end)) > 1)
707
    {
708
      if (ptr+mb_len > end || min_str+mb_len > min_end)
709
        break;
710
      while (mb_len--)
711
       *min_str++= *max_str++= *ptr++;
712
    }
713
    else
714
    {
715
      /*
716
        Special case for collations with contractions.
717
        For example, in Chezh, 'ch' is a separate letter
718
        which is sorted between 'h' and 'i'.
719
        If the pattern 'abc%', 'c' at the end can mean:
720
        - letter 'c' itself,
721
        - beginning of the contraction 'ch'.
722
723
        If we simply return this LIKE range:
724
725
         'abc\min\min\min' and 'abc\max\max\max'
726
727
        then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
728
        will only find values starting from 'abc[^h]',
729
        but won't find values starting from 'abch'.
730
731
        We must ignore contraction heads followed by w_one or w_many.
732
        ('Contraction head' means any letter which can be the first
733
        letter in a contraction)
734
735
        For example, for Czech 'abc%', we will return LIKE range,
736
        which is equal to LIKE range for 'ab%':
737
738
        'ab\min\min\min\min' and 'ab\max\max\max\max'.
739
740
      */
741
      if (contraction_flags && ptr + 1 < end &&
481 by Brian Aker
Remove all of uchar.
742
          contraction_flags[(unsigned char) *ptr])
1 by brian
clean slate
743
      {
744
        /* Ptr[0] is a contraction head. */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
745
1 by brian
clean slate
746
        if (ptr[1] == w_one || ptr[1] == w_many)
747
        {
748
          /* Contraction head followed by a wildcard, quit. */
749
          goto fill_max_and_min;
750
        }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
751
1 by brian
clean slate
752
        /*
753
          Some letters can be both contraction heads and contraction tails.
754
          For example, in Danish 'aa' is a separate single letter which
755
          is sorted after 'z'. So 'a' can be both head and tail.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
756
1 by brian
clean slate
757
          If ptr[0]+ptr[1] is a contraction,
758
          then put both letters together.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
759
1 by brian
clean slate
760
          If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
761
          is not a contraction, then we put only ptr[0],
762
          and continue with ptr[1] on the next loop.
763
        */
481 by Brian Aker
Remove all of uchar.
764
        if (contraction_flags[(unsigned char) ptr[1]] &&
1 by brian
clean slate
765
            cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40])
766
        {
767
          /* Contraction found */
768
          if (maxcharlen == 1 || min_str + 1 >= min_end)
769
          {
770
            /* Both contraction parts don't fit, quit */
771
            goto fill_max_and_min;
772
          }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
773
1 by brian
clean slate
774
          /* Put contraction head */
775
          *min_str++= *max_str++= *ptr++;
776
          maxcharlen--;
777
        }
778
      }
779
      /* Put contraction tail, or a single character */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
780
      *min_str++= *max_str++= *ptr++;
1 by brian
clean slate
781
    }
782
  }
783
784
  *min_length= *max_length = (size_t) (min_str - min_org);
785
  while (min_str != min_end)
786
    *min_str++= *max_str++= ' ';           /* Because if key compression */
787
  return 0;
788
}
789
790
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
791
/*
  Compare a string with a wildcard pattern, byte-wise, for a multi-byte
  character set.

  Literal pattern characters are compared with the subject as raw bytes;
  my_ismbchar() is used so that a multi-byte character is always compared
  and skipped as one unit.

  cs                Character set, passed to my_ismbchar() and INC_PTR()
  str, str_end      Subject string: [str, str_end)
  wildstr, wildend  Pattern: [wildstr, wildend)
  escape            Pattern character that makes the next one literal
  w_one             Pattern character that matches exactly one character
  w_many            Pattern character that matches any run of characters

  Returns 0 when the subject matches the pattern and a non-zero value
  (1 or -1) when it does not.
*/
int my_wildcmp_mb_bin(const CHARSET_INFO * const cs,
                      const char *str,const char *str_end,
                      const char *wildstr,const char *wildend,
                      int escape, int w_one, int w_many)
{
  int result= -1;                       /* Not found, using wildcards */

  while (wildstr != wildend)
  {
    /* Literal run: compare up to the next wildcard character */
    while (*wildstr != w_many && *wildstr != w_one)
    {
      int l;
      if (*wildstr == escape && wildstr+1 != wildend)
        wildstr++;
      if ((l = my_ismbchar(cs, wildstr, wildend)))
      {
        if (str+l > str_end || memcmp(str, wildstr, l) != 0)
          return 1;
        str += l;
        wildstr += l;
      }
      else if (str == str_end || *wildstr++ != *str++)
        return 1;                       /* No match */
      if (wildstr == wildend)
        return (str != str_end);        /* Match if both are at end */
      result= 1;                        /* Found an anchor char */
    }
    if (*wildstr == w_one)
    {
      do
      {
        if (str == str_end)             /* Skip one char if possible */
          return result;
        INC_PTR(cs,str,str_end);
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
        break;
    }
    if (*wildstr == w_many)
    {                                   /* Found w_many */
      unsigned char cmp;
      const char *mb;
      int mb_len;

      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      for (; wildstr != wildend ; wildstr++)
      {
        if (*wildstr == w_many)
          continue;
        if (*wildstr == w_one)
        {
          if (str == str_end)
            return -1;
          INC_PTR(cs,str,str_end);
          continue;
        }
        break;                          /* Not a wild character */
      }
      if (wildstr == wildend)
        return 0;                       /* Ok if w_many is last */
      if (str == str_end)
        return -1;

      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
        cmp= *++wildstr;

      /* Remember the first literal character that follows w_many */
      mb= wildstr;
      mb_len= my_ismbchar(cs, wildstr, wildend);
      INC_PTR(cs,wildstr,wildend);      /* This is compared through cmp */
      do
      {
        /* Scan the subject for the next occurrence of that character */
        for (;;)
        {
          if (str >= str_end)
            return -1;
          if (mb_len)
          {
            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
            {
              str += mb_len;
              break;
            }
          }
          /*
            Compare as unsigned bytes: cmp is unsigned char, and a plain
            (possibly signed) *str would never equal a value >= 0x80.
          */
          else if (!my_ismbchar(cs, str, str_end) &&
                   (unsigned char) *str == cmp)
          {
            str++;
            break;
          }
          INC_PTR(cs,str, str_end);
        }
        /* Try to match the rest of the pattern from this position */
        {
          int tmp= my_wildcmp_mb_bin(cs,str,str_end,wildstr,wildend,
                                     escape,w_one,w_many);
          if (tmp <= 0)
            return tmp;
        }
      } while (str != str_end && wildstr[0] != w_many);
      return -1;
    }
  }
  return (str != str_end ? 1 : 0);
}
894
895
896
/*
  Data was produced from EastAsianWidth.txt
  using utr11-dump utility.
*/
900
/*
  East Asian Width flags for code points U+1100..U+11FF.
  Selected through utr11_data[0x11] and indexed by the low byte of the
  code point. my_numcells_mb() adds the entry to the one cell it always
  counts, so 1 marks a character that takes two display cells.
*/
static char pg11[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
911
912
/*
  East Asian Width flags for code points U+2300..U+23FF.
  Selected through utr11_data[0x23] and indexed by the low byte of the
  code point. my_numcells_mb() adds the entry to the one cell it always
  counts, so 1 marks a character that takes two display cells.
*/
static char pg23[256]=
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
923
924
/*
  East Asian Width flags for code points U+2E00..U+2EFF.
  Selected through utr11_data[0x2E] and indexed by the low byte of the
  code point. my_numcells_mb() adds the entry to the one cell it always
  counts, so 1 marks a character that takes two display cells.
*/
static char pg2E[256]=
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0
};
935
936
/*
  East Asian Width flags for code points U+2F00..U+2FFF.
  Selected through utr11_data[0x2F] and indexed by the low byte of the
  code point. my_numcells_mb() adds the entry to the one cell it always
  counts, so 1 marks a character that takes two display cells.
*/
static char pg2F[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0
};
947
948
/*
  East Asian Width flags for code points U+3000..U+30FF.
  Selected through utr11_data[0x30] and indexed by the low byte of the
  code point. my_numcells_mb() adds the entry to the one cell it always
  counts, so 1 marks a character that takes two display cells.
*/
static char pg30[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
};
959
960
/*
  East Asian Width flags for code points U+3100..U+31FF.
  Selected through utr11_data[0x31] and indexed by the low byte of the
  code point. my_numcells_mb() adds the entry to the one cell it always
  counts, so 1 marks a character that takes two display cells.
*/
static char pg31[256]=
{
0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
};
971
972
/*
  East Asian Width flags for code points U+3200..U+32FF.
  Selected through utr11_data[0x32] and indexed by the low byte of the
  code point. my_numcells_mb() adds the entry to the one cell it always
  counts, so 1 marks a character that takes two display cells.
*/
static char pg32[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
};
983
984
/*
  East Asian Width flags for code points U+4D00..U+4DFF.
  Selected through utr11_data[0x4D] and indexed by the low byte of the
  code point. my_numcells_mb() adds the entry to the one cell it always
  counts, so 1 marks a character that takes two display cells.
*/
static char pg4D[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
995
996
/*
  East Asian Width flags for code points U+9F00..U+9FFF.
  Selected through utr11_data[0x9F] and indexed by the low byte of the
  code point. my_numcells_mb() adds the entry to the one cell it always
  counts, so 1 marks a character that takes two display cells.
*/
static char pg9F[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1007
1008
/*
  East Asian Width flags for code points U+A400..U+A4FF.
  Selected through utr11_data[0xA4] and indexed by the low byte of the
  code point. my_numcells_mb() adds the entry to the one cell it always
  counts, so 1 marks a character that takes two display cells.
*/
static char pgA4[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1019
1020
/*
  East Asian Width flags for code points U+D700..U+D7FF.
  Selected through utr11_data[0xD7] and indexed by the low byte of the
  code point. my_numcells_mb() adds the entry to the one cell it always
  counts, so 1 marks a character that takes two display cells.
*/
static char pgD7[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1031
1032
/*
  East Asian Width flags for code points U+FA00..U+FAFF.
  Selected through utr11_data[0xFA] and indexed by the low byte of the
  code point. my_numcells_mb() adds the entry to the one cell it always
  counts, so 1 marks a character that takes two display cells.
*/
static char pgFA[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1043
1044
/*
  East Asian Width flags for code points U+FE00..U+FEFF.
  Selected through utr11_data[0xFE] and indexed by the low byte of the
  code point. my_numcells_mb() adds the entry to the one cell it always
  counts, so 1 marks a character that takes two display cells.
*/
static char pgFE[256]=
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1055
1056
/*
  East Asian Width flags for code points U+FF00..U+FFFF.
  Selected through utr11_data[0xFF] and indexed by the low byte of the
  code point. my_numcells_mb() adds the entry to the one cell it always
  counts, so 1 marks a character that takes two display cells.
*/
static char pgFF[256]=
{
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
1067
1068
static struct {int page; char *p;} utr11_data[256]=
1069
{
1070
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1071
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1072
{0,NULL},{0,pg11},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1073
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1074
{0,NULL},{0,NULL},{0,NULL},{0,pg23},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1075
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,pg2E},{0,pg2F},
1076
{0,pg30},{0,pg31},{0,pg32},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1077
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1078
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1079
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg4D},{1,NULL},{1,NULL},
1080
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1081
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1082
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1083
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1084
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1085
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1086
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1087
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1088
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1089
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg9F},
1090
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgA4},{0,NULL},{0,NULL},{0,NULL},
1091
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1092
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1093
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1094
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1095
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1096
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgD7},
1097
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1098
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1099
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1100
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1101
{0,NULL},{1,NULL},{0,pgFA},{0,NULL},{0,NULL},{0,NULL},{0,pgFE},{0,pgFF}
1102
};
1103
1104
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1105
size_t my_numcells_mb(const CHARSET_INFO * const cs, const char *b, const char *e)
1 by brian
clean slate
1106
{
1107
  my_wc_t wc;
1108
  size_t clen= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
1109
1 by brian
clean slate
1110
  while (b < e)
1111
  {
1112
    int mb_len;
482 by Brian Aker
Remove uint.
1113
    uint32_t pg;
481 by Brian Aker
Remove all of uchar.
1114
    if ((mb_len= cs->cset->mb_wc(cs, &wc, (unsigned char*) b, (unsigned char*) e)) <= 0 ||
1 by brian
clean slate
1115
        wc > 0xFFFF)
1116
    {
1117
      /*
1118
        Let's think a wrong sequence takes 1 dysplay cell.
1119
        Also, consider supplementary characters as taking one cell.
1120
      */
1121
      mb_len= 1;
1122
      b++;
1123
      continue;
1124
    }
1125
    b+= mb_len;
1126
    pg= (wc >> 8) & 0xFF;
1127
    clen+= utr11_data[pg].p ? utr11_data[pg].p[wc & 0xFF] : utr11_data[pg].page;
1128
    clen++;
1129
  }
1130
  return clen;
1131
}
1132
1133
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1134
int my_mb_ctype_mb(const CHARSET_INFO * const cs, int *ctype,
481 by Brian Aker
Remove all of uchar.
1135
                   const unsigned char *s, const unsigned char *e)
1 by brian
clean slate
1136
{
1137
  my_wc_t wc;
1138
  int res= cs->cset->mb_wc(cs, &wc, s, e);
1139
  if (res <= 0 || wc > 0xFFFF)
1140
    *ctype= 0;
1141
  else
1142
    *ctype= my_uni_ctype[wc>>8].ctype ?
1143
            my_uni_ctype[wc>>8].ctype[wc&0xFF] :
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
1144
            my_uni_ctype[wc>>8].pctype;
1 by brian
clean slate
1145
  return res;
1146
}
1147
1148
1149
MY_COLLATION_HANDLER my_collation_mb_bin_handler =
1150
{
1151
    NULL,              /* init */
1152
    my_strnncoll_mb_bin,
1153
    my_strnncollsp_mb_bin,
1154
    my_strnxfrm_mb,
1155
    my_strnxfrmlen_simple,
1156
    my_like_range_mb,
1157
    my_wildcmp_mb_bin,
1158
    my_strcasecmp_mb_bin,
1159
    my_instr_mb,
1160
    my_hash_sort_mb_bin,
1161
    my_propagate_simple
1162
};
1163
1164
#endif