~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
1130.3.26 by Monty Taylor
Removed global.h from headers.
16
#include "drizzled/global.h"
17
212.5.39 by Monty Taylor
Phew. Moved my_base and my_global.
18
#include "m_string.h"
1 by brian
clean slate
19
#include "m_ctype.h"
20
1067.4.9 by Nathan Williams
Converted all usages of cmin/cmax in mystrings directory to use std::min/max
21
#include <algorithm>
22
23
using namespace std;
24
1 by brian
clean slate
25
26
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
27
size_t my_caseup_str_mb(const CHARSET_INFO * const  cs, char *str)
1 by brian
clean slate
28
{
205 by Brian Aker
uint32 -> uin32_t
29
  register uint32_t l;
481 by Brian Aker
Remove all of uchar.
30
  register unsigned char *map= cs->to_upper;
1 by brian
clean slate
31
  char *str_orig= str;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
32
1 by brian
clean slate
33
  while (*str)
34
  {
35
    /* Pointing after the '\0' is safe here. */
36
    if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
37
      str+= l;
38
    else
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
39
    {
481 by Brian Aker
Remove all of uchar.
40
      *str= (char) map[(unsigned char)*str];
1 by brian
clean slate
41
      str++;
42
    }
43
  }
44
  return (size_t) (str - str_orig);
45
}
46
47
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
48
size_t my_casedn_str_mb(const CHARSET_INFO * const  cs, char *str)
1 by brian
clean slate
49
{
205 by Brian Aker
uint32 -> uin32_t
50
  register uint32_t l;
481 by Brian Aker
Remove all of uchar.
51
  register unsigned char *map= cs->to_lower;
1 by brian
clean slate
52
  char *str_orig= str;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
53
1 by brian
clean slate
54
  while (*str)
55
  {
56
    /* Pointing after the '\0' is safe here. */
57
    if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
58
      str+= l;
59
    else
60
    {
481 by Brian Aker
Remove all of uchar.
61
      *str= (char) map[(unsigned char)*str];
1 by brian
clean slate
62
      str++;
63
    }
64
  }
65
  return (size_t) (str - str_orig);
66
}
67
68
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
69
size_t my_caseup_mb(const CHARSET_INFO * const  cs, char *src, size_t srclen,
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
70
                    char *dst, size_t dstlen)
1 by brian
clean slate
71
{
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
72
#ifdef NDEBUG
73
  (void)dst;
74
  (void)dstlen;
75
#endif
205 by Brian Aker
uint32 -> uin32_t
76
  register uint32_t l;
1 by brian
clean slate
77
  register char *srcend= src + srclen;
481 by Brian Aker
Remove all of uchar.
78
  register unsigned char *map= cs->to_upper;
1 by brian
clean slate
79
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
80
  assert(src == dst && srclen == dstlen);
1 by brian
clean slate
81
  while (src < srcend)
82
  {
83
    if ((l=my_ismbchar(cs, src, srcend)))
84
      src+= l;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
85
    else
1 by brian
clean slate
86
    {
481 by Brian Aker
Remove all of uchar.
87
      *src=(char) map[(unsigned char) *src];
1 by brian
clean slate
88
      src++;
89
    }
90
  }
91
  return srclen;
92
}
93
94
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
95
size_t my_casedn_mb(const CHARSET_INFO * const  cs, char *src, size_t srclen,
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
96
                    char *dst, size_t dstlen)
1 by brian
clean slate
97
{
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
98
#ifdef NDEBUG
99
  (void)dst;
100
  (void)dstlen;
101
#endif
205 by Brian Aker
uint32 -> uin32_t
102
  register uint32_t l;
1 by brian
clean slate
103
  register char *srcend= src + srclen;
481 by Brian Aker
Remove all of uchar.
104
  register unsigned char *map=cs->to_lower;
1 by brian
clean slate
105
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
106
  assert(src == dst && srclen == dstlen);
1 by brian
clean slate
107
  while (src < srcend)
108
  {
109
    if ((l= my_ismbchar(cs, src, srcend)))
110
      src+= l;
111
    else
112
    {
481 by Brian Aker
Remove all of uchar.
113
      *src= (char) map[(unsigned char)*src];
1 by brian
clean slate
114
      src++;
115
    }
116
  }
117
  return srclen;
118
}
119
120
121
/*
122
  my_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise.
123
 */
124
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
125
int my_strcasecmp_mb(const CHARSET_INFO * const  cs,const char *s, const char *t)
1 by brian
clean slate
126
{
205 by Brian Aker
uint32 -> uin32_t
127
  register uint32_t l;
481 by Brian Aker
Remove all of uchar.
128
  register unsigned char *map=cs->to_upper;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
129
1 by brian
clean slate
130
  while (*s && *t)
131
  {
132
    /* Pointing after the '\0' is safe here. */
133
    if ((l=my_ismbchar(cs, s, s + cs->mbmaxlen)))
134
    {
135
      while (l--)
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
136
        if (*s++ != *t++)
1 by brian
clean slate
137
          return 1;
138
    }
139
    else if (my_mbcharlen(cs, *t) > 1)
140
      return 1;
481 by Brian Aker
Remove all of uchar.
141
    else if (map[(unsigned char) *s++] != map[(unsigned char) *t++])
1 by brian
clean slate
142
      return 1;
143
  }
144
  /* At least one of '*s' and '*t' is zero here. */
145
  return (*t != *s);
146
}
147
148
149
/*
150
** Compare string against string with wildcard
151
**	0 if matched
152
**	-1 if not matched with wildcard
153
**	 1 if not matched (no-match cases other than the -1 case)
154
*/
155
156
/*
  INC_PTR(cs, A, B): advance pointer A by one character of charset cs,
  never looking beyond B: by the length my_ismbchar() reports, or by one
  byte when it reports 0.
  NOTE: the arguments are evaluated more than once, so they must be free
  of side effects.
*/
#define INC_PTR(cs,A,B) \
  ((A)+= (my_ismbchar((cs),(A),(B)) ? my_ismbchar((cs),(A),(B)) : 1))

/*
  likeconv(s, A): weight of the single byte A, looked up in the
  sort_order table of charset s.
*/
#define likeconv(s,A) ((unsigned char) (s)->sort_order[(unsigned char) (A)])
1 by brian
clean slate
159
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
160
int my_wildcmp_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
161
		  const char *str,const char *str_end,
162
		  const char *wildstr,const char *wildend,
163
		  int escape, int w_one, int w_many)
164
{
165
  int result= -1;				/* Not found, using wildcards */
166
167
  while (wildstr != wildend)
168
  {
169
    while (*wildstr != w_many && *wildstr != w_one)
170
    {
171
      int l;
172
      if (*wildstr == escape && wildstr+1 != wildend)
173
	wildstr++;
174
      if ((l = my_ismbchar(cs, wildstr, wildend)))
175
      {
176
	  if (str+l > str_end || memcmp(str, wildstr, l) != 0)
177
	      return 1;
178
	  str += l;
179
	  wildstr += l;
180
      }
181
      else
182
      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
183
	return(1);				/* No match */
184
      if (wildstr == wildend)
185
	return (str != str_end);		/* Match if both are at end */
186
      result=1;					/* Found an anchor char */
187
    }
188
    if (*wildstr == w_one)
189
    {
190
      do
191
      {
192
	if (str == str_end)			/* Skip one char if possible */
193
	  return (result);
194
	INC_PTR(cs,str,str_end);
195
      } while (++wildstr < wildend && *wildstr == w_one);
196
      if (wildstr == wildend)
197
	break;
198
    }
199
    if (*wildstr == w_many)
200
    {						/* Found w_many */
481 by Brian Aker
Remove all of uchar.
201
      unsigned char cmp;
1 by brian
clean slate
202
      const char* mb = wildstr;
203
      int mb_len=0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
204
1 by brian
clean slate
205
      wildstr++;
206
      /* Remove any '%' and '_' from the wild search string */
207
      for (; wildstr != wildend ; wildstr++)
208
      {
209
	if (*wildstr == w_many)
210
	  continue;
211
	if (*wildstr == w_one)
212
	{
213
	  if (str == str_end)
214
	    return (-1);
215
	  INC_PTR(cs,str,str_end);
216
	  continue;
217
	}
218
	break;					/* Not a wild character */
219
      }
220
      if (wildstr == wildend)
221
	return(0);				/* Ok if w_many is last */
222
      if (str == str_end)
223
	return -1;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
224
1 by brian
clean slate
225
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
226
	cmp= *++wildstr;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
227
1 by brian
clean slate
228
      mb=wildstr;
229
      mb_len= my_ismbchar(cs, wildstr, wildend);
230
      INC_PTR(cs,wildstr,wildend);		/* This is compared trough cmp */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
231
      cmp=likeconv(cs,cmp);
1 by brian
clean slate
232
      do
233
      {
234
        for (;;)
235
        {
236
          if (str >= str_end)
237
            return -1;
238
          if (mb_len)
239
          {
240
            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
241
            {
242
              str += mb_len;
243
              break;
244
            }
245
          }
246
          else if (!my_ismbchar(cs, str, str_end) &&
247
                   likeconv(cs,*str) == cmp)
248
          {
249
            str++;
250
            break;
251
          }
252
          INC_PTR(cs,str, str_end);
253
        }
254
	{
255
	  int tmp=my_wildcmp_mb(cs,str,str_end,wildstr,wildend,escape,w_one,
256
                                w_many);
257
	  if (tmp <= 0)
258
	    return (tmp);
259
	}
260
      } while (str != str_end && wildstr[0] != w_many);
261
      return(-1);
262
    }
263
  }
264
  return (str != str_end ? 1 : 0);
265
}
266
267
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
268
size_t my_numchars_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
269
		      const char *pos, const char *end)
270
{
271
  register size_t count= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
272
  while (pos < end)
1 by brian
clean slate
273
  {
482 by Brian Aker
Remove uint.
274
    uint32_t mb_len;
1 by brian
clean slate
275
    pos+= (mb_len= my_ismbchar(cs,pos,end)) ? mb_len : 1;
276
    count++;
277
  }
278
  return count;
279
}
280
281
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
282
size_t my_charpos_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
283
		     const char *pos, const char *end, size_t length)
284
{
285
  const char *start= pos;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
286
1 by brian
clean slate
287
  while (length && pos < end)
288
  {
482 by Brian Aker
Remove uint.
289
    uint32_t mb_len;
1 by brian
clean slate
290
    pos+= (mb_len= my_ismbchar(cs, pos, end)) ? mb_len : 1;
291
    length--;
292
  }
293
  return (size_t) (length ? end+2-start : pos-start);
294
}
295
296
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
297
size_t my_well_formed_len_mb(const CHARSET_INFO * const cs, const char *b, const char *e,
1 by brian
clean slate
298
                             size_t pos, int *error)
299
{
300
  const char *b_start= b;
301
  *error= 0;
302
  while (pos)
303
  {
304
    my_wc_t wc;
305
    int mb_len;
306
481 by Brian Aker
Remove all of uchar.
307
    if ((mb_len= cs->cset->mb_wc(cs, &wc, (const unsigned char*) b, (const unsigned char*) e)) <= 0)
1 by brian
clean slate
308
    {
309
      *error= b < e ? 1 : 0;
310
      break;
311
    }
312
    b+= mb_len;
313
    pos--;
314
  }
315
  return (size_t) (b - b_start);
316
}
317
318
482 by Brian Aker
Remove uint.
319
uint32_t my_instr_mb(const CHARSET_INFO * const cs,
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
320
                 const char *b, size_t b_length,
1 by brian
clean slate
321
                 const char *s, size_t s_length,
482 by Brian Aker
Remove uint.
322
                 my_match_t *match, uint32_t nmatch)
1 by brian
clean slate
323
{
324
  register const char *end, *b0;
325
  int res= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
326
1 by brian
clean slate
327
  if (s_length <= b_length)
328
  {
329
    if (!s_length)
330
    {
331
      if (nmatch)
332
      {
333
        match->beg= 0;
334
        match->end= 0;
335
        match->mb_len= 0;
336
      }
337
      return 1;		/* Empty string is always found */
338
    }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
339
1 by brian
clean slate
340
    b0= b;
341
    end= b+b_length-s_length+1;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
342
1 by brian
clean slate
343
    while (b < end)
344
    {
345
      int mb_len;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
346
347
      if (!cs->coll->strnncoll(cs, (const unsigned char*) b, s_length,
481 by Brian Aker
Remove all of uchar.
348
                                   (const unsigned char*) s, s_length, 0))
1 by brian
clean slate
349
      {
350
        if (nmatch)
351
        {
352
          match[0].beg= 0;
353
          match[0].end= (size_t) (b-b0);
354
          match[0].mb_len= res;
355
          if (nmatch > 1)
356
          {
357
            match[1].beg= match[0].end;
358
            match[1].end= match[0].end+s_length;
359
            match[1].mb_len= 0;	/* Not computed */
360
          }
361
        }
362
        return 2;
363
      }
364
      mb_len= (mb_len= my_ismbchar(cs, b, end)) ? mb_len : 1;
365
      b+= mb_len;
366
      b_length-= mb_len;
367
      res++;
368
    }
369
  }
370
  return 0;
371
}
372
373
374
/* BINARY collations handlers for MB charsets */
375
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
376
int my_strnncoll_mb_bin(const CHARSET_INFO * const,
481 by Brian Aker
Remove all of uchar.
377
                        const unsigned char *s, size_t slen,
378
                        const unsigned char *t, size_t tlen,
276 by Brian Aker
Cleaned out my_bool from strings.
379
                        bool t_is_prefix)
1 by brian
clean slate
380
{
1067.4.9 by Nathan Williams
Converted all usages of cmin/cmax in mystrings directory to use std::min/max
381
  size_t len= min(slen,tlen);
1 by brian
clean slate
382
  int cmp= memcmp(s,t,len);
383
  return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen);
384
}
385
386
387
/*
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
388
  Compare two strings.
389
1 by brian
clean slate
390
  SYNOPSIS
391
    my_strnncollsp_mb_bin()
392
    cs			Chararacter set
393
    s			String to compare
394
    slen		Length of 's'
395
    t			String to compare
396
    tlen		Length of 't'
397
    diff_if_only_endspace_difference
398
		        Set to 1 if the strings should be regarded as different
399
                        if they only difference in end space
400
401
  NOTE
402
   This function is used for character strings with binary collations.
403
   The shorter string is extended with end space to be as long as the longer
404
   one.
405
406
  RETURN
407
    A negative number if s < t
408
    A positive number if s > t
409
    0 if strings are equal
410
*/
411
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
412
int my_strnncollsp_mb_bin(const CHARSET_INFO * const,
481 by Brian Aker
Remove all of uchar.
413
                          const unsigned char *a, size_t a_length,
414
                          const unsigned char *b, size_t b_length,
276 by Brian Aker
Cleaned out my_bool from strings.
415
                          bool diff_if_only_endspace_difference)
1 by brian
clean slate
416
{
481 by Brian Aker
Remove all of uchar.
417
  const unsigned char *end;
1 by brian
clean slate
418
  size_t length;
419
  int res;
420
421
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
422
  diff_if_only_endspace_difference= 0;
423
#endif
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
424
1067.4.9 by Nathan Williams
Converted all usages of cmin/cmax in mystrings directory to use std::min/max
425
  end= a + (length= min(a_length, b_length));
1 by brian
clean slate
426
  while (a < end)
427
  {
428
    if (*a++ != *b++)
429
      return ((int) a[-1] - (int) b[-1]);
430
  }
431
  res= 0;
432
  if (a_length != b_length)
433
  {
434
    int swap= 1;
435
    if (diff_if_only_endspace_difference)
436
      res= 1;                                   /* Assume 'a' is bigger */
437
    /*
438
      Check the next not space character of the longer key. If it's < ' ',
439
      then it's smaller than the other key.
440
    */
441
    if (a_length < b_length)
442
    {
443
      /* put shorter key in s */
444
      a_length= b_length;
445
      a= b;
446
      swap= -1;					/* swap sign of result */
447
      res= -res;
448
    }
449
    for (end= a + a_length-length; a < end ; a++)
450
    {
451
      if (*a != ' ')
452
	return (*a < ' ') ? -swap : swap;
453
    }
454
  }
455
  return res;
456
}
457
458
459
/*
  Copy one non-ASCII character from "src" to "dst", advancing both pointers.
  "dst" must have enough room for the character.

  Note, sort_order[] is not used in this macro.
  This is correct even for case insensitive collations:
  - basic Latin letters are processed outside this macro;
  - for other characters sort_order[x] is equal to x.

  The body is wrapped in do { } while (0) so that the macro behaves as a
  single statement. An unexpected ismbchar() result is handled like 0
  (one byte is copied), so the caller's loop always makes progress.
  "dst" and "src" are evaluated several times and must be plain lvalues.
*/
#define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se)                  \
do                                                                       \
{                                                                        \
  switch ((cs)->cset->ismbchar((cs), (const char*) (src),                \
                               (const char*) (se))) {                    \
  case 4:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 3:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 2:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 0:  /* byte in range 0x80..0xFF which is not MB head */           \
  default:                                                               \
    *(dst)++= *(src)++;                                                  \
    break;                                                               \
  }                                                                      \
} while (0)
483
484
485
/*
486
  For character sets with two or three byte multi-byte
487
  characters having multibyte weights *equal* to their codes:
488
  cp932, euckr, gb2312, sjis, eucjpms, ujis.
489
*/
490
size_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
491
my_strnxfrm_mb(const CHARSET_INFO * const cs,
482 by Brian Aker
Remove uint.
492
               unsigned char *dst, size_t dstlen, uint32_t nweights,
493
               const unsigned char *src, size_t srclen, uint32_t flags)
1 by brian
clean slate
494
{
481 by Brian Aker
Remove all of uchar.
495
  unsigned char *d0= dst;
496
  unsigned char *de= dst + dstlen;
497
  const unsigned char *se= src + srclen;
498
  const unsigned char *sort_order= cs->sort_order;
1 by brian
clean slate
499
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
500
  assert(cs->mbmaxlen <= 4);
1 by brian
clean slate
501
502
  /*
503
    If "srclen" is smaller than both "dstlen" and "nweights"
504
    then we can run a simplified loop -
505
    without checking "nweights" and "de".
506
  */
507
  if (dstlen >= srclen && nweights >= srclen)
508
  {
509
    if (sort_order)
510
    {
511
      /* Optimized version for a case insensitive collation */
512
      for (; src < se; nweights--)
513
      {
514
        if (*src < 128) /* quickly catch ASCII characters */
515
          *dst++= sort_order[*src++];
516
        else
517
          my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
518
      }
519
    }
520
    else
521
    {
522
      /* Optimized version for a case sensitive collation (no sort_order) */
523
      for (; src < se; nweights--)
524
      {
525
        if (*src < 128) /* quickly catch ASCII characters */
526
          *dst++= *src++;
527
        else
528
          my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
529
      }
530
    }
531
    goto pad;
532
  }
533
534
  /*
535
    A thourough loop, checking all possible limits:
536
    "se", "nweights" and "de".
537
  */
538
  for (; src < se && nweights; nweights--)
539
  {
540
    int chlen;
541
    if (*src < 128 ||
542
        !(chlen= cs->cset->ismbchar(cs, (const char*) src, (const char*) se)))
543
    {
544
      /* Single byte character */
545
      if (dst >= de)
546
        break;
547
      *dst++= sort_order ? sort_order[*src++] : *src++;
548
    }
549
    else
550
    {
551
      /* Multi-byte character */
552
      if (dst + chlen > de)
553
        break;
554
      *dst++= *src++;
555
      *dst++= *src++;
556
      if (chlen == 3)
557
        *dst++= *src++;
558
    }
559
  }
560
561
pad:
562
  return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
563
}
564
565
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
566
/*
  "Case-insensitive" comparison for binary collations: nothing is folded,
  so this is a plain strcmp() of the two NUL-terminated strings.
  The character set argument is not used.
*/
int my_strcasecmp_mb_bin(const CHARSET_INFO * const,
                         const char *s, const char *t)
{
  const int order= strcmp(s, t);
  return order;
}
571
572
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
573
void my_hash_sort_mb_bin(const CHARSET_INFO * const,
574
                         const unsigned char *key, size_t len,
575
                         uint32_t *nr1, uint32_t *nr2)
1 by brian
clean slate
576
{
481 by Brian Aker
Remove all of uchar.
577
  const unsigned char *pos = key;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
578
1 by brian
clean slate
579
  /*
580
     Remove trailing spaces. We have to do this to be able to compare
581
    'A ' and 'A' as identical
582
  */
583
  key= skip_trailing_space(key, len);
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
584
481 by Brian Aker
Remove all of uchar.
585
  for (; pos < (const unsigned char*) key ; pos++)
1 by brian
clean slate
586
  {
895 by Brian Aker
Completion (?) of uint conversion.
587
    nr1[0]^=(ulong) ((((uint32_t) nr1[0] & 63)+nr2[0]) *
588
	     ((uint32_t)*pos)) + (nr1[0] << 8);
1 by brian
clean slate
589
    nr2[0]+=3;
590
  }
591
}
592
593
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
594
/*
1 by brian
clean slate
595
  Fill the given buffer with 'maximum character' for given charset
596
  SYNOPSIS
597
      pad_max_char()
598
      cs   Character set
599
      str  Start of buffer to fill
600
      end  End of buffer to fill
601
602
  DESCRIPTION
603
      Write max key:
604
      - for non-Unicode character sets:
605
        just set to 255.
606
      - for Unicode character set (utf-8):
607
        create a buffer with multibyte representation of the max_sort_char
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
608
        character, and copy it into max_str in a loop.
1 by brian
clean slate
609
*/
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
610
static void pad_max_char(const CHARSET_INFO * const cs, char *str, char *end)
1 by brian
clean slate
611
{
612
  char buf[10];
613
  char buflen;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
614
1 by brian
clean slate
615
  if (!(cs->state & MY_CS_UNICODE))
616
  {
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
617
    memset(str, 255, end - str);
1 by brian
clean slate
618
    return;
619
  }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
620
481 by Brian Aker
Remove all of uchar.
621
  buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (unsigned char*) buf,
622
                          (unsigned char*) buf + sizeof(buf));
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
623
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
624
  assert(buflen > 0);
1 by brian
clean slate
625
  do
626
  {
627
    if ((str + buflen) < end)
628
    {
629
      /* Enough space for the characer */
630
      memcpy(str, buf, buflen);
631
      str+= buflen;
632
    }
633
    else
634
    {
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
635
      /*
1 by brian
clean slate
636
        There is no space for whole multibyte
637
        character, then add trailing spaces.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
638
      */
1 by brian
clean slate
639
      *str++= ' ';
640
    }
641
  } while (str < end);
642
}
643
644
/*
645
** Calculate min_str and max_str that ranges a LIKE string.
646
** Arguments:
647
** ptr		Pointer to LIKE string.
648
** ptr_length	Length of LIKE string.
649
** escape	Escape character in LIKE.  (Normally '\').
77.1.95 by Monty Taylor
Fixed silly my_bool==char nonsense.
650
**		All escape characters should be removed from
651
**              min_str and max_str
652
** w_one        Single char matching char in LIKE (Normally '_')
653
** w_many       Multiple char matching char in LIKE (Normally '%')
1 by brian
clean slate
654
** res_length	Length of min_str and max_str.
655
** min_str	Smallest case sensitive string that ranges LIKE.
656
**		Should be space padded to res_length.
657
** max_str	Largest case sensitive string that ranges LIKE.
658
**		Normally padded with the biggest character sort value.
659
**
660
** The function should return 0 if ok and 1 if the LIKE string can't be
661
** optimized !
662
*/
663
276 by Brian Aker
Cleaned out my_bool from strings.
664
bool my_like_range_mb(const CHARSET_INFO * const cs,
77.1.95 by Monty Taylor
Fixed silly my_bool==char nonsense.
665
                         const char *ptr,size_t ptr_length,
666
                         char escape, char w_one, char w_many,
667
                         size_t res_length,
668
                         char *min_str,char *max_str,
669
                         size_t *min_length,size_t *max_length)
1 by brian
clean slate
670
{
482 by Brian Aker
Remove uint.
671
  uint32_t mb_len;
1 by brian
clean slate
672
  const char *end= ptr + ptr_length;
673
  char *min_org= min_str;
674
  char *min_end= min_str + res_length;
675
  char *max_end= max_str + res_length;
676
  size_t maxcharlen= res_length / cs->mbmaxlen;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
677
  const char *contraction_flags= cs->contractions ?
1 by brian
clean slate
678
              ((const char*) cs->contractions) + 0x40*0x40 : NULL;
679
680
  for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
681
  {
682
    /* We assume here that escape, w_any, w_namy are one-byte characters */
683
    if (*ptr == escape && ptr+1 != end)
684
      ptr++;                                    /* Skip escape */
685
    else if (*ptr == w_one || *ptr == w_many)   /* '_' and '%' in SQL */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
686
    {
1 by brian
clean slate
687
fill_max_and_min:
688
      /*
689
        Calculate length of keys:
690
        'a\0\0... is the smallest possible string when we have space expand
691
        a\ff\ff... is the biggest possible string
692
      */
693
      *min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
694
                    res_length);
695
      *max_length= res_length;
696
      /* Create min key  */
697
      do
698
      {
699
	*min_str++= (char) cs->min_sort_char;
700
      } while (min_str != min_end);
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
701
702
      /*
1 by brian
clean slate
703
        Write max key: create a buffer with multibyte
704
        representation of the max_sort_char character,
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
705
        and copy it into max_str in a loop.
1 by brian
clean slate
706
      */
707
      *max_length= res_length;
708
      pad_max_char(cs, max_str, max_end);
709
      return 0;
710
    }
711
    if ((mb_len= my_ismbchar(cs, ptr, end)) > 1)
712
    {
713
      if (ptr+mb_len > end || min_str+mb_len > min_end)
714
        break;
715
      while (mb_len--)
716
       *min_str++= *max_str++= *ptr++;
717
    }
718
    else
719
    {
720
      /*
721
        Special case for collations with contractions.
722
        For example, in Chezh, 'ch' is a separate letter
723
        which is sorted between 'h' and 'i'.
724
        If the pattern 'abc%', 'c' at the end can mean:
725
        - letter 'c' itself,
726
        - beginning of the contraction 'ch'.
727
728
        If we simply return this LIKE range:
729
730
         'abc\min\min\min' and 'abc\max\max\max'
731
732
        then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
733
        will only find values starting from 'abc[^h]',
734
        but won't find values starting from 'abch'.
735
736
        We must ignore contraction heads followed by w_one or w_many.
737
        ('Contraction head' means any letter which can be the first
738
        letter in a contraction)
739
740
        For example, for Czech 'abc%', we will return LIKE range,
741
        which is equal to LIKE range for 'ab%':
742
743
        'ab\min\min\min\min' and 'ab\max\max\max\max'.
744
745
      */
746
      if (contraction_flags && ptr + 1 < end &&
481 by Brian Aker
Remove all of uchar.
747
          contraction_flags[(unsigned char) *ptr])
1 by brian
clean slate
748
      {
749
        /* Ptr[0] is a contraction head. */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
750
1 by brian
clean slate
751
        if (ptr[1] == w_one || ptr[1] == w_many)
752
        {
753
          /* Contraction head followed by a wildcard, quit. */
754
          goto fill_max_and_min;
755
        }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
756
1 by brian
clean slate
757
        /*
758
          Some letters can be both contraction heads and contraction tails.
759
          For example, in Danish 'aa' is a separate single letter which
760
          is sorted after 'z'. So 'a' can be both head and tail.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
761
1 by brian
clean slate
762
          If ptr[0]+ptr[1] is a contraction,
763
          then put both letters together.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
764
1 by brian
clean slate
765
          If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
766
          is not a contraction, then we put only ptr[0],
767
          and continue with ptr[1] on the next loop.
768
        */
481 by Brian Aker
Remove all of uchar.
769
        if (contraction_flags[(unsigned char) ptr[1]] &&
1 by brian
clean slate
770
            cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40])
771
        {
772
          /* Contraction found */
773
          if (maxcharlen == 1 || min_str + 1 >= min_end)
774
          {
775
            /* Both contraction parts don't fit, quit */
776
            goto fill_max_and_min;
777
          }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
778
1 by brian
clean slate
779
          /* Put contraction head */
780
          *min_str++= *max_str++= *ptr++;
781
          maxcharlen--;
782
        }
783
      }
784
      /* Put contraction tail, or a single character */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
785
      *min_str++= *max_str++= *ptr++;
1 by brian
clean slate
786
    }
787
  }
788
789
  *min_length= *max_length = (size_t) (min_str - min_org);
790
  while (min_str != min_end)
791
    *min_str++= *max_str++= ' ';           /* Because if key compression */
792
  return 0;
793
}
794
795
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
796
/*
  Wildcard (LIKE-style) match of a multi-byte string against a pattern,
  comparing characters byte-for-byte (binary collation).

  SYNOPSIS
    my_wildcmp_mb_bin()
    cs                 Character set; used to detect multi-byte characters
                       (my_ismbchar) and to step over whole characters
                       (INC_PTR).
    str, str_end       The string to test: [str, str_end).
    wildstr, wildend   The pattern: [wildstr, wildend).
    escape             Escape character; the pattern character following
                       it is taken literally.
    w_one              Wildcard that matches exactly one character.
    w_many             Wildcard that matches any number of characters.

  NOTES
    After a w_many wildcard the function scans str for the next literal
    pattern character and recurses on the remainder for each candidate
    position.

  RETURN
    0    The string matches the pattern.
    1    No match.
   -1    No match; the string was exhausted while wildcards were still
         being processed.
*/
int my_wildcmp_mb_bin(const CHARSET_INFO * const cs,
                      const char *str,const char *str_end,
                      const char *wildstr,const char *wildend,
                      int escape, int w_one, int w_many)
{
  int result= -1;                               /* Not found, using wildcards */

  while (wildstr != wildend)
  {
    /* Compare the literal run that precedes the next wildcard */
    while (*wildstr != w_many && *wildstr != w_one)
    {
      int l;
      if (*wildstr == escape && wildstr+1 != wildend)
        wildstr++;
      if ((l = my_ismbchar(cs, wildstr, wildend)))
      {
        /* Multi-byte pattern character: all of its bytes must match */
        if (str+l > str_end || memcmp(str, wildstr, l) != 0)
          return 1;
        str += l;
        wildstr += l;
      }
      else
      if (str == str_end || *wildstr++ != *str++)
        return(1);                              /* No match */
      if (wildstr == wildend)
        return (str != str_end);                /* Match if both are at end */
      result=1;                                 /* Found an anchor char */
    }
    if (*wildstr == w_one)
    {
      do
      {
        if (str == str_end)                     /* Skip one char if possible */
          return (result);
        INC_PTR(cs,str,str_end);
      } while (++wildstr < wildend && *wildstr == w_one);
      if (wildstr == wildend)
        break;
    }
    if (*wildstr == w_many)
    {                                           /* Found w_many */
      unsigned char cmp;
      const char *mb;
      int mb_len;

      wildstr++;
      /* Remove any '%' and '_' from the wild search string */
      for (; wildstr != wildend ; wildstr++)
      {
        if (*wildstr == w_many)
          continue;
        if (*wildstr == w_one)
        {
          if (str == str_end)
            return (-1);
          INC_PTR(cs,str,str_end);
          continue;
        }
        break;                                  /* Not a wild character */
      }
      if (wildstr == wildend)
        return(0);                              /* Ok if w_many is last */
      if (str == str_end)
        return -1;

      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
        cmp= *++wildstr;

      /* Remember the literal character that must follow the wildcard */
      mb=wildstr;
      mb_len= my_ismbchar(cs, wildstr, wildend);
      INC_PTR(cs,wildstr,wildend);              /* This is compared through cmp */
      do
      {
        /* Advance str to the next occurrence of that literal character */
        for (;;)
        {
          if (str >= str_end)
            return -1;
          if (mb_len)
          {
            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
            {
              str += mb_len;
              break;
            }
          }
          /*
            cmp is an unsigned char, so *str must be converted to unsigned
            char as well: where plain char is signed, a byte >= 0x80 would
            otherwise promote to a negative int and never compare equal.
          */
          else if (!my_ismbchar(cs, str, str_end) &&
                   (unsigned char) *str == cmp)
          {
            str++;
            break;
          }
          INC_PTR(cs,str, str_end);
        }
        {
          /* Try to match the rest of the pattern from this position */
          int tmp=my_wildcmp_mb_bin(cs,str,str_end,wildstr,wildend,escape,w_one,w_many);
          if (tmp <= 0)
            return (tmp);
        }
      } while (str != str_end && wildstr[0] != w_many);
      return(-1);
    }
  }
  return (str != str_end ? 1 : 0);
}
899
900
901
/*
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
902
  Data was produced from EastAsianWidth.txt
1 by brian
clean slate
903
  using utr11-dump utility.
904
*/
905
/*
  Per-page width tables.  pgXX covers code points U+XX00..U+XXFF and is
  indexed by the low byte of the code point.  An entry of 1 makes
  my_numcells_mb() count one extra display cell for that character; 0 adds
  nothing.  The tables are read-only, hence const.
*/
static const char pg11[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

static const char pg23[256]=
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

static const char pg2E[256]=
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0
};

static const char pg2F[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0
};

static const char pg30[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
};

static const char pg31[256]=
{
0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
};

static const char pg32[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
};

static const char pg4D[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

static const char pg9F[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

static const char pgA4[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

static const char pgD7[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

static const char pgFA[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

static const char pgFE[256]=
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

static const char pgFF[256]=
{
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

/*
  Top-level width index, addressed by the high byte of a BMP code point
  (wc >> 8).  When p is non-NULL it points to the per-character table for
  that page; otherwise 'page' is the value applied to every character of
  the page.
*/
static const struct {int page; const char *p;} utr11_data[256]=
{
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,pg11},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,pg23},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,pg2E},{0,pg2F},
{0,pg30},{0,pg31},{0,pg32},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg4D},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg9F},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgA4},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgD7},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
{0,NULL},{1,NULL},{0,pgFA},{0,NULL},{0,NULL},{0,NULL},{0,pgFE},{0,pgFF}
};
1108
1109
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1110
size_t my_numcells_mb(const CHARSET_INFO * const cs, const char *b, const char *e)
1 by brian
clean slate
1111
{
1112
  my_wc_t wc;
1113
  size_t clen= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
1114
1 by brian
clean slate
1115
  while (b < e)
1116
  {
1117
    int mb_len;
482 by Brian Aker
Remove uint.
1118
    uint32_t pg;
481 by Brian Aker
Remove all of uchar.
1119
    if ((mb_len= cs->cset->mb_wc(cs, &wc, (unsigned char*) b, (unsigned char*) e)) <= 0 ||
1 by brian
clean slate
1120
        wc > 0xFFFF)
1121
    {
1122
      /*
1123
        Let's think a wrong sequence takes 1 dysplay cell.
1124
        Also, consider supplementary characters as taking one cell.
1125
      */
1126
      mb_len= 1;
1127
      b++;
1128
      continue;
1129
    }
1130
    b+= mb_len;
1131
    pg= (wc >> 8) & 0xFF;
1132
    clen+= utr11_data[pg].p ? utr11_data[pg].p[wc & 0xFF] : utr11_data[pg].page;
1133
    clen++;
1134
  }
1135
  return clen;
1136
}
1137
1138
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1139
int my_mb_ctype_mb(const CHARSET_INFO * const cs, int *ctype,
481 by Brian Aker
Remove all of uchar.
1140
                   const unsigned char *s, const unsigned char *e)
1 by brian
clean slate
1141
{
1142
  my_wc_t wc;
1143
  int res= cs->cset->mb_wc(cs, &wc, s, e);
1144
  if (res <= 0 || wc > 0xFFFF)
1145
    *ctype= 0;
1146
  else
1147
    *ctype= my_uni_ctype[wc>>8].ctype ?
1148
            my_uni_ctype[wc>>8].ctype[wc&0xFF] :
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
1149
            my_uni_ctype[wc>>8].pctype;
1 by brian
clean slate
1150
  return res;
1151
}
1152
1153
1154
MY_COLLATION_HANDLER my_collation_mb_bin_handler =
1155
{
1156
    NULL,              /* init */
1157
    my_strnncoll_mb_bin,
1158
    my_strnncollsp_mb_bin,
1159
    my_strnxfrm_mb,
1160
    my_strnxfrmlen_simple,
1161
    my_like_range_mb,
1162
    my_wildcmp_mb_bin,
1163
    my_strcasecmp_mb_bin,
1164
    my_instr_mb,
1165
    my_hash_sort_mb_bin,
1166
    my_propagate_simple
1167
};
1168