/* Copyright (C) 2000 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

#include "m_string.h"
#include "m_ctype.h"

#ifdef USE_MB

size_t my_caseup_str_mb(const CHARSET_INFO * const  cs, char *str)
1 by brian
clean slate
23
{
205 by Brian Aker
uint32 -> uin32_t
24
  register uint32_t l;
1 by brian
clean slate
25
  register uchar *map= cs->to_upper;
26
  char *str_orig= str;
27
  
28
  while (*str)
29
  {
30
    /* Pointing after the '\0' is safe here. */
31
    if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
32
      str+= l;
33
    else
34
    { 
35
      *str= (char) map[(uchar)*str];
36
      str++;
37
    }
38
  }
39
  return (size_t) (str - str_orig);
40
}
41
42
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
43
size_t my_casedn_str_mb(const CHARSET_INFO * const  cs, char *str)
1 by brian
clean slate
44
{
205 by Brian Aker
uint32 -> uin32_t
45
  register uint32_t l;
1 by brian
clean slate
46
  register uchar *map= cs->to_lower;
47
  char *str_orig= str;
48
  
49
  while (*str)
50
  {
51
    /* Pointing after the '\0' is safe here. */
52
    if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
53
      str+= l;
54
    else
55
    {
56
      *str= (char) map[(uchar)*str];
57
      str++;
58
    }
59
  }
60
  return (size_t) (str - str_orig);
61
}
62
63
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
64
size_t my_caseup_mb(const CHARSET_INFO * const  cs, char *src, size_t srclen,
1 by brian
clean slate
65
                    char *dst __attribute__((unused)),
66
                    size_t dstlen __attribute__((unused)))
67
{
205 by Brian Aker
uint32 -> uin32_t
68
  register uint32_t l;
1 by brian
clean slate
69
  register char *srcend= src + srclen;
70
  register uchar *map= cs->to_upper;
71
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
72
  assert(src == dst && srclen == dstlen);
1 by brian
clean slate
73
  while (src < srcend)
74
  {
75
    if ((l=my_ismbchar(cs, src, srcend)))
76
      src+= l;
77
    else 
78
    {
79
      *src=(char) map[(uchar) *src];
80
      src++;
81
    }
82
  }
83
  return srclen;
84
}
85
86
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
87
size_t my_casedn_mb(const CHARSET_INFO * const  cs, char *src, size_t srclen,
1 by brian
clean slate
88
                    char *dst __attribute__((unused)),
89
                    size_t dstlen __attribute__((unused)))
90
{
205 by Brian Aker
uint32 -> uin32_t
91
  register uint32_t l;
1 by brian
clean slate
92
  register char *srcend= src + srclen;
93
  register uchar *map=cs->to_lower;
94
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
95
  assert(src == dst && srclen == dstlen);  
1 by brian
clean slate
96
  while (src < srcend)
97
  {
98
    if ((l= my_ismbchar(cs, src, srcend)))
99
      src+= l;
100
    else
101
    {
102
      *src= (char) map[(uchar)*src];
103
      src++;
104
    }
105
  }
106
  return srclen;
107
}
108
109
110
/*
111
  my_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise.
112
 */
113
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
114
int my_strcasecmp_mb(const CHARSET_INFO * const  cs,const char *s, const char *t)
1 by brian
clean slate
115
{
205 by Brian Aker
uint32 -> uin32_t
116
  register uint32_t l;
1 by brian
clean slate
117
  register uchar *map=cs->to_upper;
118
  
119
  while (*s && *t)
120
  {
121
    /* Pointing after the '\0' is safe here. */
122
    if ((l=my_ismbchar(cs, s, s + cs->mbmaxlen)))
123
    {
124
      while (l--)
125
        if (*s++ != *t++) 
126
          return 1;
127
    }
128
    else if (my_mbcharlen(cs, *t) > 1)
129
      return 1;
130
    else if (map[(uchar) *s++] != map[(uchar) *t++])
131
      return 1;
132
  }
133
  /* At least one of '*s' and '*t' is zero here. */
134
  return (*t != *s);
135
}
136
137
138
/*
139
** Compare string against string with wildcard
140
**	0 if matched
141
**	-1 if not matched with wildcard
142
**	 1 if matched with wildcard
143
*/
144
145
#define INC_PTR(cs,A,B) A+=(my_ismbchar(cs,A,B) ? my_ismbchar(cs,A,B) : 1)
146
147
#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
148
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
149
int my_wildcmp_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
150
		  const char *str,const char *str_end,
151
		  const char *wildstr,const char *wildend,
152
		  int escape, int w_one, int w_many)
153
{
154
  int result= -1;				/* Not found, using wildcards */
155
156
  while (wildstr != wildend)
157
  {
158
    while (*wildstr != w_many && *wildstr != w_one)
159
    {
160
      int l;
161
      if (*wildstr == escape && wildstr+1 != wildend)
162
	wildstr++;
163
      if ((l = my_ismbchar(cs, wildstr, wildend)))
164
      {
165
	  if (str+l > str_end || memcmp(str, wildstr, l) != 0)
166
	      return 1;
167
	  str += l;
168
	  wildstr += l;
169
      }
170
      else
171
      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
172
	return(1);				/* No match */
173
      if (wildstr == wildend)
174
	return (str != str_end);		/* Match if both are at end */
175
      result=1;					/* Found an anchor char */
176
    }
177
    if (*wildstr == w_one)
178
    {
179
      do
180
      {
181
	if (str == str_end)			/* Skip one char if possible */
182
	  return (result);
183
	INC_PTR(cs,str,str_end);
184
      } while (++wildstr < wildend && *wildstr == w_one);
185
      if (wildstr == wildend)
186
	break;
187
    }
188
    if (*wildstr == w_many)
189
    {						/* Found w_many */
190
      uchar cmp;
191
      const char* mb = wildstr;
192
      int mb_len=0;
193
      
194
      wildstr++;
195
      /* Remove any '%' and '_' from the wild search string */
196
      for (; wildstr != wildend ; wildstr++)
197
      {
198
	if (*wildstr == w_many)
199
	  continue;
200
	if (*wildstr == w_one)
201
	{
202
	  if (str == str_end)
203
	    return (-1);
204
	  INC_PTR(cs,str,str_end);
205
	  continue;
206
	}
207
	break;					/* Not a wild character */
208
      }
209
      if (wildstr == wildend)
210
	return(0);				/* Ok if w_many is last */
211
      if (str == str_end)
212
	return -1;
213
      
214
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
215
	cmp= *++wildstr;
216
	
217
      mb=wildstr;
218
      mb_len= my_ismbchar(cs, wildstr, wildend);
219
      INC_PTR(cs,wildstr,wildend);		/* This is compared trough cmp */
220
      cmp=likeconv(cs,cmp);   
221
      do
222
      {
223
        for (;;)
224
        {
225
          if (str >= str_end)
226
            return -1;
227
          if (mb_len)
228
          {
229
            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
230
            {
231
              str += mb_len;
232
              break;
233
            }
234
          }
235
          else if (!my_ismbchar(cs, str, str_end) &&
236
                   likeconv(cs,*str) == cmp)
237
          {
238
            str++;
239
            break;
240
          }
241
          INC_PTR(cs,str, str_end);
242
        }
243
	{
244
	  int tmp=my_wildcmp_mb(cs,str,str_end,wildstr,wildend,escape,w_one,
245
                                w_many);
246
	  if (tmp <= 0)
247
	    return (tmp);
248
	}
249
      } while (str != str_end && wildstr[0] != w_many);
250
      return(-1);
251
    }
252
  }
253
  return (str != str_end ? 1 : 0);
254
}
255
256
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
257
size_t my_numchars_mb(const CHARSET_INFO * const cs __attribute__((unused)),
1 by brian
clean slate
258
		      const char *pos, const char *end)
259
{
260
  register size_t count= 0;
261
  while (pos < end) 
262
  {
263
    uint mb_len;
264
    pos+= (mb_len= my_ismbchar(cs,pos,end)) ? mb_len : 1;
265
    count++;
266
  }
267
  return count;
268
}
269
270
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
271
size_t my_charpos_mb(const CHARSET_INFO * const cs __attribute__((unused)),
1 by brian
clean slate
272
		     const char *pos, const char *end, size_t length)
273
{
274
  const char *start= pos;
275
  
276
  while (length && pos < end)
277
  {
278
    uint mb_len;
279
    pos+= (mb_len= my_ismbchar(cs, pos, end)) ? mb_len : 1;
280
    length--;
281
  }
282
  return (size_t) (length ? end+2-start : pos-start);
283
}
284
285
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
286
size_t my_well_formed_len_mb(const CHARSET_INFO * const cs, const char *b, const char *e,
1 by brian
clean slate
287
                             size_t pos, int *error)
288
{
289
  const char *b_start= b;
290
  *error= 0;
291
  while (pos)
292
  {
293
    my_wc_t wc;
294
    int mb_len;
295
266.5.1 by Andy Lester
Fixing casts that remove constness
296
    if ((mb_len= cs->cset->mb_wc(cs, &wc, (const uchar*) b, (const uchar*) e)) <= 0)
1 by brian
clean slate
297
    {
298
      *error= b < e ? 1 : 0;
299
      break;
300
    }
301
    b+= mb_len;
302
    pos--;
303
  }
304
  return (size_t) (b - b_start);
305
}
306
307
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
308
uint my_instr_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
309
                 const char *b, size_t b_length, 
310
                 const char *s, size_t s_length,
311
                 my_match_t *match, uint nmatch)
312
{
313
  register const char *end, *b0;
314
  int res= 0;
315
  
316
  if (s_length <= b_length)
317
  {
318
    if (!s_length)
319
    {
320
      if (nmatch)
321
      {
322
        match->beg= 0;
323
        match->end= 0;
324
        match->mb_len= 0;
325
      }
326
      return 1;		/* Empty string is always found */
327
    }
328
    
329
    b0= b;
330
    end= b+b_length-s_length+1;
331
    
332
    while (b < end)
333
    {
334
      int mb_len;
335
      
266.5.1 by Andy Lester
Fixing casts that remove constness
336
      if (!cs->coll->strnncoll(cs, (const uchar*) b, s_length, 
337
                                   (const uchar*) s, s_length, 0))
1 by brian
clean slate
338
      {
339
        if (nmatch)
340
        {
341
          match[0].beg= 0;
342
          match[0].end= (size_t) (b-b0);
343
          match[0].mb_len= res;
344
          if (nmatch > 1)
345
          {
346
            match[1].beg= match[0].end;
347
            match[1].end= match[0].end+s_length;
348
            match[1].mb_len= 0;	/* Not computed */
349
          }
350
        }
351
        return 2;
352
      }
353
      mb_len= (mb_len= my_ismbchar(cs, b, end)) ? mb_len : 1;
354
      b+= mb_len;
355
      b_length-= mb_len;
356
      res++;
357
    }
358
  }
359
  return 0;
360
}
361
362
363
/* BINARY collations handlers for MB charsets */
364
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
365
int my_strnncoll_mb_bin(const CHARSET_INFO * const  cs __attribute__((unused)),
1 by brian
clean slate
366
                        const uchar *s, size_t slen,
367
                        const uchar *t, size_t tlen,
276 by Brian Aker
Cleaned out my_bool from strings.
368
                        bool t_is_prefix)
1 by brian
clean slate
369
{
370
  size_t len=min(slen,tlen);
371
  int cmp= memcmp(s,t,len);
372
  return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen);
373
}
374
375
376
/*
377
  Compare two strings. 
378
  
379
  SYNOPSIS
380
    my_strnncollsp_mb_bin()
381
    cs			Chararacter set
382
    s			String to compare
383
    slen		Length of 's'
384
    t			String to compare
385
    tlen		Length of 't'
386
    diff_if_only_endspace_difference
387
		        Set to 1 if the strings should be regarded as different
388
                        if they only difference in end space
389
390
  NOTE
391
   This function is used for character strings with binary collations.
392
   The shorter string is extended with end space to be as long as the longer
393
   one.
394
395
  RETURN
396
    A negative number if s < t
397
    A positive number if s > t
398
    0 if strings are equal
399
*/
400
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
401
int my_strnncollsp_mb_bin(const CHARSET_INFO * const  cs __attribute__((unused)),
1 by brian
clean slate
402
                          const uchar *a, size_t a_length,
403
                          const uchar *b, size_t b_length,
276 by Brian Aker
Cleaned out my_bool from strings.
404
                          bool diff_if_only_endspace_difference)
1 by brian
clean slate
405
{
406
  const uchar *end;
407
  size_t length;
408
  int res;
409
410
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
411
  diff_if_only_endspace_difference= 0;
412
#endif
413
  
414
  end= a + (length= min(a_length, b_length));
415
  while (a < end)
416
  {
417
    if (*a++ != *b++)
418
      return ((int) a[-1] - (int) b[-1]);
419
  }
420
  res= 0;
421
  if (a_length != b_length)
422
  {
423
    int swap= 1;
424
    if (diff_if_only_endspace_difference)
425
      res= 1;                                   /* Assume 'a' is bigger */
426
    /*
427
      Check the next not space character of the longer key. If it's < ' ',
428
      then it's smaller than the other key.
429
    */
430
    if (a_length < b_length)
431
    {
432
      /* put shorter key in s */
433
      a_length= b_length;
434
      a= b;
435
      swap= -1;					/* swap sign of result */
436
      res= -res;
437
    }
438
    for (end= a + a_length-length; a < end ; a++)
439
    {
440
      if (*a != ' ')
441
	return (*a < ' ') ? -swap : swap;
442
    }
443
  }
444
  return res;
445
}
446
447
448
/*
449
  Copy one non-ascii character.
450
  "dst" must have enough room for the character.
451
  Note, we don't use sort_order[] in this macros.
452
  This is correct even for case insensitive collations:
453
  - basic Latin letters are processed outside this macros;
454
  - for other characters sort_order[x] is equal to x.
455
*/
456
#define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se)                  \
457
{                                                                        \
458
  switch (cs->cset->ismbchar(cs, (const char*) src, (const char*) se)) { \
459
  case 4:                                                                \
460
    *dst++= *src++;                                                      \
461
    /* fall through */                                                   \
462
  case 3:                                                                \
463
    *dst++= *src++;                                                      \
464
    /* fall through */                                                   \
465
  case 2:                                                                \
466
    *dst++= *src++;                                                      \
467
    /* fall through */                                                   \
468
  case 0:                                                                \
469
    *dst++= *src++; /* byte in range 0x80..0xFF which is not MB head */  \
470
  }                                                                      \
471
}
472
473
474
/*
475
  For character sets with two or three byte multi-byte
476
  characters having multibyte weights *equal* to their codes:
477
  cp932, euckr, gb2312, sjis, eucjpms, ujis.
478
*/
479
size_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
480
my_strnxfrm_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
481
               uchar *dst, size_t dstlen, uint nweights,
482
               const uchar *src, size_t srclen, uint flags)
483
{
484
  uchar *d0= dst;
485
  uchar *de= dst + dstlen;
486
  const uchar *se= src + srclen;
487
  const uchar *sort_order= cs->sort_order;
488
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
489
  assert(cs->mbmaxlen <= 4);
1 by brian
clean slate
490
491
  /*
492
    If "srclen" is smaller than both "dstlen" and "nweights"
493
    then we can run a simplified loop -
494
    without checking "nweights" and "de".
495
  */
496
  if (dstlen >= srclen && nweights >= srclen)
497
  {
498
    if (sort_order)
499
    {
500
      /* Optimized version for a case insensitive collation */
501
      for (; src < se; nweights--)
502
      {
503
        if (*src < 128) /* quickly catch ASCII characters */
504
          *dst++= sort_order[*src++];
505
        else
506
          my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
507
      }
508
    }
509
    else
510
    {
511
      /* Optimized version for a case sensitive collation (no sort_order) */
512
      for (; src < se; nweights--)
513
      {
514
        if (*src < 128) /* quickly catch ASCII characters */
515
          *dst++= *src++;
516
        else
517
          my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
518
      }
519
    }
520
    goto pad;
521
  }
522
523
  /*
524
    A thourough loop, checking all possible limits:
525
    "se", "nweights" and "de".
526
  */
527
  for (; src < se && nweights; nweights--)
528
  {
529
    int chlen;
530
    if (*src < 128 ||
531
        !(chlen= cs->cset->ismbchar(cs, (const char*) src, (const char*) se)))
532
    {
533
      /* Single byte character */
534
      if (dst >= de)
535
        break;
536
      *dst++= sort_order ? sort_order[*src++] : *src++;
537
    }
538
    else
539
    {
540
      /* Multi-byte character */
541
      if (dst + chlen > de)
542
        break;
543
      *dst++= *src++;
544
      *dst++= *src++;
545
      if (chlen == 3)
546
        *dst++= *src++;
547
    }
548
  }
549
550
pad:
551
  return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
552
}
553
554
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
555
int my_strcasecmp_mb_bin(const CHARSET_INFO * const  cs __attribute__((unused)),
1 by brian
clean slate
556
                         const char *s, const char *t)
557
{
558
  return strcmp(s,t);
559
}
560
561
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
562
void my_hash_sort_mb_bin(const CHARSET_INFO * const cs __attribute__((unused)),
290 by Brian Aker
Update for ulong change over.
563
                         const uchar *key, size_t len, uint32_t *nr1, uint32_t *nr2)
1 by brian
clean slate
564
{
565
  const uchar *pos = key;
566
  
567
  /*
568
     Remove trailing spaces. We have to do this to be able to compare
569
    'A ' and 'A' as identical
570
  */
571
  key= skip_trailing_space(key, len);
572
  
266.5.1 by Andy Lester
Fixing casts that remove constness
573
  for (; pos < (const uchar*) key ; pos++)
1 by brian
clean slate
574
  {
575
    nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * 
576
	     ((uint)*pos)) + (nr1[0] << 8);
577
    nr2[0]+=3;
578
  }
579
}
580
581
582
/* 
583
  Fill the given buffer with 'maximum character' for given charset
584
  SYNOPSIS
585
      pad_max_char()
586
      cs   Character set
587
      str  Start of buffer to fill
588
      end  End of buffer to fill
589
590
  DESCRIPTION
591
      Write max key:
592
      - for non-Unicode character sets:
593
        just set to 255.
594
      - for Unicode character set (utf-8):
595
        create a buffer with multibyte representation of the max_sort_char
596
        character, and copy it into max_str in a loop. 
597
*/
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
598
static void pad_max_char(const CHARSET_INFO * const cs, char *str, char *end)
1 by brian
clean slate
599
{
600
  char buf[10];
601
  char buflen;
602
  
603
  if (!(cs->state & MY_CS_UNICODE))
604
  {
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
605
    memset(str, 255, end - str);
1 by brian
clean slate
606
    return;
607
  }
608
  
609
  buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
610
                          (uchar*) buf + sizeof(buf));
611
  
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
612
  assert(buflen > 0);
1 by brian
clean slate
613
  do
614
  {
615
    if ((str + buflen) < end)
616
    {
617
      /* Enough space for the characer */
618
      memcpy(str, buf, buflen);
619
      str+= buflen;
620
    }
621
    else
622
    {
623
      /* 
624
        There is no space for whole multibyte
625
        character, then add trailing spaces.
626
      */  
627
      *str++= ' ';
628
    }
629
  } while (str < end);
630
}
631
632
/*
633
** Calculate min_str and max_str that ranges a LIKE string.
634
** Arguments:
635
** ptr		Pointer to LIKE string.
636
** ptr_length	Length of LIKE string.
637
** escape	Escape character in LIKE.  (Normally '\').
77.1.95 by Monty Taylor
Fixed silly my_bool==char nonsense.
638
**		All escape characters should be removed from
639
**              min_str and max_str
640
** w_one        Single char matching char in LIKE (Normally '_')
641
** w_many       Multiple char matching char in LIKE (Normally '%')
1 by brian
clean slate
642
** res_length	Length of min_str and max_str.
643
** min_str	Smallest case sensitive string that ranges LIKE.
644
**		Should be space padded to res_length.
645
** max_str	Largest case sensitive string that ranges LIKE.
646
**		Normally padded with the biggest character sort value.
647
**
648
** The function should return 0 if ok and 1 if the LIKE string can't be
649
** optimized !
650
*/
651
276 by Brian Aker
Cleaned out my_bool from strings.
652
bool my_like_range_mb(const CHARSET_INFO * const cs,
77.1.95 by Monty Taylor
Fixed silly my_bool==char nonsense.
653
                         const char *ptr,size_t ptr_length,
654
                         char escape, char w_one, char w_many,
655
                         size_t res_length,
656
                         char *min_str,char *max_str,
657
                         size_t *min_length,size_t *max_length)
1 by brian
clean slate
658
{
659
  uint mb_len;
660
  const char *end= ptr + ptr_length;
661
  char *min_org= min_str;
662
  char *min_end= min_str + res_length;
663
  char *max_end= max_str + res_length;
664
  size_t maxcharlen= res_length / cs->mbmaxlen;
665
  const char *contraction_flags= cs->contractions ? 
666
              ((const char*) cs->contractions) + 0x40*0x40 : NULL;
667
668
  for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
669
  {
670
    /* We assume here that escape, w_any, w_namy are one-byte characters */
671
    if (*ptr == escape && ptr+1 != end)
672
      ptr++;                                    /* Skip escape */
673
    else if (*ptr == w_one || *ptr == w_many)   /* '_' and '%' in SQL */
674
    {      
675
fill_max_and_min:
676
      /*
677
        Calculate length of keys:
678
        'a\0\0... is the smallest possible string when we have space expand
679
        a\ff\ff... is the biggest possible string
680
      */
681
      *min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
682
                    res_length);
683
      *max_length= res_length;
684
      /* Create min key  */
685
      do
686
      {
687
	*min_str++= (char) cs->min_sort_char;
688
      } while (min_str != min_end);
689
      
690
      /* 
691
        Write max key: create a buffer with multibyte
692
        representation of the max_sort_char character,
693
        and copy it into max_str in a loop. 
694
      */
695
      *max_length= res_length;
696
      pad_max_char(cs, max_str, max_end);
697
      return 0;
698
    }
699
    if ((mb_len= my_ismbchar(cs, ptr, end)) > 1)
700
    {
701
      if (ptr+mb_len > end || min_str+mb_len > min_end)
702
        break;
703
      while (mb_len--)
704
       *min_str++= *max_str++= *ptr++;
705
    }
706
    else
707
    {
708
      /*
709
        Special case for collations with contractions.
710
        For example, in Chezh, 'ch' is a separate letter
711
        which is sorted between 'h' and 'i'.
712
        If the pattern 'abc%', 'c' at the end can mean:
713
        - letter 'c' itself,
714
        - beginning of the contraction 'ch'.
715
716
        If we simply return this LIKE range:
717
718
         'abc\min\min\min' and 'abc\max\max\max'
719
720
        then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
721
        will only find values starting from 'abc[^h]',
722
        but won't find values starting from 'abch'.
723
724
        We must ignore contraction heads followed by w_one or w_many.
725
        ('Contraction head' means any letter which can be the first
726
        letter in a contraction)
727
728
        For example, for Czech 'abc%', we will return LIKE range,
729
        which is equal to LIKE range for 'ab%':
730
731
        'ab\min\min\min\min' and 'ab\max\max\max\max'.
732
733
      */
734
      if (contraction_flags && ptr + 1 < end &&
735
          contraction_flags[(uchar) *ptr])
736
      {
737
        /* Ptr[0] is a contraction head. */
738
        
739
        if (ptr[1] == w_one || ptr[1] == w_many)
740
        {
741
          /* Contraction head followed by a wildcard, quit. */
742
          goto fill_max_and_min;
743
        }
744
        
745
        /*
746
          Some letters can be both contraction heads and contraction tails.
747
          For example, in Danish 'aa' is a separate single letter which
748
          is sorted after 'z'. So 'a' can be both head and tail.
749
          
750
          If ptr[0]+ptr[1] is a contraction,
751
          then put both letters together.
752
          
753
          If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
754
          is not a contraction, then we put only ptr[0],
755
          and continue with ptr[1] on the next loop.
756
        */
757
        if (contraction_flags[(uchar) ptr[1]] &&
758
            cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40])
759
        {
760
          /* Contraction found */
761
          if (maxcharlen == 1 || min_str + 1 >= min_end)
762
          {
763
            /* Both contraction parts don't fit, quit */
764
            goto fill_max_and_min;
765
          }
766
          
767
          /* Put contraction head */
768
          *min_str++= *max_str++= *ptr++;
769
          maxcharlen--;
770
        }
771
      }
772
      /* Put contraction tail, or a single character */
773
      *min_str++= *max_str++= *ptr++;    
774
    }
775
  }
776
777
  *min_length= *max_length = (size_t) (min_str - min_org);
778
  while (min_str != min_end)
779
    *min_str++= *max_str++= ' ';           /* Because if key compression */
780
  return 0;
781
}
782
783
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
784
int my_wildcmp_mb_bin(const CHARSET_INFO * const cs,
1 by brian
clean slate
785
                      const char *str,const char *str_end,
786
                      const char *wildstr,const char *wildend,
787
                      int escape, int w_one, int w_many)
788
{
789
  int result= -1;				/* Not found, using wildcards */
790
791
  while (wildstr != wildend)
792
  {
793
    while (*wildstr != w_many && *wildstr != w_one)
794
    {
795
      int l;
796
      if (*wildstr == escape && wildstr+1 != wildend)
797
	wildstr++;
798
      if ((l = my_ismbchar(cs, wildstr, wildend)))
799
      {
800
	  if (str+l > str_end || memcmp(str, wildstr, l) != 0)
801
	      return 1;
802
	  str += l;
803
	  wildstr += l;
804
      }
805
      else
806
      if (str == str_end || *wildstr++ != *str++)
807
	return(1);				/* No match */
808
      if (wildstr == wildend)
809
	return (str != str_end);		/* Match if both are at end */
810
      result=1;					/* Found an anchor char */
811
    }
812
    if (*wildstr == w_one)
813
    {
814
      do
815
      {
816
	if (str == str_end)			/* Skip one char if possible */
817
	  return (result);
818
	INC_PTR(cs,str,str_end);
819
      } while (++wildstr < wildend && *wildstr == w_one);
820
      if (wildstr == wildend)
821
	break;
822
    }
823
    if (*wildstr == w_many)
824
    {						/* Found w_many */
825
      uchar cmp;
826
      const char* mb = wildstr;
827
      int mb_len=0;
828
      
829
      wildstr++;
830
      /* Remove any '%' and '_' from the wild search string */
831
      for (; wildstr != wildend ; wildstr++)
832
      {
833
	if (*wildstr == w_many)
834
	  continue;
835
	if (*wildstr == w_one)
836
	{
837
	  if (str == str_end)
838
	    return (-1);
839
	  INC_PTR(cs,str,str_end);
840
	  continue;
841
	}
842
	break;					/* Not a wild character */
843
      }
844
      if (wildstr == wildend)
845
	return(0);				/* Ok if w_many is last */
846
      if (str == str_end)
847
	return -1;
848
      
849
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
850
	cmp= *++wildstr;
851
	
852
      mb=wildstr;
853
      mb_len= my_ismbchar(cs, wildstr, wildend);
854
      INC_PTR(cs,wildstr,wildend);		/* This is compared trough cmp */
855
      do
856
      {
857
        for (;;)
858
        {
859
          if (str >= str_end)
860
            return -1;
861
          if (mb_len)
862
          {
863
            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
864
            {
865
              str += mb_len;
866
              break;
867
            }
868
          }
869
          else if (!my_ismbchar(cs, str, str_end) && *str == cmp)
870
          {
871
            str++;
872
            break;
873
          }
874
          INC_PTR(cs,str, str_end);
875
        }
876
	{
877
	  int tmp=my_wildcmp_mb_bin(cs,str,str_end,wildstr,wildend,escape,w_one,w_many);
878
	  if (tmp <= 0)
879
	    return (tmp);
880
	}
881
      } while (str != str_end && wildstr[0] != w_many);
882
      return(-1);
883
    }
884
  }
885
  return (str != str_end ? 1 : 0);
886
}

/*
  Data was produced from EastAsianWidth.txt
  using utt11-dump utility.

  NOTE(review): each pgXX table presumably holds one flag per code point
  of the Unicode page 0xXX, indexed by the low byte - confirm against the
  code that reads these tables.
*/
static char pg11[256]=
{
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};

/*
  NOTE(review): presumably the flags for Unicode page 0x23, indexed by
  the low byte of the code point - confirm against the lookup code.
*/
static char pg23[256]=
{
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
916
917
/*
  Width table for code points U+2E00..U+2EFF (slot 0x2E of utr11_data[]).
  Indexed by the low byte of the code point; my_numcells_mb() adds the
  entry to the one cell it always counts, so 1 marks a two-cell character.
*/
static char pg2E[256]=
918
{
919
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
920
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
921
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
922
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
923
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,
924
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
925
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
926
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0
927
};
928
929
/*
  Width table for code points U+2F00..U+2FFF (slot 0x2F of utr11_data[]).
  Indexed by the low byte of the code point; my_numcells_mb() adds the
  entry to the one cell it always counts, so 1 marks a two-cell character.
*/
static char pg2F[256]=
930
{
931
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
932
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
933
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
934
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
935
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
936
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
937
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
938
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0
939
};
940
941
/*
  Width table for code points U+3000..U+30FF (slot 0x30 of utr11_data[]).
  Indexed by the low byte of the code point; my_numcells_mb() adds the
  entry to the one cell it always counts, so 1 marks a two-cell character.
*/
static char pg30[256]=
942
{
943
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
944
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
945
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
946
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
947
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,
948
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
949
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
950
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
951
};
952
953
/*
  Width table for code points U+3100..U+31FF (slot 0x31 of utr11_data[]).
  Indexed by the low byte of the code point; my_numcells_mb() adds the
  entry to the one cell it always counts, so 1 marks a two-cell character.
*/
static char pg31[256]=
954
{
955
0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
956
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
957
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
958
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
959
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
960
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
961
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
962
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
963
};
964
965
/*
  Width table for code points U+3200..U+32FF (slot 0x32 of utr11_data[]).
  Indexed by the low byte of the code point; my_numcells_mb() adds the
  entry to the one cell it always counts, so 1 marks a two-cell character.
*/
static char pg32[256]=
966
{
967
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
968
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
969
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
970
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,
971
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
972
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
973
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
974
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
975
};
976
977
/*
  Width table for code points U+4D00..U+4DFF (slot 0x4D of utr11_data[]).
  Indexed by the low byte of the code point; my_numcells_mb() adds the
  entry to the one cell it always counts, so 1 marks a two-cell character.
*/
static char pg4D[256]=
978
{
979
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
980
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
981
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
982
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
983
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
984
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
985
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
986
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
987
};
988
989
/*
  Width table for code points U+9F00..U+9FFF (slot 0x9F of utr11_data[]).
  Indexed by the low byte of the code point; my_numcells_mb() adds the
  entry to the one cell it always counts, so 1 marks a two-cell character.
*/
static char pg9F[256]=
990
{
991
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
992
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
993
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
994
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
995
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
996
1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
997
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
998
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
999
};
1000
1001
/*
  Width table for code points U+A400..U+A4FF (slot 0xA4 of utr11_data[]).
  Indexed by the low byte of the code point; my_numcells_mb() adds the
  entry to the one cell it always counts, so 1 marks a two-cell character.
*/
static char pgA4[256]=
1002
{
1003
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1004
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1005
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1006
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1007
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1008
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1009
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1010
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1011
};
1012
1013
/*
  Width table for code points U+D700..U+D7FF (slot 0xD7 of utr11_data[]).
  Indexed by the low byte of the code point; my_numcells_mb() adds the
  entry to the one cell it always counts, so 1 marks a two-cell character.
*/
static char pgD7[256]=
1014
{
1015
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1016
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1017
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1018
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1019
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1020
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1021
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1022
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1023
};
1024
1025
/*
  Width table for code points U+FA00..U+FAFF (slot 0xFA of utr11_data[]).
  Indexed by the low byte of the code point; my_numcells_mb() adds the
  entry to the one cell it always counts, so 1 marks a two-cell character.
*/
static char pgFA[256]=
1026
{
1027
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1028
1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1029
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1030
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1031
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1032
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1033
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1034
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1035
};
1036
1037
/*
  Width table for code points U+FE00..U+FEFF (slot 0xFE of utr11_data[]).
  Indexed by the low byte of the code point; my_numcells_mb() adds the
  entry to the one cell it always counts, so 1 marks a two-cell character.
*/
static char pgFE[256]=
1038
{
1039
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1040
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1041
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,
1042
1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1043
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1044
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1045
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1046
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1047
};
1048
1049
/*
  Width table for code points U+FF00..U+FFFF (slot 0xFF of utr11_data[]).
  Indexed by the low byte of the code point; my_numcells_mb() adds the
  entry to the one cell it always counts, so 1 marks a two-cell character.
*/
static char pgFF[256]=
1050
{
1051
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1052
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1053
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1054
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1055
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1056
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1057
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1058
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1059
};
1060
1061
/*
  Per-page width index for code points up to U+FFFF, consulted by
  my_numcells_mb(). The array is indexed by the high byte of the code
  point ((wc >> 8) & 0xFF).

    p     If not NULL, a 256-entry table indexed by the low byte
          (wc & 0xFF) giving the extra cell count of each character.
    page  Used only when p is NULL: the extra cell count shared by every
          character in the page (0 = one cell, 1 = two cells).
*/
static struct {int page; char *p;} utr11_data[256]=
1062
{
1063
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1064
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1065
{0,NULL},{0,pg11},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1066
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1067
{0,NULL},{0,NULL},{0,NULL},{0,pg23},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1068
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,pg2E},{0,pg2F},
1069
{0,pg30},{0,pg31},{0,pg32},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1070
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1071
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1072
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg4D},{1,NULL},{1,NULL},
1073
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1074
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1075
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1076
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1077
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1078
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1079
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1080
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1081
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1082
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg9F},
1083
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgA4},{0,NULL},{0,NULL},{0,NULL},
1084
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1085
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1086
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1087
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1088
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1089
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgD7},
1090
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1091
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1092
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1093
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1094
{0,NULL},{1,NULL},{0,pgFA},{0,NULL},{0,NULL},{0,NULL},{0,pgFE},{0,pgFF}
1095
};
1096
1097
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1098
size_t my_numcells_mb(const CHARSET_INFO * const cs, const char *b, const char *e)
1 by brian
clean slate
1099
{
1100
  my_wc_t wc;
1101
  size_t clen= 0;
1102
  
1103
  while (b < e)
1104
  {
1105
    int mb_len;
1106
    uint pg;
1107
    if ((mb_len= cs->cset->mb_wc(cs, &wc, (uchar*) b, (uchar*) e)) <= 0 ||
1108
        wc > 0xFFFF)
1109
    {
1110
      /*
1111
        Let's think a wrong sequence takes 1 dysplay cell.
1112
        Also, consider supplementary characters as taking one cell.
1113
      */
1114
      mb_len= 1;
1115
      b++;
1116
      continue;
1117
    }
1118
    b+= mb_len;
1119
    pg= (wc >> 8) & 0xFF;
1120
    clen+= utr11_data[pg].p ? utr11_data[pg].p[wc & 0xFF] : utr11_data[pg].page;
1121
    clen++;
1122
  }
1123
  return clen;
1124
}
1125
1126
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1127
int my_mb_ctype_mb(const CHARSET_INFO * const cs, int *ctype,
1 by brian
clean slate
1128
                   const uchar *s, const uchar *e)
1129
{
1130
  my_wc_t wc;
1131
  int res= cs->cset->mb_wc(cs, &wc, s, e);
1132
  if (res <= 0 || wc > 0xFFFF)
1133
    *ctype= 0;
1134
  else
1135
    *ctype= my_uni_ctype[wc>>8].ctype ?
1136
            my_uni_ctype[wc>>8].ctype[wc&0xFF] :
1137
            my_uni_ctype[wc>>8].pctype;    
1138
  return res;
1139
}
1140
1141
1142
/*
  Collation handler for the multi-byte binary collation.
  The initializer is positional, so the entries must stay in the order
  of the members of MY_COLLATION_HANDLER (declared outside this file
  section). Only the first slot (init) is left unset.
*/
MY_COLLATION_HANDLER my_collation_mb_bin_handler =
1143
{
1144
    NULL,              /* init */
1145
    my_strnncoll_mb_bin,
1146
    my_strnncollsp_mb_bin,
1147
    my_strnxfrm_mb,
1148
    my_strnxfrmlen_simple,
1149
    my_like_range_mb,
1150
    my_wildcmp_mb_bin,
1151
    my_strcasecmp_mb_bin,
1152
    my_instr_mb,
1153
    my_hash_sort_mb_bin,
1154
    my_propagate_simple
1155
};
1156
1157
#endif