~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
   
3
   This library is free software; you can redistribute it and/or
4
   modify it under the terms of the GNU Library General Public
5
   License as published by the Free Software Foundation; version 2
6
   of the License.
7
   
8
   This library is distributed in the hope that it will be useful,
9
   but WITHOUT ANY WARRANTY; without even the implied warranty of
10
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
   Library General Public License for more details.
12
   
13
   You should have received a copy of the GNU Library General Public
14
   License along with this library; if not, write to the Free
15
   Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
16
   MA 02111-1307, USA */
17
18
/* UCS2 support. Written by Alexander Barkov <bar@mysql.com> */
19
20
#include <my_global.h>
21
#include <my_sys.h>
22
#include "m_string.h"
23
#include "m_ctype.h"
24
#include <errno.h>
25
#include <stdarg.h>
26
27
28
#ifndef EILSEQ
29
#define EILSEQ ENOENT
30
#endif
31
32
#undef  ULONGLONG_MAX
/* All-ones value of ulonglong, i.e. the largest unsigned value */
#define ULONGLONG_MAX                (~(ulonglong) 0)
/* 2^63: magnitude of the most negative value accepted by my_strtoll10_mb2() */
#define MAX_NEGATIVE_NUMBER        ((ulonglong) 0x8000000000000000LL)
/* Number of decimal digits my_strtoll10_mb2() accumulates per chunk */
#define INIT_CNT  9
#define LFACTOR   1000000000ULL      /* 10^9  */
#define LFACTOR1  10000000000ULL     /* 10^10 */
#define LFACTOR2  100000000000ULL    /* 10^11 */

/*
  U+FFFD, substituted by my_tosort_utf16() for characters it has no
  sort weight for.  No trailing semicolon: the macro must expand to a
  plain expression so that it can be used anywhere a value is expected.
*/
#define REPLACEMENT_CHAR 0xFFFD
41
42
43
#if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2)
44
#define HAVE_CHARSET_mb2
45
#endif
46
47
48
#if defined(HAVE_CHARSET_mb2) || defined(HAVE_CHARSET_utf32)
49
#define HAVE_CHARSET_mb2_or_mb4
50
#endif
51
52
53
#ifdef HAVE_CHARSET_mb2_or_mb4
54
static inline int
55
my_bincmp(const uchar *s, const uchar *se,
56
          const uchar *t, const uchar *te)
57
{
58
  int slen= (int) (se - s), tlen= (int) (te - t);
59
  int len= min(slen, tlen);
60
  int cmp= memcmp(s, t, len);
61
  return cmp ? cmp : slen - tlen;
62
}
63
64
65
static size_t
66
my_caseup_str_mb2_or_mb4(CHARSET_INFO * cs  __attribute__((unused)), 
67
                         char * s __attribute__((unused)))
68
{
69
  DBUG_ASSERT(0);
70
  return 0;
71
}
72
73
74
static size_t
75
my_casedn_str_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)), 
76
                         char * s __attribute__((unused)))
77
{
78
  DBUG_ASSERT(0);
79
  return 0;
80
}
81
82
83
static int
84
my_strcasecmp_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
85
                         const char *s __attribute__((unused)),
86
                         const char *t __attribute__((unused)))
87
{
88
  DBUG_ASSERT(0);
89
  return 0;
90
}
91
92
93
static long
94
my_strntol_mb2_or_mb4(CHARSET_INFO *cs,
95
                      const char *nptr, size_t l, int base,
96
                      char **endptr, int *err)
97
{
98
  int      negative= 0;
99
  int      overflow;
100
  int      cnv;
101
  my_wc_t  wc;
102
  register unsigned int cutlim;
103
  register uint32 cutoff;
104
  register uint32 res;
105
  register const uchar *s= (const uchar*) nptr;
106
  register const uchar *e= (const uchar*) nptr+l;
107
  const uchar *save;
108
  
109
  *err= 0;
110
  do
111
  {
112
    if ((cnv= cs->cset->mb_wc(cs, &wc, s, e))>0)
113
    {
114
      switch (wc)
115
      {
116
        case ' ' : break;
117
        case '\t': break;
118
        case '-' : negative= !negative; break;
119
        case '+' : break;
120
        default  : goto bs;
121
      }
122
    } 
123
    else /* No more characters or bad multibyte sequence */
124
    {
125
      if (endptr != NULL )
126
        *endptr= (char*) s;
127
      err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
128
      return 0;
129
    } 
130
    s+= cnv;
131
  } while (1);
132
  
133
bs:
134
135
#ifdef NOT_USED  
136
  if (base <= 0 || base == 1 || base > 36)
137
    base = 10;
138
#endif
139
  
140
  overflow= 0;
141
  res= 0;
142
  save= s;
143
  cutoff= ((uint32)~0L) / (uint32) base;
144
  cutlim= (uint) (((uint32)~0L) % (uint32) base);
145
  
146
  do {
147
    if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
148
    {
149
      s+= cnv;
150
      if (wc >= '0' && wc <= '9')
151
        wc-= '0';
152
      else if (wc >= 'A' && wc <= 'Z')
153
        wc= wc - 'A' + 10;
154
      else if (wc >= 'a' && wc <= 'z')
155
        wc= wc - 'a' + 10;
156
      else
157
        break;
158
      if ((int)wc >= base)
159
        break;
160
      if (res > cutoff || (res == cutoff && wc > cutlim))
161
        overflow= 1;
162
      else
163
      {
164
        res*= (uint32) base;
165
        res+= wc;
166
      }
167
    }
168
    else if (cnv == MY_CS_ILSEQ)
169
    {
170
      if (endptr !=NULL )
171
        *endptr = (char*) s;
172
      err[0]= EILSEQ;
173
      return 0;
174
    } 
175
    else
176
    {
177
      /* No more characters */
178
      break;
179
    }
180
  } while(1);
181
  
182
  if (endptr != NULL)
183
    *endptr = (char *) s;
184
  
185
  if (s == save)
186
  {
187
    err[0]= EDOM;
188
    return 0L;
189
  }
190
  
191
  if (negative)
192
  {
193
    if (res > (uint32) INT_MIN32)
194
      overflow= 1;
195
  }
196
  else if (res > INT_MAX32)
197
    overflow= 1;
198
  
199
  if (overflow)
200
  {
201
    err[0]= ERANGE;
202
    return negative ? INT_MIN32 : INT_MAX32;
203
  }
204
  
205
  return (negative ? -((long) res) : (long) res);
206
}
207
208
209
static ulong
210
my_strntoul_mb2_or_mb4(CHARSET_INFO *cs,
211
                       const char *nptr, size_t l, int base, 
212
                       char **endptr, int *err)
213
{
214
  int      negative= 0;
215
  int      overflow;
216
  int      cnv;
217
  my_wc_t  wc;
218
  register unsigned int cutlim;
219
  register uint32 cutoff;
220
  register uint32 res;
221
  register const uchar *s= (const uchar*) nptr;
222
  register const uchar *e= (const uchar*) nptr + l;
223
  const uchar *save;
224
  
225
  *err= 0;
226
  do
227
  {
228
    if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
229
    {
230
      switch (wc)
231
      {
232
        case ' ' : break;
233
        case '\t': break;
234
        case '-' : negative= !negative; break;
235
        case '+' : break;
236
        default  : goto bs;
237
      }
238
    } 
239
    else /* No more characters or bad multibyte sequence */
240
    {
241
      if (endptr !=NULL )
242
        *endptr= (char*)s;
243
      err[0]= (cnv == MY_CS_ILSEQ) ? EILSEQ : EDOM;
244
      return 0;
245
    } 
246
    s+= cnv;
247
  } while (1);
248
  
249
bs:
250
251
#ifdef NOT_USED
252
  if (base <= 0 || base == 1 || base > 36)
253
    base = 10;
254
#endif
255
256
  overflow= 0;
257
  res= 0;
258
  save= s;
259
  cutoff= ((uint32)~0L) / (uint32) base;
260
  cutlim= (uint) (((uint32)~0L) % (uint32) base);
261
  
262
  do
263
  {
264
    if ((cnv= cs->cset->mb_wc(cs, &wc, s, e)) > 0)
265
    {
266
      s+= cnv;
267
      if (wc >= '0' && wc <= '9')
268
        wc-= '0';
269
      else if (wc >= 'A' && wc <= 'Z')
270
        wc= wc - 'A' + 10;
271
      else if (wc >= 'a' && wc <= 'z')
272
        wc= wc - 'a' + 10;
273
      else
274
        break;
275
      if ((int) wc >= base)
276
        break;
277
      if (res > cutoff || (res == cutoff && wc > cutlim))
278
        overflow = 1;
279
      else
280
      {
281
        res*= (uint32) base;
282
        res+= wc;
283
      }
284
    }
285
    else if (cnv == MY_CS_ILSEQ)
286
    {
287
      if (endptr != NULL )
288
        *endptr= (char*)s;
289
      err[0]= EILSEQ;
290
      return 0;
291
    } 
292
    else
293
    {
294
      /* No more characters */
295
      break;
296
    }
297
  } while(1);
298
  
299
  if (endptr != NULL)
300
    *endptr= (char *) s;
301
  
302
  if (s == save)
303
  {
304
    err[0]= EDOM;
305
    return 0L;
306
  }
307
  
308
  if (overflow)
309
  {
310
    err[0]= (ERANGE);
311
    return (~(uint32) 0);
312
  }
313
  
314
  return (negative ? -((long) res) : (long) res);
315
}
316
317
318
static longlong 
319
my_strntoll_mb2_or_mb4(CHARSET_INFO *cs,
320
                       const char *nptr, size_t l, int base,
321
                       char **endptr, int *err)
322
{
323
  int      negative=0;
324
  int      overflow;
325
  int      cnv;
326
  my_wc_t  wc;
327
  register ulonglong    cutoff;
328
  register unsigned int cutlim;
329
  register ulonglong    res;
330
  register const uchar *s= (const uchar*) nptr;
331
  register const uchar *e= (const uchar*) nptr+l;
332
  const uchar *save;
333
  
334
  *err= 0;
335
  do
336
  {
337
    if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
338
    {
339
      switch (wc)
340
      {
341
        case ' ' : break;
342
        case '\t': break;
343
        case '-' : negative= !negative; break;
344
        case '+' : break;
345
        default  : goto bs;
346
      }
347
    } 
348
    else /* No more characters or bad multibyte sequence */
349
    {
350
      if (endptr !=NULL )
351
        *endptr = (char*)s;
352
      err[0] = (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
353
      return 0;
354
    } 
355
    s+=cnv;
356
  } while (1);
357
  
358
bs:
359
360
#ifdef NOT_USED  
361
  if (base <= 0 || base == 1 || base > 36)
362
    base = 10;
363
#endif
364
365
  overflow = 0;
366
  res = 0;
367
  save = s;
368
  cutoff = (~(ulonglong) 0) / (unsigned long int) base;
369
  cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
370
371
  do {
372
    if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
373
    {
374
      s+=cnv;
375
      if ( wc>='0' && wc<='9')
376
        wc -= '0';
377
      else if ( wc>='A' && wc<='Z')
378
        wc = wc - 'A' + 10;
379
      else if ( wc>='a' && wc<='z')
380
        wc = wc - 'a' + 10;
381
      else
382
        break;
383
      if ((int)wc >= base)
384
        break;
385
      if (res > cutoff || (res == cutoff && wc > cutlim))
386
        overflow = 1;
387
      else
388
      {
389
        res *= (ulonglong) base;
390
        res += wc;
391
      }
392
    }
393
    else if (cnv==MY_CS_ILSEQ)
394
    {
395
      if (endptr !=NULL )
396
        *endptr = (char*)s;
397
      err[0]=EILSEQ;
398
      return 0;
399
    } 
400
    else
401
    {
402
      /* No more characters */
403
      break;
404
    }
405
  } while(1);
406
  
407
  if (endptr != NULL)
408
    *endptr = (char *) s;
409
  
410
  if (s == save)
411
  {
412
    err[0]=EDOM;
413
    return 0L;
414
  }
415
  
416
  if (negative)
417
  {
418
    if (res  > (ulonglong) LONGLONG_MIN)
419
      overflow = 1;
420
  }
421
  else if (res > (ulonglong) LONGLONG_MAX)
422
    overflow = 1;
423
  
424
  if (overflow)
425
  {
426
    err[0]=ERANGE;
427
    return negative ? LONGLONG_MIN : LONGLONG_MAX;
428
  }
429
  
430
  return (negative ? -((longlong)res) : (longlong)res);
431
}
432
433
434
static ulonglong
435
my_strntoull_mb2_or_mb4(CHARSET_INFO *cs,
436
                        const char *nptr, size_t l, int base,
437
                        char **endptr, int *err)
438
{
439
  int      negative= 0;
440
  int      overflow;
441
  int      cnv;
442
  my_wc_t  wc;
443
  register ulonglong    cutoff;
444
  register unsigned int cutlim;
445
  register ulonglong    res;
446
  register const uchar *s= (const uchar*) nptr;
447
  register const uchar *e= (const uchar*) nptr + l;
448
  const uchar *save;
449
  
450
  *err= 0;
451
  do
452
  {
453
    if ((cnv= cs->cset->mb_wc(cs,&wc,s,e)) > 0)
454
    {
455
      switch (wc)
456
      {
457
        case ' ' : break;
458
        case '\t': break;
459
        case '-' : negative= !negative; break;
460
        case '+' : break;
461
        default  : goto bs;
462
      }
463
    } 
464
    else /* No more characters or bad multibyte sequence */
465
    {
466
      if (endptr !=NULL )
467
        *endptr = (char*)s;
468
      err[0]= (cnv==MY_CS_ILSEQ) ? EILSEQ : EDOM;
469
      return 0;
470
    } 
471
    s+=cnv;
472
  } while (1);
473
  
474
bs:
475
  
476
#ifdef NOT_USED
477
  if (base <= 0 || base == 1 || base > 36)
478
    base = 10;
479
#endif
480
481
  overflow = 0;
482
  res = 0;
483
  save = s;
484
  cutoff = (~(ulonglong) 0) / (unsigned long int) base;
485
  cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base);
486
487
  do
488
  {
489
    if ((cnv=cs->cset->mb_wc(cs,&wc,s,e))>0)
490
    {
491
      s+=cnv;
492
      if ( wc>='0' && wc<='9')
493
        wc -= '0';
494
      else if ( wc>='A' && wc<='Z')
495
        wc = wc - 'A' + 10;
496
      else if ( wc>='a' && wc<='z')
497
        wc = wc - 'a' + 10;
498
      else
499
        break;
500
      if ((int)wc >= base)
501
        break;
502
      if (res > cutoff || (res == cutoff && wc > cutlim))
503
        overflow = 1;
504
      else
505
      {
506
        res *= (ulonglong) base;
507
        res += wc;
508
      }
509
    }
510
    else if (cnv==MY_CS_ILSEQ)
511
    {
512
      if (endptr !=NULL )
513
        *endptr = (char*)s;
514
      err[0]= EILSEQ;
515
      return 0;
516
    } 
517
    else
518
    {
519
      /* No more characters */
520
      break;
521
    }
522
  } while(1);
523
  
524
  if (endptr != NULL)
525
    *endptr = (char *) s;
526
  
527
  if (s == save)
528
  {
529
    err[0]= EDOM;
530
    return 0L;
531
  }
532
  
533
  if (overflow)
534
  {
535
    err[0]= ERANGE;
536
    return (~(ulonglong) 0);
537
  }
538
539
  return (negative ? -((longlong) res) : (longlong) res);
540
}
541
542
543
static double
544
my_strntod_mb2_or_mb4(CHARSET_INFO *cs,
545
                      char *nptr, size_t length, 
546
                      char **endptr, int *err)
547
{
548
  char     buf[256];
549
  double   res;
550
  register char *b= buf;
551
  register const uchar *s= (const uchar*) nptr;
552
  const uchar *end;
553
  my_wc_t  wc;
554
  int     cnv;
555
556
  *err= 0;
557
  /* Cut too long strings */
558
  if (length >= sizeof(buf))
559
    length= sizeof(buf) - 1;
560
  end= s + length;
561
562
  while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0)
563
  {
564
    s+= cnv;
565
    if (wc > (int) (uchar) 'e' || !wc)
566
      break;                                        /* Can't be part of double */
567
    *b++= (char) wc;
568
  }
569
570
  *endptr= b;
571
  res= my_strtod(buf, endptr, err);
572
  *endptr= nptr + cs->mbminlen * (size_t) (*endptr - buf);
573
  return res;
574
}
575
576
577
static ulonglong
578
my_strntoull10rnd_mb2_or_mb4(CHARSET_INFO *cs,
579
                             const char *nptr, size_t length,
580
                             int unsign_fl,
581
                             char **endptr, int *err)
582
{
583
  char  buf[256], *b= buf;
584
  ulonglong res;
585
  const uchar *end, *s= (const uchar*) nptr;
586
  my_wc_t  wc;
587
  int     cnv;
588
589
  /* Cut too long strings */
590
  if (length >= sizeof(buf))
591
    length= sizeof(buf)-1;
592
  end= s + length;
593
594
  while ((cnv= cs->cset->mb_wc(cs,&wc,s,end)) > 0)
595
  {
596
    s+= cnv;
597
    if (wc > (int) (uchar) 'e' || !wc)
598
      break;                            /* Can't be a number part */
599
    *b++= (char) wc;
600
  }
601
602
  res= my_strntoull10rnd_8bit(cs, buf, b - buf, unsign_fl, endptr, err);
603
  *endptr= (char*) nptr + cs->mbminlen * (size_t) (*endptr - buf);
604
  return res;
605
}
606
607
608
/*
609
  This is a fast version optimized for the case of radix 10 / -10
610
*/
611
612
static size_t
613
my_l10tostr_mb2_or_mb4(CHARSET_INFO *cs,
614
                       char *dst, size_t len, int radix, long int val)
615
{
616
  char buffer[66];
617
  register char *p, *db, *de;
618
  long int new_val;
619
  int  sl= 0;
620
  unsigned long int uval = (unsigned long int) val;
621
  
622
  p= &buffer[sizeof(buffer) - 1];
623
  *p= '\0';
624
  
625
  if (radix < 0)
626
  {
627
    if (val < 0)
628
    {
629
      sl= 1;
630
      /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
631
      uval  = (unsigned long int)0 - uval;
632
    }
633
  }
634
  
635
  new_val = (long) (uval / 10);
636
  *--p    = '0'+ (char) (uval - (unsigned long) new_val * 10);
637
  val= new_val;
638
  
639
  while (val != 0)
640
  {
641
    new_val= val / 10;
642
    *--p= '0' + (char) (val - new_val * 10);
643
    val= new_val;
644
  }
645
  
646
  if (sl)
647
  {
648
    *--p= '-';
649
  }
650
  
651
  for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
652
  {
653
    int cnvres= cs->cset->wc_mb(cs,(my_wc_t)p[0],(uchar*) dst, (uchar*) de);
654
    if (cnvres > 0)
655
      dst+= cnvres;
656
    else
657
      break;
658
  }
659
  return (int) (dst - db);
660
}
661
662
663
static size_t
664
my_ll10tostr_mb2_or_mb4(CHARSET_INFO *cs,
665
                        char *dst, size_t len, int radix, longlong val)
666
{
667
  char buffer[65];
668
  register char *p, *db, *de;
669
  long long_val;
670
  int sl= 0;
671
  ulonglong uval= (ulonglong) val;
672
  
673
  if (radix < 0)
674
  {
675
    if (val < 0)
676
    {
677
      sl= 1;
678
      /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). */
679
      uval = (ulonglong)0 - uval;
680
    }
681
  }
682
  
683
  p= &buffer[sizeof(buffer)-1];
684
  *p='\0';
685
  
686
  if (uval == 0)
687
  {
688
    *--p= '0';
689
    goto cnv;
690
  }
691
  
692
  while (uval > (ulonglong) LONG_MAX)
693
  {
694
    ulonglong quo= uval/(uint) 10;
695
    uint rem= (uint) (uval- quo* (uint) 10);
696
    *--p= '0' + rem;
697
    uval= quo;
698
  }
699
  
700
  long_val= (long) uval;
701
  while (long_val != 0)
702
  {
703
    long quo= long_val/10;
704
    *--p= (char) ('0' + (long_val - quo*10));
705
    long_val= quo;
706
  }
707
  
708
cnv:
709
  if (sl)
710
  {
711
    *--p= '-';
712
  }
713
  
714
  for ( db= dst, de= dst + len ; (dst < de) && *p ; p++)
715
  {
716
    int cnvres= cs->cset->wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de);
717
    if (cnvres > 0)
718
      dst+= cnvres;
719
    else
720
      break;
721
  }
722
  return (int) (dst -db);
723
}
724
725
#endif
726
727
728
#ifdef HAVE_CHARSET_mb2
729
static longlong
730
my_strtoll10_mb2(CHARSET_INFO *cs __attribute__((unused)),
731
                 const char *nptr, char **endptr, int *error)
732
{
733
  const char *s, *end, *start, *n_end, *true_end;
734
  uchar c;
735
  unsigned long i, j, k;
736
  ulonglong li;
737
  int negative;
738
  ulong cutoff, cutoff2, cutoff3;
739
740
  s= nptr;
741
  /* If fixed length string */
742
  if (endptr)
743
  {
744
    /* Make sure string length is even */
745
    end= s + ((*endptr - s) / 2) * 2;
746
    while (s < end && !s[0] && (s[1] == ' ' || s[1] == '\t'))
747
      s+= 2;
748
    if (s == end)
749
      goto no_conv;
750
  }
751
  else
752
  {
753
     /* We don't support null terminated strings in UCS2 */
754
     goto no_conv;
755
  }
756
757
  /* Check for a sign. */
758
  negative= 0;
759
  if (!s[0] && s[1] == '-')
760
  {
761
    *error= -1;                                        /* Mark as negative number */
762
    negative= 1;
763
    s+= 2;
764
    if (s == end)
765
      goto no_conv;
766
    cutoff=  MAX_NEGATIVE_NUMBER / LFACTOR2;
767
    cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
768
    cutoff3=  MAX_NEGATIVE_NUMBER % 100;
769
  }
770
  else
771
  {
772
    *error= 0;
773
    if (!s[0] && s[1] == '+')
774
    {
775
      s+= 2;
776
      if (s == end)
777
        goto no_conv;
778
    }
779
    cutoff=  ULONGLONG_MAX / LFACTOR2;
780
    cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
781
    cutoff3=  ULONGLONG_MAX % 100;
782
  }
783
784
  /* Handle case where we have a lot of pre-zero */
785
  if (!s[0] && s[1] == '0')
786
  {
787
    i= 0;
788
    do
789
    {
790
      s+= 2;
791
      if (s == end)
792
        goto end_i;                                /* Return 0 */
793
    }
794
    while (!s[0] && s[1] == '0');
795
    n_end= s + 2 * INIT_CNT;
796
  }
797
  else
798
  {
799
    /* Read first digit to check that it's a valid number */
800
    if (s[0] || (c= (s[1]-'0')) > 9)
801
      goto no_conv;
802
    i= c;
803
    s+= 2;
804
    n_end= s + 2 * (INIT_CNT-1);
805
  }
806
807
  /* Handle first 9 digits and store them in i */
808
  if (n_end > end)
809
    n_end= end;
810
  for (; s != n_end ; s+= 2)
811
  {
812
    if (s[0] || (c= (s[1]-'0')) > 9)
813
      goto end_i;
814
    i= i*10+c;
815
  }
816
  if (s == end)
817
    goto end_i;
818
819
  /* Handle next 9 digits and store them in j */
820
  j= 0;
821
  start= s;                                /* Used to know how much to shift i */
822
  n_end= true_end= s + 2 * INIT_CNT;
823
  if (n_end > end)
824
    n_end= end;
825
  do
826
  {
827
    if (s[0] || (c= (s[1]-'0')) > 9)
828
      goto end_i_and_j;
829
    j= j*10+c;
830
    s+= 2;
831
  } while (s != n_end);
832
  if (s == end)
833
  {
834
    if (s != true_end)
835
      goto end_i_and_j;
836
    goto end3;
837
  }
838
  if (s[0] || (c= (s[1]-'0')) > 9)
839
    goto end3;
840
841
  /* Handle the next 1 or 2 digits and store them in k */
842
  k=c;
843
  s+= 2;
844
  if (s == end || s[0] || (c= (s[1]-'0')) > 9)
845
    goto end4;
846
  k= k*10+c;
847
  s+= 2;
848
  *endptr= (char*) s;
849
850
  /* number string should have ended here */
851
  if (s != end && !s[0] && (c= (s[1]-'0')) <= 9)
852
    goto overflow;
853
854
  /* Check that we didn't get an overflow with the last digit */
855
  if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
856
                                     k > cutoff3)))
857
    goto overflow;
858
  li=i*LFACTOR2+ (ulonglong) j*100 + k;
859
  return (longlong) li;
860
861
overflow:                                        /* *endptr is set here */
862
  *error= MY_ERRNO_ERANGE;
863
  return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
864
865
end_i:
866
  *endptr= (char*) s;
867
  return (negative ? ((longlong) -(long) i) : (longlong) i);
868
869
end_i_and_j:
870
  li= (ulonglong) i * lfactor[(size_t) (s-start) / 2] + j;
871
  *endptr= (char*) s;
872
  return (negative ? -((longlong) li) : (longlong) li);
873
874
end3:
875
  li=(ulonglong) i*LFACTOR+ (ulonglong) j;
876
  *endptr= (char*) s;
877
  return (negative ? -((longlong) li) : (longlong) li);
878
879
end4:
880
  li=(ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
881
  *endptr= (char*) s;
882
  if (negative)
883
  {
884
   if (li > MAX_NEGATIVE_NUMBER)
885
     goto overflow;
886
   return -((longlong) li);
887
  }
888
  return (longlong) li;
889
890
no_conv:
891
  /* There was no number to convert.  */
892
  *error= MY_ERRNO_EDOM;
893
  *endptr= (char *) nptr;
894
  return 0;
895
}
896
897
898
static size_t
899
my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)),
900
            const char *str, const char *end, int sequence_type)
901
{
902
  const char *str0= str;
903
  end--; /* for easier loop condition, because of two bytes per character */
904
  
905
  switch (sequence_type)
906
  {
907
  case MY_SEQ_SPACES:
908
    for ( ; str < end; str+= 2)
909
    {
910
      if (str[0] != '\0' || str[1] != ' ')
911
        break;
912
    }
913
    return (size_t) (str - str0);
914
  default:
915
    return 0;
916
  }
917
}
918
919
920
static void
921
my_fill_mb2(CHARSET_INFO *cs __attribute__((unused)),
922
            char *s, size_t l, int fill)
923
{
924
  for ( ; l >= 2; s[0]= 0, s[1]= fill, s+= 2, l-= 2);
925
}
926
927
928
static int
929
my_vsnprintf_mb2(char *dst, size_t n, const char* fmt, va_list ap)
930
{
931
  char *start=dst, *end= dst + n - 1;
932
  for (; *fmt ; fmt++)
933
  {
934
    if (fmt[0] != '%')
935
    {
936
      if (dst == end)                     /* End of buffer */
937
        break;
938
      
939
      *dst++='\0';
940
      *dst++= *fmt;          /* Copy ordinary char */
941
      continue;
942
    }
943
    
944
    fmt++;
945
    
946
    /* Skip if max size is used (to be compatible with printf) */
947
    while ( (*fmt >= '0' && *fmt <= '9') || *fmt == '.' || *fmt == '-')
948
      fmt++;
949
    
950
    if (*fmt == 'l')
951
      fmt++;
952
    
953
    if (*fmt == 's')                      /* String parameter */
954
    {
955
      char *par= va_arg(ap, char *);
956
      size_t plen;
957
      size_t left_len= (size_t)(end-dst);
958
      if (!par)
959
        par= (char*) "(null)";
960
      plen= strlen(par);
961
      if (left_len <= plen * 2)
962
        plen = left_len / 2 - 1;
963
964
      for ( ; plen ; plen--, dst+=2, par++)
965
      {
966
        dst[0]= '\0';
967
        dst[1]= par[0];
968
      }
969
      continue;
970
    }
971
    else if (*fmt == 'd' || *fmt == 'u')  /* Integer parameter */
972
    {
973
      int iarg;
974
      char nbuf[16];
975
      char *pbuf= nbuf;
976
      
977
      if ((size_t) (end - dst) < 32)
978
        break;
979
      iarg= va_arg(ap, int);
980
      if (*fmt == 'd')
981
        int10_to_str((long) iarg, nbuf, -10);
982
      else
983
        int10_to_str((long) (uint) iarg, nbuf,10);
984
985
      for (; pbuf[0]; pbuf++)
986
      {
987
        *dst++= '\0';
988
        *dst++= *pbuf;
989
      }
990
      continue;
991
    }
992
    
993
    /* We come here on '%%', unknown code or too long parameter */
994
    if (dst == end)
995
      break;
996
    *dst++= '\0';
997
    *dst++= '%';                            /* % used as % or unknown code */
998
  }
999
  
1000
  DBUG_ASSERT(dst <= end);
1001
  *dst='\0';                                /* End of errmessage */
1002
  return (size_t) (dst - start);
1003
}
1004
1005
1006
static size_t
1007
my_snprintf_mb2(CHARSET_INFO *cs __attribute__((unused)),
77.1.18 by Monty Taylor
Removed my_vsnprintf and my_snprintf.
1008
             char* to, size_t n, const char* fmt, ...)
1 by brian
clean slate
1009
{
1010
  va_list args;
1011
  va_start(args,fmt);
1012
  return my_vsnprintf_mb2(to, n, fmt, args);
1013
}
1014
1015
1016
static size_t
1017
my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)),
1018
                const char *ptr, size_t length)
1019
{
1020
  const char *end= ptr + length;
1021
  while (end > ptr + 1 && end[-1] == ' ' && end[-2] == '\0')
1022
    end-= 2;
1023
  return (size_t) (end - ptr);
1024
}
1025
1026
#endif
1027
1028
1029
#ifdef HAVE_CHARSET_utf16
1030
1031
/*
1032
  D800..DB7F - Non-private surrogate high (896 pages)
1033
  DB80..DBFF - Private surrogate high     (128 pages)
1034
  DC00..DFFF - Surrogate low              (1024 codes in a page)
1035
*/
1036
1037
/* True if byte x matches 110110??, the first byte of a high surrogate */
#define MY_UTF16_HIGH_HEAD(x)  ((((uchar) (x)) & 0xFC) == 0xD8)
1038
/* True if byte x matches 110111??, the first byte of a low surrogate */
#define MY_UTF16_LOW_HEAD(x)   ((((uchar) (x)) & 0xFC) == 0xDC)
1039
/* True if bits 11..15 of x are 11011, i.e. its low 16 bits are in D800..DFFF */
#define MY_UTF16_SURROGATE(x)  (((x) & 0xF800) == 0xD800)
1040
1041
static int
1042
my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)),
1043
             my_wc_t *pwc, const uchar *s, const uchar *e)
1044
{
1045
  if (s + 2 > e)
1046
    return MY_CS_TOOSMALL2;
1047
  
1048
  /*
1049
    High bytes: 0xD[89AB] = B'110110??'
1050
    Low bytes:  0xD[CDEF] = B'110111??'
1051
    Surrogate mask:  0xFC = B'11111100'
1052
  */
1053
1054
  if (MY_UTF16_HIGH_HEAD(*s)) /* Surrogate head */
1055
  {
1056
    if (s + 4 > e)
1057
      return MY_CS_TOOSMALL4;
1058
1059
    if (!MY_UTF16_LOW_HEAD(s[2]))  /* Broken surrigate pair */
1060
      return MY_CS_ILSEQ;
1061
1062
    /*
1063
      s[0]= 110110??  (<< 18)
1064
      s[1]= ????????  (<< 10)
1065
      s[2]= 110111??  (<<  8)
1066
      s[3]= ????????  (<<  0)
1067
    */ 
1068
1069
    *pwc= ((s[0] & 3) << 18) + (s[1] << 10) +
1070
          ((s[2] & 3) << 8) + s[3] + 0x10000;
1071
1072
    return 4;
1073
  }
1074
1075
  if (MY_UTF16_LOW_HEAD(*s)) /* Low surrogate part without high part */
1076
    return MY_CS_ILSEQ;
1077
  
1078
  *pwc= (s[0] << 8) + s[1];
1079
  return 2;
1080
}
1081
1082
1083
static int
1084
my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)),
1085
             my_wc_t wc, uchar *s, uchar *e)
1086
{
1087
  if (wc <= 0xFFFF)
1088
  {
1089
    if (s + 2 > e)
1090
      return MY_CS_TOOSMALL2;
1091
    if (MY_UTF16_SURROGATE(wc))
1092
      return MY_CS_ILUNI;
1093
    *s++= (uchar) (wc >> 8);
1094
    *s= (uchar) (wc & 0xFF);
1095
    return 2;
1096
  }
1097
1098
  if (wc <= 0x10FFFF)
1099
  {
1100
    if (s + 4 > e)
1101
      return MY_CS_TOOSMALL4;
1102
    *s++= (uchar) ((wc-= 0x10000) >> 18) | 0xD8;
1103
    *s++= (uchar) (wc >> 10) & 0xFF;
1104
    *s++= (uchar) ((wc >> 8) & 3) | 0xDC;
1105
    *s= (uchar) wc & 0xFF;
1106
    return 4;
1107
  }
1108
1109
  return MY_CS_ILUNI;
1110
}
1111
1112
1113
static inline void
1114
my_tolower_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
1115
{
1116
  int page= *wc >> 8;
1117
  if (page < 256 && uni_plane[page])
1118
    *wc= uni_plane[page][*wc & 0xFF].tolower;
1119
}
1120
1121
1122
static inline void
1123
my_toupper_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
1124
{
1125
  int page= *wc >> 8;
1126
  if (page < 256 && uni_plane[page])
1127
    *wc= uni_plane[page][*wc & 0xFF].toupper;
1128
}
1129
1130
1131
static inline void
1132
my_tosort_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
1133
{
1134
  int page= *wc >> 8;
1135
  if (page < 256)
1136
  {
1137
    if (uni_plane[page])
1138
      *wc= uni_plane[page][*wc & 0xFF].sort;
1139
  }
1140
  else
1141
  {
1142
    *wc= REPLACEMENT_CHAR;
1143
  }
1144
}
1145
1146
1147
static size_t
1148
my_caseup_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
1149
                char *dst __attribute__((unused)),
1150
                size_t dstlen __attribute__((unused)))
1151
{
1152
  my_wc_t wc;
1153
  int res;
1154
  char *srcend= src + srclen;
1155
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
1156
  DBUG_ASSERT(src == dst && srclen == dstlen);
1157
  
1158
  while ((src < srcend) &&
1159
         (res= my_utf16_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
1160
  {
1161
    my_toupper_utf16(uni_plane, &wc);
1162
    if (res != my_uni_utf16(cs, wc, (uchar*) src, (uchar*) srcend))
1163
      break;
1164
    src+= res;
1165
  }
1166
  return srclen;
1167
}
1168
1169
1170
static void
1171
my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen,
1172
                   ulong *n1, ulong *n2)
1173
{
1174
  my_wc_t wc;
1175
  int res;
1176
  const uchar *e= s+slen;
1177
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
1178
1179
  while (e > s + 1 && e[-1] == ' ' && e[-2] == '\0')
1180
    e-= 2;
1181
1182
  while ((s < e) && (res= my_utf16_uni(cs, &wc, (uchar *)s, (uchar*)e)) > 0)
1183
  {
1184
    my_tosort_utf16(uni_plane, &wc);
1185
    n1[0]^= (((n1[0] & 63) + n2[0]) * (wc & 0xFF)) + (n1[0] << 8);
1186
    n2[0]+= 3;
1187
    n1[0]^= (((n1[0] & 63) + n2[0]) * (wc >> 8)) + (n1[0] << 8);
1188
    n2[0]+= 3;
1189
    s+= res;
1190
  }
1191
}
1192
1193
1194
static size_t
1195
my_casedn_utf16(CHARSET_INFO *cs, char *src, size_t srclen,
1196
                char *dst __attribute__((unused)),
1197
                size_t dstlen __attribute__((unused)))
1198
{
1199
  my_wc_t wc;
1200
  int res;
1201
  char *srcend= src + srclen;
1202
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
1203
  DBUG_ASSERT(src == dst && srclen == dstlen);
1204
1205
  while ((src < srcend) &&
1206
         (res= my_utf16_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
1207
  {
1208
    my_tolower_utf16(uni_plane, &wc);
1209
    if (res != my_uni_utf16(cs, wc, (uchar*) src, (uchar*) srcend))
1210
      break;
1211
    src+= res;
1212
  }
1213
  return srclen;
1214
}
1215
1216
1217
static int
1218
my_strnncoll_utf16(CHARSET_INFO *cs, 
1219
                   const uchar *s, size_t slen, 
1220
                   const uchar *t, size_t tlen,
1221
                   my_bool t_is_prefix)
1222
{
1223
  int s_res, t_res;
6 by Brian Aker
Second pass on pthread cleanup
1224
  my_wc_t s_wc= 0,t_wc= 0;
1 by brian
clean slate
1225
  const uchar *se= s + slen;
1226
  const uchar *te= t + tlen;
1227
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
1228
1229
  while (s < se && t < te)
1230
  {
1231
    s_res= my_utf16_uni(cs, &s_wc, s, se);
1232
    t_res= my_utf16_uni(cs, &t_wc, t, te);
1233
1234
    if (s_res <= 0 || t_res <= 0)
1235
    {
1236
      /* Incorrect string, compare by char value */
1237
      return my_bincmp(s, se, t, te);
1238
    }
1239
1240
    my_tosort_utf16(uni_plane, &s_wc);
1241
    my_tosort_utf16(uni_plane, &t_wc);
1242
1243
    if (s_wc != t_wc)
1244
    {
1245
      return  s_wc > t_wc ? 1 : -1;
1246
    }
1247
1248
    s+= s_res;
1249
    t+= t_res;
1250
  }
1251
  return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t)));
1252
}
1253
1254
1255
/**
1256
  Compare strings, discarding end space
1257
1258
  If one string is shorter than the other, then we space extend the other
1259
  so that the strings have equal length.
1260
1261
  This will ensure that the following things hold:
1262
1263
    "a"  == "a "
1264
    "a\0" < "a"
1265
    "a\0" < "a "
1266
1267
  @param  cs        Character set pointer.
1268
  @param  a         First string to compare.
1269
  @param  a_length  Length of 'a'.
1270
  @param  b         Second string to compare.
1271
  @param  b_length  Length of 'b'.
1272
1273
  IMPLEMENTATION
1274
1275
  @return Comparison result.
1276
    @retval Negative number, if a less than b.
1277
    @retval 0, if a is equal to b
1278
    @retval Positive number, if a > b
1279
*/
1280
1281
static int
1282
my_strnncollsp_utf16(CHARSET_INFO *cs,
1283
                     const uchar *s, size_t slen,
1284
                     const uchar *t, size_t tlen,
1285
                     my_bool diff_if_only_endspace_difference)
1286
{
1287
  int res;
6 by Brian Aker
Second pass on pthread cleanup
1288
  my_wc_t s_wc= 0, t_wc= 0;
1 by brian
clean slate
1289
  const uchar *se= s + slen, *te= t + tlen;
1290
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
1291
1292
  DBUG_ASSERT((slen % 2) == 0);
1293
  DBUG_ASSERT((tlen % 2) == 0);
1294
1295
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
1296
  diff_if_only_endspace_difference= FALSE;
1297
#endif
1298
1299
  while (s < se && t < te)
1300
  {
1301
    int s_res= my_utf16_uni(cs, &s_wc, s, se);
1302
    int t_res= my_utf16_uni(cs, &t_wc, t, te);
1303
1304
    if (s_res <= 0 || t_res <= 0)
1305
    {
1306
      /* Incorrect string, compare bytewise */
1307
      return my_bincmp(s, se, t, te);
1308
    }
1309
1310
    my_tosort_utf16(uni_plane, &s_wc);
1311
    my_tosort_utf16(uni_plane, &t_wc);
1312
    
1313
    if (s_wc != t_wc)
1314
    {
1315
      return s_wc > t_wc ? 1 : -1;
1316
    }
1317
1318
    s+= s_res;
1319
    t+= t_res;
1320
  }
1321
1322
  slen= (size_t) (se - s);
1323
  tlen= (size_t) (te - t);
1324
  res= 0;
1325
1326
  if (slen != tlen)
1327
  {
1328
    int s_res, swap= 1;
1329
    if (diff_if_only_endspace_difference)
1330
      res= 1;                                   /* Assume 's' is bigger */
1331
    if (slen < tlen)
1332
    {
1333
      slen= tlen;
1334
      s= t;
1335
      se= te;
1336
      swap= -1;
1337
      res= -res;
1338
    }
1339
1340
    for ( ; s < se; s+= s_res)
1341
    {
1342
      if ((s_res= my_utf16_uni(cs, &s_wc, s, se)) < 0)
1343
      {
1344
        DBUG_ASSERT(0);
1345
        return 0;
1346
      }
1347
      if (s_wc != ' ')
1348
        return (s_wc < ' ') ? -swap : swap;
1349
    }
1350
  }
1351
  return res;
1352
}
1353
1354
1355
static size_t
1356
my_strnxfrm_utf16(CHARSET_INFO *cs, 
1357
                  uchar *dst, size_t dstlen, uint nweights,
1358
                  const uchar *src, size_t srclen, uint flags)
1359
{
6 by Brian Aker
Second pass on pthread cleanup
1360
  my_wc_t wc= 0;
1 by brian
clean slate
1361
  int res;
1362
  uchar *de= dst + dstlen;
1363
  uchar *d0= dst;
1364
  const uchar *se= src + srclen;
1365
  MY_UNICASE_INFO **uni_plane= (cs->state & MY_CS_BINSORT) ?
1366
                               NULL : cs->caseinfo;
1367
1368
  for (; src < se && dst < de && nweights; nweights--)
1369
  {
1370
    if ((res= my_utf16_uni(cs,&wc, src, se))<0)
1371
      break;
1372
    src+= res;
1373
    
1374
    if (uni_plane)
1375
      my_tosort_utf16(uni_plane, &wc);
1376
    
1377
    if (dst + 2 >= de)
1378
      break;
1379
    
1380
    *dst++= (uchar) (wc >> 8);
1381
    *dst++= (uchar) (wc & 0xFF);
1382
  }
1383
  return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de,
1384
                                         nweights, flags, 0);
1385
}
1386
1387
1388
static uint
1389
my_ismbchar_utf16(CHARSET_INFO *cs __attribute__((unused)),
1390
                  const char *b __attribute__((unused)),
1391
                  const char *e __attribute__((unused)))
1392
{
1393
  if (b + 2 > e)
1394
    return 0;
1395
  
1396
  if (MY_UTF16_HIGH_HEAD(*b))
1397
  {
1398
    return (b + 4 <= e) && MY_UTF16_LOW_HEAD(b[2]) ? 4 : 0;
1399
  }
1400
  
1401
  if (MY_UTF16_LOW_HEAD(*b))
1402
    return 0;
1403
  
1404
  return 2;
1405
}
1406
1407
1408
static uint
1409
my_mbcharlen_utf16(CHARSET_INFO *cs  __attribute__((unused)),
1410
                   uint c __attribute__((unused)))
1411
{
1412
  return MY_UTF16_HIGH_HEAD(c) ? 4 : 2;
1413
}
1414
1415
1416
static size_t
1417
my_numchars_utf16(CHARSET_INFO *cs,
1418
                  const char *b, const char *e)
1419
{
1420
  size_t nchars= 0;
1421
  for ( ; ; nchars++)
1422
  {
1423
    size_t charlen= my_ismbchar_utf16(cs, b, e);
1424
    if (!charlen)
1425
      break;
1426
    b+= charlen;
1427
  }
1428
  return nchars;
1429
}
1430
1431
1432
static size_t
1433
my_charpos_utf16(CHARSET_INFO *cs,
1434
                 const char *b, const char *e, size_t pos)
1435
{
1436
  const char *b0= b;
1437
  uint charlen;
1438
  
1439
  for ( ; pos; b+= charlen, pos--)
1440
  {
1441
    if (!(charlen= my_ismbchar(cs, b, e)))
1442
      return (e + 2 - b0); /* Error, return pos outside the string */
1443
  }
1444
  return (size_t) (pos ? (e + 2 - b0) : (b - b0));
1445
}
1446
1447
1448
static size_t
1449
my_well_formed_len_utf16(CHARSET_INFO *cs,
1450
                         const char *b, const char *e,
1451
                         size_t nchars, int *error)
1452
{
1453
  const char *b0= b;
1454
  uint charlen;
1455
  *error= 0;
1456
  
1457
  for ( ; nchars; b+= charlen, nchars--)
1458
  {
1459
    if (!(charlen= my_ismbchar(cs, b, e)))
1460
    {
1461
      *error= b < e ? 1 : 0;
1462
      break;
1463
    }
1464
  }
1465
  return (size_t) (b - b0);
1466
}
1467
1468
1469
static int
1470
my_wildcmp_utf16_ci(CHARSET_INFO *cs,
1471
                    const char *str,const char *str_end,
1472
                    const char *wildstr,const char *wildend,
1473
                    int escape, int w_one, int w_many)
1474
{
1475
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
1476
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
1477
                            escape, w_one, w_many, uni_plane); 
1478
}
1479
1480
1481
static int
1482
my_wildcmp_utf16_bin(CHARSET_INFO *cs,
1483
                     const char *str,const char *str_end,
1484
                     const char *wildstr,const char *wildend,
1485
                     int escape, int w_one, int w_many)
1486
{
1487
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
1488
                            escape, w_one, w_many, NULL); 
1489
}
1490
1491
1492
static int
1493
my_strnncoll_utf16_bin(CHARSET_INFO *cs, 
1494
                       const uchar *s, size_t slen,
1495
                       const uchar *t, size_t tlen,
1496
                       my_bool t_is_prefix)
1497
{
1498
  int s_res,t_res;
6 by Brian Aker
Second pass on pthread cleanup
1499
  my_wc_t s_wc= 0,t_wc= 0;
1 by brian
clean slate
1500
  const uchar *se=s+slen;
1501
  const uchar *te=t+tlen;
1502
1503
  while ( s < se && t < te )
1504
  {
1505
    s_res= my_utf16_uni(cs,&s_wc, s, se);
1506
    t_res= my_utf16_uni(cs,&t_wc, t, te);
1507
    
1508
    if (s_res <= 0 || t_res <= 0)
1509
    {
1510
      /* Incorrect string, compare by char value */
1511
      return my_bincmp(s, se, t, te);
1512
    }
1513
    if (s_wc != t_wc)
1514
    {
1515
      return  s_wc > t_wc ? 1 : -1;
1516
    }
1517
    
1518
    s+= s_res;
1519
    t+= t_res;
1520
  }
1521
  return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t)));
1522
}
1523
1524
1525
static int
1526
my_strnncollsp_utf16_bin(CHARSET_INFO *cs,
1527
                         const uchar *s, size_t slen,
1528
                         const uchar *t, size_t tlen,
1529
                         my_bool diff_if_only_endspace_difference)
1530
{
1531
  int res;
6 by Brian Aker
Second pass on pthread cleanup
1532
  my_wc_t s_wc= 0, t_wc= 0;
1 by brian
clean slate
1533
  const uchar *se= s + slen, *te= t + tlen;
1534
1535
  DBUG_ASSERT((slen % 2) == 0);
1536
  DBUG_ASSERT((tlen % 2) == 0);
1537
1538
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
1539
  diff_if_only_endspace_difference= FALSE;
1540
#endif
1541
1542
  while (s < se && t < te)
1543
  {
1544
    int s_res= my_utf16_uni(cs, &s_wc, s, se);
1545
    int t_res= my_utf16_uni(cs, &t_wc, t, te);
1546
1547
    if (s_res <= 0 || t_res <= 0)
1548
    {
1549
      /* Incorrect string, compare bytewise */
1550
      return my_bincmp(s, se, t, te);
1551
    }
1552
1553
    if (s_wc != t_wc)
1554
    {
1555
      return s_wc > t_wc ? 1 : -1;
1556
    }
1557
1558
    s+= s_res;
1559
    t+= t_res;
1560
  }
1561
1562
  slen= (size_t) (se - s);
1563
  tlen= (size_t) (te - t);
1564
  res= 0;
1565
1566
  if (slen != tlen)
1567
  {
1568
    int s_res, swap= 1;
1569
    if (diff_if_only_endspace_difference)
1570
      res= 1;                                   /* Assume 's' is bigger */
1571
    if (slen < tlen)
1572
    {
1573
      slen= tlen;
1574
      s= t;
1575
      se= te;
1576
      swap= -1;
1577
      res= -res;
1578
    }
1579
1580
    for ( ; s < se; s+= s_res)
1581
    {
1582
      if ((s_res= my_utf16_uni(cs, &s_wc, s, se)) < 0)
1583
      {
1584
        DBUG_ASSERT(0);
1585
        return 0;
1586
      }
1587
      if (s_wc != ' ')
1588
        return (s_wc < ' ') ? -swap : swap;
1589
    }
1590
  }
1591
  return res;
1592
}
1593
1594
1595
static size_t
1596
my_strnxfrm_utf16_bin(CHARSET_INFO *cs,
1597
                      uchar *dst, size_t dstlen, uint nweights,
1598
                      const uchar *src, size_t srclen, uint flags)
1599
{
1600
  /* TODO */
1601
  uint frmlen;
1602
  if ((frmlen= min(dstlen, nweights * 2)) > srclen)
1603
    frmlen= srclen;
1604
  if (dst != src)
1605
    memcpy(dst, src, frmlen);
1606
  return my_strxfrm_pad_desc_and_reverse(cs, dst, dst + frmlen, dst + dstlen,
1607
                                         nweights - frmlen / 2, flags, 0);
1608
}
1609
1610
1611
static void
1612
my_hash_sort_utf16_bin(CHARSET_INFO *cs __attribute__((unused)),
1613
                       const uchar *key, size_t len,ulong *nr1, ulong *nr2)
1614
{
1615
  const uchar *pos = key;
1616
  
1617
  key+= len;
1618
1619
  while (key > pos + 1 && key[-1] == ' ' && key[-2] == '\0')
1620
    key-= 2;
1621
1622
  for (; pos < (uchar*) key ; pos++)
1623
  {
1624
    nr1[0]^= (ulong) ((((uint) nr1[0] & 63) + nr2[0]) * 
1625
              ((uint)*pos)) + (nr1[0] << 8);
1626
    nr2[0]+= 3;
1627
  }
1628
}
1629
1630
1631
/**
1632
   Calculate min_str and max_str that ranges a LIKE string.
1633
1634
   @param ptr        Pointer to LIKE pattern.
1635
   @param ptr_length Length of LIKE pattern.
1636
   @param escape     Escape character in LIKE.  (Normally '\').
1637
                     All escape characters should be removed
1638
                     from min_str and max_str.
1639
   @param res_length Length of min_str and max_str.
1640
   @param min_str    Smallest case sensitive string that ranges LIKE.
1641
                     Should be space padded to res_length.
1642
   @param max_str    Largest case sensitive string that ranges LIKE.
1643
                     Normally padded with the biggest character sort value.
1644
1645
   @return Optimization status.
1646
     @retval FALSE if LIKE pattern can be optimized
1647
     @retval TRUE if LIKE can't be optimized.
1648
*/
1649
1650
my_bool
1651
my_like_range_utf16(CHARSET_INFO *cs,
1652
                    const char *ptr, size_t ptr_length,
1653
                    pbool escape, pbool w_one, pbool w_many,
1654
                    size_t res_length,
1655
                    char *min_str,char *max_str,
1656
                    size_t *min_length,size_t *max_length)
1657
{
1658
  const char *end=ptr+ptr_length;
1659
  char *min_org=min_str;
1660
  char *min_end=min_str+res_length;
1661
  size_t charlen= res_length / cs->mbmaxlen;
1662
  
1663
  for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
1664
        ; ptr+=2, charlen--)
1665
  {
1666
    if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
1667
    {
1668
      ptr+=2;                                        /* Skip escape */
1669
      *min_str++= *max_str++ = ptr[0];
1670
      *min_str++= *max_str++ = ptr[1];
1671
      continue;
1672
    }
1673
    if (ptr[0] == '\0' && ptr[1] == w_one)        /* '_' in SQL */
1674
    {
1675
      *min_str++= (char) (cs->min_sort_char >> 8);
1676
      *min_str++= (char) (cs->min_sort_char & 255);
1677
      *max_str++= (char) (cs->max_sort_char >> 8);
1678
      *max_str++= (char) (cs->max_sort_char & 255);
1679
      continue;
1680
    }
1681
    if (ptr[0] == '\0' && ptr[1] == w_many)        /* '%' in SQL */
1682
    {
1683
      /*
1684
        Calculate length of keys:
1685
        'a\0\0... is the smallest possible string when we have space expand
1686
        a\ff\ff... is the biggest possible string
1687
      */
1688
      *min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
1689
                    res_length);
1690
      *max_length= res_length;
1691
      do {
1692
        *min_str++ = 0;
1693
        *min_str++ = 0;
1694
        *max_str++ = (char) (cs->max_sort_char >> 8);
1695
        *max_str++ = (char) (cs->max_sort_char & 255);
1696
      } while (min_str + 1 < min_end);
1697
      return FALSE;
1698
    }
1699
    *min_str++= *max_str++ = ptr[0];
1700
    *min_str++= *max_str++ = ptr[1];
1701
  }
1702
1703
  /* Temporary fix for handling w_one at end of string (key compression) */
1704
  {
1705
    char *tmp;
1706
    for (tmp= min_str ; tmp-1 > min_org && tmp[-1] == '\0' && tmp[-2]=='\0';)
1707
    {
1708
      *--tmp=' ';
1709
      *--tmp='\0';
1710
    }
1711
  }
1712
  
1713
  *min_length= *max_length = (size_t) (min_str - min_org);
1714
  while (min_str + 1 < min_end)
1715
  {
1716
    *min_str++ = *max_str++ = '\0';
1717
    *min_str++ = *max_str++ = ' ';      /* Because if key compression */
1718
  }
1719
  return FALSE;
1720
}
1721
1722
1723
static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
1724
{
1725
  NULL,                /* init */
1726
  my_strnncoll_utf16,
1727
  my_strnncollsp_utf16,
1728
  my_strnxfrm_utf16,
1729
  my_strnxfrmlen_simple,
1730
  my_like_range_utf16,
1731
  my_wildcmp_utf16_ci,
1732
  my_strcasecmp_mb2_or_mb4,
1733
  my_instr_mb,
1734
  my_hash_sort_utf16,
1735
  my_propagate_simple
1736
};
1737
1738
1739
static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
1740
{
1741
  NULL,                /* init */
1742
  my_strnncoll_utf16_bin,
1743
  my_strnncollsp_utf16_bin,
1744
  my_strnxfrm_utf16_bin,
1745
  my_strnxfrmlen_simple,
1746
  my_like_range_utf16,
1747
  my_wildcmp_utf16_bin,
1748
  my_strcasecmp_mb2_or_mb4,
1749
  my_instr_mb,
1750
  my_hash_sort_utf16_bin,
1751
  my_propagate_simple
1752
};
1753
1754
1755
MY_CHARSET_HANDLER my_charset_utf16_handler=
1756
{
1757
  NULL,                /* init         */
1758
  my_ismbchar_utf16,   /* ismbchar     */
1759
  my_mbcharlen_utf16,  /* mbcharlen    */
1760
  my_numchars_utf16,
1761
  my_charpos_utf16,
1762
  my_well_formed_len_utf16,
1763
  my_lengthsp_mb2,
1764
  my_numcells_mb,
1765
  my_utf16_uni,        /* mb_wc        */
1766
  my_uni_utf16,        /* wc_mb        */
1767
  my_mb_ctype_mb,
1768
  my_caseup_str_mb2_or_mb4,
1769
  my_casedn_str_mb2_or_mb4,
1770
  my_caseup_utf16,
1771
  my_casedn_utf16,
1772
  my_snprintf_mb2,
1773
  my_l10tostr_mb2_or_mb4,
1774
  my_ll10tostr_mb2_or_mb4,
1775
  my_fill_mb2,
1776
  my_strntol_mb2_or_mb4,
1777
  my_strntoul_mb2_or_mb4,
1778
  my_strntoll_mb2_or_mb4,
1779
  my_strntoull_mb2_or_mb4,
1780
  my_strntod_mb2_or_mb4,
1781
  my_strtoll10_mb2,
1782
  my_strntoull10rnd_mb2_or_mb4,
1783
  my_scan_mb2
1784
};
1785
1786
1787
CHARSET_INFO my_charset_utf16_general_ci=
1788
{
1789
  54,0,0,              /* number       */
1790
  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
1791
  "utf16",             /* cs name    */
1792
  "utf16_general_ci",  /* name         */
1793
  "UTF-16 Unicode",    /* comment      */
1794
  NULL,                /* tailoring    */
1795
  NULL,                /* ctype        */
1796
  NULL,                /* to_lower     */
1797
  NULL,                /* to_upper     */
1798
  NULL,                /* sort_order   */
1799
  NULL,                /* contractions */
1800
  NULL,                /* sort_order_big*/
1801
  NULL,                /* tab_to_uni   */
1802
  NULL,                /* tab_from_uni */
1803
  my_unicase_default,  /* caseinfo     */
1804
  NULL,                /* state_map    */
1805
  NULL,                /* ident_map    */
1806
  1,                   /* strxfrm_multiply */
1807
  1,                   /* caseup_multiply  */
1808
  1,                   /* casedn_multiply  */
1809
  2,                   /* mbminlen     */
1810
  4,                   /* mbmaxlen     */
1811
  0,                   /* min_sort_char */
1812
  0xFFFF,              /* max_sort_char */
1813
  ' ',                 /* pad char      */
1814
  0,                   /* escape_with_backslash_is_dangerous */
1815
  1,                   /* levels_for_compare */
1816
  1,                   /* levels_for_order   */
1817
  &my_charset_utf16_handler,
1818
  &my_collation_utf16_general_ci_handler
1819
};
1820
1821
1822
CHARSET_INFO my_charset_utf16_bin=
1823
{
1824
  55,0,0,              /* number       */
1825
  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
1826
  "utf16",             /* cs name      */
1827
  "utf16_bin",         /* name         */
1828
  "UTF-16 Unicode",    /* comment      */
1829
  NULL,                /* tailoring    */
1830
  NULL,                /* ctype        */
1831
  NULL,                /* to_lower     */
1832
  NULL,                /* to_upper     */
1833
  NULL,                /* sort_order   */
1834
  NULL,                /* contractions */
1835
  NULL,                /* sort_order_big*/
1836
  NULL,                /* tab_to_uni   */
1837
  NULL,                /* tab_from_uni */
1838
  my_unicase_default,  /* caseinfo     */
1839
  NULL,                /* state_map    */
1840
  NULL,                /* ident_map    */
1841
  1,                   /* strxfrm_multiply */
1842
  1,                   /* caseup_multiply  */
1843
  1,                   /* casedn_multiply  */
1844
  2,                   /* mbminlen     */
1845
  4,                   /* mbmaxlen     */
1846
  0,                   /* min_sort_char */
1847
  0xFFFF,              /* max_sort_char */
1848
  ' ',                 /* pad char      */
1849
  0,                   /* escape_with_backslash_is_dangerous */
1850
  1,                   /* levels_for_compare */
1851
  1,                   /* levels_for_order   */
1852
  &my_charset_utf16_handler,
1853
  &my_collation_utf16_bin_handler
1854
};
1855
1856
#endif /* HAVE_CHARSET_utf16 */
1857
1858
1859
#ifdef HAVE_CHARSET_utf32
1860
1861
static int
1862
my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
1863
             my_wc_t *pwc, const uchar *s, const uchar *e)
1864
{
1865
  if (s + 4 > e)
1866
    return MY_CS_TOOSMALL4;
1867
  *pwc= (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + (s[3]);
1868
  return 4;
1869
}
1870
1871
1872
static int
1873
my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
1874
             my_wc_t wc, uchar *s, uchar *e)
1875
{
1876
  if (s + 4 > e) 
1877
    return MY_CS_TOOSMALL4;
1878
  
1879
  s[0]= (uchar) (wc >> 24);
1880
  s[1]= (uchar) (wc >> 16) & 0xFF;
1881
  s[2]= (uchar) (wc >> 8)  & 0xFF;
1882
  s[3]= (uchar) wc & 0xFF;
1883
  return 4;
1884
}
1885
1886
1887
static inline void
1888
my_tolower_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
1889
{
1890
  int page= *wc >> 8;
1891
  if (page < 256 && uni_plane[page])
1892
    *wc= uni_plane[page][*wc & 0xFF].tolower;
1893
}
1894
1895
1896
static inline void
1897
my_toupper_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
1898
{
1899
  int page= *wc >> 8;
1900
  if (page < 256 && uni_plane[page])
1901
    *wc= uni_plane[page][*wc & 0xFF].toupper;
1902
}
1903
1904
1905
static inline void
1906
my_tosort_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
1907
{
1908
  int page= *wc >> 8;
1909
  if (page < 256)
1910
  {
1911
    if (uni_plane[page])
1912
      *wc= uni_plane[page][*wc & 0xFF].sort;
1913
  }
1914
  else
1915
  {
1916
    *wc= REPLACEMENT_CHAR;
1917
  }
1918
}
1919
1920
1921
static size_t
1922
my_caseup_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
1923
                char *dst __attribute__((unused)),
1924
                size_t dstlen __attribute__((unused)))
1925
{
1926
  my_wc_t wc;
1927
  int res;
1928
  char *srcend= src + srclen;
1929
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
1930
  DBUG_ASSERT(src == dst && srclen == dstlen);
1931
  
1932
  while ((src < srcend) &&
1933
         (res= my_utf32_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0)
1934
  {
1935
    my_toupper_utf32(uni_plane, &wc);
1936
    if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend))
1937
      break;
1938
    src+= res;
1939
  }
1940
  return srclen;
1941
}
1942
1943
1944
static inline void
1945
my_hash_add(ulong *n1, ulong *n2, uint ch)
1946
{
1947
  n1[0]^= (((n1[0] & 63) + n2[0]) * (ch)) + (n1[0] << 8);
1948
  n2[0]+= 3;
1949
}
1950
1951
1952
static void
1953
my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen,
1954
                   ulong *n1, ulong *n2)
1955
{
1956
  my_wc_t wc;
1957
  int res;
1958
  const uchar *e= s + slen;
1959
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
1960
1961
  /* Skip trailing spaces */
1962
  while (e > s + 3 && e[-1] == ' ' && !e[-2] && !e[-3] && !e[-4])
1963
    e-= 4;
1964
1965
  while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
1966
  {
1967
    my_tosort_utf32(uni_plane, &wc);
1968
    my_hash_add(n1, n2, (uint) (wc >> 24));
1969
    my_hash_add(n1, n2, (uint) (wc >> 16) & 0xFF);
1970
    my_hash_add(n1, n2, (uint) (wc >> 8)  & 0xFF);
1971
    my_hash_add(n1, n2, (uint) (wc & 0xFF));
1972
    s+= res;
1973
  }
1974
}
1975
1976
1977
static size_t
1978
my_casedn_utf32(CHARSET_INFO *cs, char *src, size_t srclen,
1979
                char *dst __attribute__((unused)),
1980
                size_t dstlen __attribute__((unused)))
1981
{
1982
  my_wc_t wc;
1983
  int res;
1984
  char *srcend= src + srclen;
1985
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
1986
  DBUG_ASSERT(src == dst && srclen == dstlen);
1987
1988
  while ((res= my_utf32_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0)
1989
  {
1990
    my_tolower_utf32(uni_plane,&wc);
1991
    if (res != my_uni_utf32(cs, wc, (uchar*) src, (uchar*) srcend))
1992
      break;
1993
    src+= res;
1994
  }
1995
  return srclen;
1996
}
1997
1998
1999
static int
2000
my_strnncoll_utf32(CHARSET_INFO *cs, 
2001
                   const uchar *s, size_t slen, 
2002
                   const uchar *t, size_t tlen,
2003
                   my_bool t_is_prefix)
2004
{
6 by Brian Aker
Second pass on pthread cleanup
2005
  my_wc_t s_wc= 0,t_wc= 0;
1 by brian
clean slate
2006
  const uchar *se= s + slen;
2007
  const uchar *te= t + tlen;
2008
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
2009
2010
  while (s < se && t < te)
2011
  {
2012
    int s_res= my_utf32_uni(cs, &s_wc, s, se);
2013
    int t_res= my_utf32_uni(cs, &t_wc, t, te);
2014
    
2015
    if ( s_res <= 0 || t_res <= 0)
2016
    {
2017
      /* Incorrect string, compare by char value */
2018
      return my_bincmp(s, se, t, te);
2019
    }
2020
    
2021
    my_tosort_utf32(uni_plane, &s_wc);
2022
    my_tosort_utf32(uni_plane, &t_wc);
2023
    
2024
    if (s_wc != t_wc)
2025
    {
2026
      return s_wc > t_wc ? 1 : -1;
2027
    }
2028
    
2029
    s+= s_res;
2030
    t+= t_res;
2031
  }
2032
  return (int) (t_is_prefix ? (t - te) : ((se - s) - (te - t)));
2033
}
2034
2035
2036
/**
2037
  Compare strings, discarding end space
2038
2039
  If one string is shorter than the other, then we space extend the other
2040
  so that the strings have equal length.
2041
2042
  This will ensure that the following things hold:
2043
2044
    "a"  == "a "
2045
    "a\0" < "a"
2046
    "a\0" < "a "
2047
2048
  @param  cs        Character set pointer.
2049
  @param  a         First string to compare.
2050
  @param  a_length  Length of 'a'.
2051
  @param  b         Second string to compare.
2052
  @param  b_length  Length of 'b'.
2053
2054
  IMPLEMENTATION
2055
2056
  @return Comparison result.
2057
    @retval Negative number, if a less than b.
2058
    @retval 0, if a is equal to b
2059
    @retval Positive number, if a > b
2060
*/
2061
2062
2063
static int
2064
my_strnncollsp_utf32(CHARSET_INFO *cs,
2065
                     const uchar *s, size_t slen,
2066
                     const uchar *t, size_t tlen,
2067
                     my_bool diff_if_only_endspace_difference)
2068
{
2069
  int res;
6 by Brian Aker
Second pass on pthread cleanup
2070
  my_wc_t s_wc= 0, t_wc= 0;
1 by brian
clean slate
2071
  const uchar *se= s + slen, *te= t + tlen;
2072
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
2073
2074
  DBUG_ASSERT((slen % 4) == 0);
2075
  DBUG_ASSERT((tlen % 4) == 0);
2076
2077
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
2078
  diff_if_only_endspace_difference= FALSE;
2079
#endif
2080
2081
  while ( s < se && t < te )
2082
  {
2083
    int s_res= my_utf32_uni(cs, &s_wc, s, se);
2084
    int t_res= my_utf32_uni(cs, &t_wc, t, te);
2085
2086
    if ( s_res <= 0 || t_res <= 0 )
2087
    {
2088
      /* Incorrect string, compare bytewise */
2089
      return my_bincmp(s, se, t, te);
2090
    }
2091
2092
    my_tosort_utf32(uni_plane, &s_wc);
2093
    my_tosort_utf32(uni_plane, &t_wc);
2094
    
2095
    if ( s_wc != t_wc )
2096
    {
2097
      return s_wc > t_wc ? 1 : -1;
2098
    }
2099
2100
    s+= s_res;
2101
    t+= t_res;
2102
  }
2103
2104
  slen= (size_t) (se - s);
2105
  tlen= (size_t) (te - t);
2106
  res= 0;
2107
2108
  if (slen != tlen)
2109
  {
2110
    int s_res, swap= 1;
2111
    if (diff_if_only_endspace_difference)
2112
      res= 1;                                   /* Assume 's' is bigger */
2113
    if (slen < tlen)
2114
    {
2115
      slen= tlen;
2116
      s= t;
2117
      se= te;
2118
      swap= -1;
2119
      res= -res;
2120
    }
2121
2122
    for ( ; s < se; s+= s_res)
2123
    {
2124
      if ((s_res= my_utf32_uni(cs, &s_wc, s, se)) < 0)
2125
      {
2126
        DBUG_ASSERT(0);
2127
        return 0;
2128
      }
2129
      if (s_wc != ' ')
2130
        return (s_wc < ' ') ? -swap : swap;
2131
    }
2132
  }
2133
  return res;
2134
}
2135
2136
2137
static size_t
2138
my_strnxfrmlen_utf32(CHARSET_INFO *cs __attribute__((unused)), size_t len)
2139
{
2140
  return len / 2;
2141
}
2142
2143
2144
static void
2145
my_fill_utf32_for_strxfrm(CHARSET_INFO *cs __attribute__((unused)),
2146
                          char *s, size_t slen, int fill)
2147
{
2148
  DBUG_ASSERT(fill <= 0xFFFF);
2149
  
2150
  for ( ; slen > 1; slen-= 2)
2151
  {
2152
    *s++= fill >> 8;
2153
    *s++= fill & 0xFF;
2154
  }
2155
  if (slen)
2156
    *s= 0x00;
2157
}
2158
2159
2160
size_t
2161
my_strxfrm_pad_desc_and_reverse_utf32(CHARSET_INFO *cs,
2162
                                      uchar *str, uchar *frmend, uchar *strend,
2163
                                      uint nweights, uint flags, uint level)
2164
{
2165
  if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE))
2166
  {
2167
    uint fill_length= min((uint) (strend - frmend), nweights * 2);
2168
    my_fill_utf32_for_strxfrm(cs, (char*) frmend, fill_length, cs->pad_char);
2169
    frmend+= fill_length;
2170
  }
2171
  my_strxfrm_desc_and_reverse(str, frmend, flags, level);
2172
  return frmend - str;
2173
}
2174
2175
2176
static size_t
2177
my_strnxfrm_utf32(CHARSET_INFO *cs, 
2178
                  uchar *dst, size_t dstlen, uint nweights,
2179
                  const uchar *src, size_t srclen, uint flags)
2180
{
2181
  my_wc_t wc;
2182
  int res;
2183
  uchar *de= dst + dstlen;
2184
  uchar *d0= dst;
2185
  const uchar *se= src + srclen;
2186
  MY_UNICASE_INFO **uni_plane= (cs->state & MY_CS_BINSORT) ?
2187
                               NULL : cs->caseinfo;
2188
2189
  for (; src < se && dst < de && nweights; nweights--)
2190
  {
2191
    if ((res= my_utf32_uni(cs,&wc, src, se))<0)
2192
      break;
2193
    src+= res;
2194
    
2195
    if (uni_plane)
2196
      my_tosort_utf32(uni_plane, &wc);
2197
    
2198
    if (dst + 2 >= de)
2199
      break;
2200
    
2201
    *dst++= (uchar) (wc >> 8);
2202
    *dst++= (uchar) (wc & 0xFF);
2203
  }
2204
  return my_strxfrm_pad_desc_and_reverse_utf32(cs, d0, dst, de,
2205
                                               nweights, flags, 0);
2206
}
2207
2208
2209
static uint
2210
my_ismbchar_utf32(CHARSET_INFO *cs __attribute__((unused)),
2211
                  const char *b __attribute__((unused)),
2212
                  const char *e __attribute__((unused)))
2213
{
2214
  return 4;
2215
}
2216
2217
2218
static uint
2219
my_mbcharlen_utf32(CHARSET_INFO *cs  __attribute__((unused)) , 
2220
                   uint c __attribute__((unused)))
2221
{
2222
  return 4;
2223
}
2224
2225
2226
static int
2227
my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap)
2228
{
2229
  char *start= dst, *end= dst + n;
2230
  DBUG_ASSERT((n % 4) == 0);
2231
  for (; *fmt ; fmt++)
2232
  {
2233
    if (fmt[0] != '%')
2234
    {
2235
      if (dst >= end)                        /* End of buffer */
2236
        break;
2237
      
2238
      *dst++= '\0';
2239
      *dst++= '\0';
2240
      *dst++= '\0';
2241
      *dst++= *fmt;        /* Copy ordinary char */
2242
      continue;
2243
    }
2244
    
2245
    fmt++;
2246
    
2247
    /* Skip if max size is used (to be compatible with printf) */
2248
    while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' || *fmt == '-')
2249
      fmt++;
2250
    
2251
    if (*fmt == 'l')
2252
      fmt++;
2253
    
2254
    if (*fmt == 's')                                /* String parameter */
2255
    {
2256
      register char *par= va_arg(ap, char *);
2257
      size_t plen;
2258
      size_t left_len= (size_t)(end - dst);
2259
      if (!par) par= (char*)"(null)";
2260
      plen= strlen(par);
2261
      if (left_len <= plen*4)
2262
        plen= left_len / 4 - 1;
2263
2264
      for ( ; plen ; plen--, dst+= 4, par++)
2265
      {
2266
        dst[0]= '\0';
2267
        dst[1]= '\0';
2268
        dst[2]= '\0';
2269
        dst[3]= par[0];
2270
      }
2271
      continue;
2272
    }
2273
    else if (*fmt == 'd' || *fmt == 'u')        /* Integer parameter */
2274
    {
2275
      register int iarg;
2276
      char nbuf[16];
2277
      char *pbuf= nbuf;
2278
      
2279
      if ((size_t) (end - dst) < 64)
2280
        break;
2281
      iarg= va_arg(ap, int);
2282
      if (*fmt == 'd')
2283
        int10_to_str((long) iarg, nbuf, -10);
2284
      else
2285
        int10_to_str((long) (uint) iarg,nbuf,10);
2286
2287
      for (; pbuf[0]; pbuf++)
2288
      {
2289
        *dst++= '\0';
2290
        *dst++= '\0';
2291
        *dst++= '\0';
2292
        *dst++= *pbuf;
2293
      }
2294
      continue;
2295
    }
2296
    
2297
    /* We come here on '%%', unknown code or too long parameter */
2298
    if (dst == end)
2299
      break;
2300
    *dst++= '\0';
2301
    *dst++= '\0';
2302
    *dst++= '\0';
2303
    *dst++= '%';    /* % used as % or unknown code */
2304
  }
2305
  
2306
  DBUG_ASSERT(dst < end);
2307
  *dst++= '\0';
2308
  *dst++= '\0';
2309
  *dst++= '\0';
2310
  *dst++= '\0';     /* End of errmessage */
2311
  return (size_t) (dst - start - 4);
2312
}
2313
2314
2315
static size_t
2316
my_snprintf_utf32(CHARSET_INFO *cs __attribute__((unused)),
2317
                  char* to, size_t n, const char* fmt, ...)
2318
{
2319
  va_list args;
2320
  va_start(args,fmt);
2321
  return my_vsnprintf_utf32(to, n, fmt, args);
2322
}
2323
2324
2325
static longlong
2326
my_strtoll10_utf32(CHARSET_INFO *cs __attribute__((unused)),
2327
                   const char *nptr, char **endptr, int *error)
2328
{
2329
  const char *s, *end, *start, *n_end, *true_end;
2330
  uchar c;
2331
  unsigned long i, j, k;
2332
  ulonglong li;
2333
  int negative;
2334
  ulong cutoff, cutoff2, cutoff3;
2335
2336
  s= nptr;
2337
  /* If fixed length string */
2338
  if (endptr)
2339
  {
2340
    /* Make sure string length is even */
2341
    end= s + ((*endptr - s) / 4) * 4;
2342
    while (s < end && !s[0] && !s[1] && !s[2] &&
2343
           (s[3] == ' ' || s[3] == '\t'))
2344
      s+= 4;
2345
    if (s == end)
2346
      goto no_conv;
2347
  }
2348
  else
2349
  {
2350
     /* We don't support null terminated strings in UCS2 */
2351
     goto no_conv;
2352
  }
2353
2354
  /* Check for a sign. */
2355
  negative= 0;
2356
  if (!s[0] && !s[1] && !s[2] && s[3] == '-')
2357
  {
2358
    *error= -1;                                        /* Mark as negative number */
2359
    negative= 1;
2360
    s+= 4;
2361
    if (s == end)
2362
      goto no_conv;
2363
    cutoff=  MAX_NEGATIVE_NUMBER / LFACTOR2;
2364
    cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100;
2365
    cutoff3=  MAX_NEGATIVE_NUMBER % 100;
2366
  }
2367
  else
2368
  {
2369
    *error= 0;
2370
    if (!s[0] && !s[1] && !s[2] && s[3] == '+')
2371
    {
2372
      s+= 4;
2373
      if (s == end)
2374
        goto no_conv;
2375
    }
2376
    cutoff=  ULONGLONG_MAX / LFACTOR2;
2377
    cutoff2= ULONGLONG_MAX % LFACTOR2 / 100;
2378
    cutoff3=  ULONGLONG_MAX % 100;
2379
  }
2380
2381
  /* Handle case where we have a lot of pre-zero */
2382
  if (!s[0] && !s[1] && !s[2] && s[3] == '0')
2383
  {
2384
    i= 0;
2385
    do
2386
    {
2387
      s+= 4;
2388
      if (s == end)
2389
        goto end_i;                                /* Return 0 */
2390
    }
2391
    while (!s[0] && !s[1] && !s[2] && s[3] == '0');
2392
    n_end= s + 4 * INIT_CNT;
2393
  }
2394
  else
2395
  {
2396
    /* Read first digit to check that it's a valid number */
2397
    if (s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
2398
      goto no_conv;
2399
    i= c;
2400
    s+= 4;
2401
    n_end= s + 4 * (INIT_CNT-1);
2402
  }
2403
2404
  /* Handle first 9 digits and store them in i */
2405
  if (n_end > end)
2406
    n_end= end;
2407
  for (; s != n_end ; s+= 4)
2408
  {
2409
    if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2410
      goto end_i;
2411
    i= i * 10 + c;
2412
  }
2413
  if (s == end)
2414
    goto end_i;
2415
2416
  /* Handle next 9 digits and store them in j */
2417
  j= 0;
2418
  start= s;                                /* Used to know how much to shift i */
2419
  n_end= true_end= s + 4 * INIT_CNT;
2420
  if (n_end > end)
2421
    n_end= end;
2422
  do
2423
  {
2424
    if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2425
      goto end_i_and_j;
2426
    j= j * 10 + c;
2427
    s+= 4;
2428
  } while (s != n_end);
2429
  if (s == end)
2430
  {
2431
    if (s != true_end)
2432
      goto end_i_and_j;
2433
    goto end3;
2434
  }
2435
  if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9)
2436
    goto end3;
2437
2438
  /* Handle the next 1 or 2 digits and store them in k */
2439
  k=c;
2440
  s+= 4;
2441
  if (s == end || s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9)
2442
    goto end4;
2443
  k= k * 10 + c;
2444
  s+= 2;
2445
  *endptr= (char*) s;
2446
2447
  /* number string should have ended here */
2448
  if (s != end && !s[0] && !s[1] && !s[2] && (c= (s[3] - '0')) <= 9)
2449
    goto overflow;
2450
2451
  /* Check that we didn't get an overflow with the last digit */
2452
  if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) &&
2453
                                     k > cutoff3)))
2454
    goto overflow;
2455
  li= i * LFACTOR2+ (ulonglong) j * 100 + k;
2456
  return (longlong) li;
2457
2458
overflow:                                        /* *endptr is set here */
2459
  *error= MY_ERRNO_ERANGE;
2460
  return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX;
2461
2462
end_i:
2463
  *endptr= (char*) s;
2464
  return (negative ? ((longlong) -(long) i) : (longlong) i);
2465
2466
end_i_and_j:
2467
  li= (ulonglong) i * lfactor[(size_t) (s-start) / 4] + j;
2468
  *endptr= (char*) s;
2469
  return (negative ? -((longlong) li) : (longlong) li);
2470
2471
end3:
2472
  li= (ulonglong) i*LFACTOR+ (ulonglong) j;
2473
  *endptr= (char*) s;
2474
  return (negative ? -((longlong) li) : (longlong) li);
2475
2476
end4:
2477
  li= (ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k;
2478
  *endptr= (char*) s;
2479
  if (negative)
2480
  {
2481
   if (li > MAX_NEGATIVE_NUMBER)
2482
     goto overflow;
2483
   return -((longlong) li);
2484
  }
2485
  return (longlong) li;
2486
2487
no_conv:
2488
  /* There was no number to convert.  */
2489
  *error= MY_ERRNO_EDOM;
2490
  *endptr= (char *) nptr;
2491
  return 0;
2492
}
2493
2494
2495
static size_t
2496
my_numchars_utf32(CHARSET_INFO *cs __attribute__((unused)),
2497
                  const char *b, const char *e)
2498
{
2499
  return (size_t) (e - b) / 4;
2500
}
2501
2502
2503
static size_t
2504
my_charpos_utf32(CHARSET_INFO *cs __attribute__((unused)),
2505
                 const char *b, const char *e, size_t pos)
2506
{
2507
  size_t string_length= (size_t) (e - b);
2508
  return pos * 4 > string_length ? string_length + 4 : pos * 4;
2509
}
2510
2511
2512
static size_t
2513
my_well_formed_len_utf32(CHARSET_INFO *cs __attribute__((unused)),
2514
                         const char *b, const char *e,
2515
                         size_t nchars, int *error)
2516
{
2517
  /* Ensure string length is divisible by 4 */
2518
  const char *b0= b;
2519
  size_t length= e - b;
2520
  DBUG_ASSERT((length % 4) == 0);
2521
  *error= 0;
2522
  nchars*= 4;
2523
  if (length > nchars)
2524
  {
2525
    length= nchars;
2526
    e= b + nchars;
2527
  }
2528
  for (; b < e; b+= 4)
2529
  {
2530
    if (b[0] || b[1] > 0x10)
2531
    {
2532
      *error= 1;
2533
      return b - b0;
2534
    }
2535
  }
2536
  return length;
2537
}
2538
2539
2540
static
2541
void my_fill_utf32(CHARSET_INFO *cs,
2542
                   char *s, size_t slen, int fill)
2543
{
2544
  char buf[10];
2545
  uint buflen;
2546
  char *e= s + slen;
2547
  
2548
  DBUG_ASSERT((slen % 4) == 0);
2549
2550
  buflen= cs->cset->wc_mb(cs, (my_wc_t) fill, (uchar*) buf,
2551
                          (uchar*) buf + sizeof(buf));
2552
  DBUG_ASSERT(buflen == 4);
2553
  while (s < e)
2554
  {
2555
    memcpy(s, buf, 4);
2556
    s+= 4;
2557
  }
2558
}
2559
2560
2561
static size_t
2562
my_lengthsp_utf32(CHARSET_INFO *cs __attribute__((unused)),
2563
                  const char *ptr, size_t length)
2564
{
2565
  const char *end= ptr + length;
2566
  DBUG_ASSERT((length % 4) == 0);
2567
  while (end > ptr + 3 && end[-1] == ' ' && !end[-2] && !end[-3] && !end[-4])
2568
    end-= 4;
2569
  return (size_t) (end - ptr);
2570
}
2571
2572
2573
static int
2574
my_wildcmp_utf32_ci(CHARSET_INFO *cs,
2575
                    const char *str, const char *str_end,
2576
                    const char *wildstr, const char *wildend,
2577
                    int escape, int w_one, int w_many)
2578
{
2579
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
2580
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
2581
                            escape, w_one, w_many, uni_plane); 
2582
}
2583
2584
2585
static int
2586
my_wildcmp_utf32_bin(CHARSET_INFO *cs,
2587
                     const char *str,const char *str_end,
2588
                     const char *wildstr,const char *wildend,
2589
                     int escape, int w_one, int w_many)
2590
{
2591
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
2592
                            escape, w_one, w_many, NULL); 
2593
}
2594
2595
2596
static int
2597
my_strnncoll_utf32_bin(CHARSET_INFO *cs, 
2598
                       const uchar *s, size_t slen,
2599
                       const uchar *t, size_t tlen,
2600
                       my_bool t_is_prefix)
2601
{
6 by Brian Aker
Second pass on pthread cleanup
2602
  my_wc_t s_wc= 0, t_wc= 0;
1 by brian
clean slate
2603
  const uchar *se= s + slen;
2604
  const uchar *te= t + tlen;
2605
2606
  while (s < se && t < te)
2607
  {
2608
    int s_res= my_utf32_uni(cs, &s_wc, s, se);
2609
    int t_res= my_utf32_uni(cs, &t_wc, t, te);
2610
    
2611
    if (s_res <= 0 || t_res <= 0)
2612
    {
2613
      /* Incorrect string, compare by char value */
2614
      return my_bincmp(s, se, t, te);
2615
    }
2616
    if (s_wc != t_wc)
2617
    {
2618
      return  s_wc > t_wc ? 1 : -1;
2619
    }
2620
    
2621
    s+= s_res;
2622
    t+= t_res;
2623
  }
2624
  return (int) (t_is_prefix ? (t-te) : ((se - s) - (te - t)));
2625
}
2626
2627
2628
static inline my_wc_t
2629
my_utf32_get(const uchar *s)
2630
{
2631
  return
2632
    ((my_wc_t) s[0] << 24) +
2633
    ((my_wc_t) s[1] << 16) +
2634
    ((my_wc_t) s[2] << 8) +
2635
    s[3];
2636
}
2637
2638
2639
static int
2640
my_strnncollsp_utf32_bin(CHARSET_INFO *cs __attribute__((unused)), 
2641
                         const uchar *s, size_t slen, 
2642
                         const uchar *t, size_t tlen,
2643
                         my_bool diff_if_only_endspace_difference
2644
                         __attribute__((unused)))
2645
{
2646
  const uchar *se, *te;
2647
  size_t minlen;
2648
2649
  DBUG_ASSERT((slen % 4) == 0);
2650
  DBUG_ASSERT((tlen % 4) == 0);
2651
2652
  se= s + slen;
2653
  te= t + tlen;
2654
2655
  for (minlen= min(slen, tlen); minlen; minlen-= 4)
2656
  {
2657
    my_wc_t s_wc= my_utf32_get(s);
2658
    my_wc_t t_wc= my_utf32_get(t);
2659
    if (s_wc != t_wc)
2660
      return  s_wc > t_wc ? 1 : -1;
2661
2662
    s+= 4;
2663
    t+= 4;
2664
  }
2665
2666
  if (slen != tlen)
2667
  {
2668
    int swap= 1;
2669
    if (slen < tlen)
2670
    {
2671
      s= t;
2672
      se= te;
2673
      swap= -1;
2674
    }
2675
2676
    for ( ; s < se ; s+= 4)
2677
    {
2678
      my_wc_t s_wc= my_utf32_get(s);
2679
      if (s_wc != ' ')
2680
        return (s_wc < ' ') ? -swap : swap;
2681
    }
2682
  }
2683
  return 0;
2684
}
2685
2686
2687
/**
2688
   Calculate min_str and max_str that ranges a LIKE string.
2689
2690
   @param ptr        Pointer to LIKE pattern.
2691
   @param ptr_length Length of LIKE pattern.
2692
   @param escape     Escape character in LIKE.  (Normally '\').
2693
                     All escape characters should be removed
2694
                     from min_str and max_str.
2695
   @param res_length Length of min_str and max_str.
2696
   @param min_str    Smallest case sensitive string that ranges LIKE.
2697
                     Should be space padded to res_length.
2698
   @param max_str    Largest case sensitive string that ranges LIKE.
2699
                     Normally padded with the biggest character sort value.
2700
2701
   @return Optimization status.
2702
     @retval FALSE if LIKE pattern can be optimized
2703
     @rerval TRUE if LIKE can't be optimized.
2704
*/
2705
2706
my_bool
2707
my_like_range_utf32(CHARSET_INFO *cs,
2708
                    const char *ptr, size_t ptr_length,
2709
                    pbool escape, pbool w_one, pbool w_many,
2710
                    size_t res_length,
2711
                    char *min_str,char *max_str,
2712
                    size_t *min_length,size_t *max_length)
2713
{
2714
  const char *end= ptr + ptr_length;
2715
  char *min_org= min_str;
2716
  char *min_end= min_str + res_length;
2717
  char *max_end= max_str + res_length;
2718
  size_t charlen= res_length / cs->mbmaxlen;
2719
  
2720
  DBUG_ASSERT((res_length % 4) == 0);
2721
  
2722
  for ( ; charlen > 0; ptr+= 4, charlen--)
2723
  {
2724
    my_wc_t wc;
2725
    int res;
2726
    if ((res= my_utf32_uni(cs, &wc, ptr, end)) < 0)
2727
    {
2728
      my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
2729
      my_fill_utf32(cs, max_str, min_end - min_str, cs->max_sort_char);
2730
      /* min_length and max_legnth are not important */
2731
      return TRUE;
2732
    }
2733
    
2734
    if (wc == (my_wc_t) escape)
2735
    {
2736
      ptr+= 4;                                  /* Skip escape */
2737
      if ((res= my_utf32_uni(cs, &wc, ptr, end)) < 0)
2738
      {
2739
        my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
2740
        my_fill_utf32(cs, max_str, max_end - min_str, cs->max_sort_char);
2741
        /* min_length and max_length are not important */
2742
        return TRUE;
2743
      }
2744
      if (my_uni_utf32(cs, wc, min_str, min_end) != 4 ||
2745
          my_uni_utf32(cs, wc, max_str, max_end) != 4)
2746
        goto pad_set_lengths;
2747
      *min_str++= 4;
2748
      *max_str++= 4;
2749
      continue;
2750
    }
2751
    
2752
    if (wc == (my_wc_t) w_one)
2753
    {
2754
      if (my_uni_utf32(cs, cs->min_sort_char, min_str, min_end) != 4 ||
2755
          my_uni_utf32(cs, cs->max_sort_char, max_str, max_end) != 4)
2756
        goto pad_set_lengths;
2757
      min_str+= 4;
2758
      max_str+= 4;
2759
      continue;
2760
    }
2761
    
2762
    if (wc == (my_wc_t) w_many)
2763
    {
2764
      /*
2765
        Calculate length of keys:
2766
        'a\0\0... is the smallest possible string when we have space expand
2767
        a\ff\ff... is the biggest possible string
2768
      */
2769
      *min_length= ((cs->state & MY_CS_BINSORT) ?
2770
                    (size_t) (min_str - min_org) :
2771
                    res_length);
2772
      *max_length= res_length;
2773
      goto pad_min_max;
2774
    }
2775
    
2776
    /* Normal character */
2777
    if (my_uni_utf32(cs, wc, min_str, min_end) != 4 ||
2778
        my_uni_utf32(cs, wc, max_str, max_end) != 4)
2779
      goto pad_set_lengths;
2780
    min_str+= 4;
2781
    max_str+= 4;
2782
  }
2783
2784
pad_set_lengths:
2785
  *min_length= *max_length= (size_t) (min_str - min_org);
2786
2787
pad_min_max:
2788
  my_fill_utf32(cs, min_str, min_end - min_str, cs->min_sort_char);
2789
  my_fill_utf32(cs, max_str, max_end - max_str, cs->max_sort_char);
2790
  return FALSE;
2791
}
2792
2793
2794
static size_t
2795
my_scan_utf32(CHARSET_INFO *cs,
2796
              const char *str, const char *end, int sequence_type)
2797
{
2798
  const char *str0= str;
2799
  
2800
  switch (sequence_type)
2801
  {
2802
  case MY_SEQ_SPACES:
2803
    for ( ; str < end; )
2804
    {
2805
      my_wc_t wc;
2806
      int res= my_utf32_uni(cs, &wc, str, end);
2807
      if (res < 0 || wc != ' ')
2808
        break;
2809
      str+= res;
2810
    }
2811
    return (size_t) (str - str0);
2812
  default:
2813
    return 0;
2814
  }
2815
}
2816
2817
2818
static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
2819
{
2820
  NULL, /* init */
2821
  my_strnncoll_utf32,
2822
  my_strnncollsp_utf32,
2823
  my_strnxfrm_utf32,
2824
  my_strnxfrmlen_utf32,
2825
  my_like_range_utf32,
2826
  my_wildcmp_utf32_ci,
2827
  my_strcasecmp_mb2_or_mb4,
2828
  my_instr_mb,
2829
  my_hash_sort_utf32,
2830
  my_propagate_simple
2831
};
2832
2833
2834
static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
2835
{
2836
  NULL, /* init */
2837
  my_strnncoll_utf32_bin,
2838
  my_strnncollsp_utf32_bin,
2839
  my_strnxfrm_utf32,
2840
  my_strnxfrmlen_utf32,
2841
  my_like_range_utf32,
2842
  my_wildcmp_utf32_bin,
2843
  my_strcasecmp_mb2_or_mb4,
2844
  my_instr_mb,
2845
  my_hash_sort_utf32,
2846
  my_propagate_simple
2847
};
2848
2849
2850
MY_CHARSET_HANDLER my_charset_utf32_handler=
2851
{
2852
  NULL, /* init */
2853
  my_ismbchar_utf32,
2854
  my_mbcharlen_utf32,
2855
  my_numchars_utf32,
2856
  my_charpos_utf32,
2857
  my_well_formed_len_utf32,
2858
  my_lengthsp_utf32,
2859
  my_numcells_mb,
2860
  my_utf32_uni,
2861
  my_uni_utf32,
2862
  my_mb_ctype_mb,
2863
  my_caseup_str_mb2_or_mb4,
2864
  my_casedn_str_mb2_or_mb4,
2865
  my_caseup_utf32,
2866
  my_casedn_utf32,
2867
  my_snprintf_utf32,
2868
  my_l10tostr_mb2_or_mb4,
2869
  my_ll10tostr_mb2_or_mb4,
2870
  my_fill_utf32,
2871
  my_strntol_mb2_or_mb4,
2872
  my_strntoul_mb2_or_mb4,
2873
  my_strntoll_mb2_or_mb4,
2874
  my_strntoull_mb2_or_mb4,
2875
  my_strntod_mb2_or_mb4,
2876
  my_strtoll10_utf32,
2877
  my_strntoull10rnd_mb2_or_mb4,
2878
  my_scan_utf32
2879
};
2880
2881
2882
CHARSET_INFO my_charset_utf32_general_ci=
2883
{
2884
  60,0,0,              /* number       */
2885
  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
2886
  "utf32",             /* cs name    */
2887
  "utf32_general_ci",  /* name         */
2888
  "UTF-32 Unicode",    /* comment      */
2889
  NULL,                /* tailoring    */
2890
  NULL,                /* ctype        */
2891
  NULL,                /* to_lower     */
2892
  NULL,                /* to_upper     */
2893
  NULL,                /* sort_order   */
2894
  NULL,                /* contractions */
2895
  NULL,                /* sort_order_big*/
2896
  NULL,                /* tab_to_uni   */
2897
  NULL,                /* tab_from_uni */
2898
  my_unicase_default,  /* caseinfo     */
2899
  NULL,                /* state_map    */
2900
  NULL,                /* ident_map    */
2901
  1,                   /* strxfrm_multiply */
2902
  1,                   /* caseup_multiply  */
2903
  1,                   /* casedn_multiply  */
2904
  4,                   /* mbminlen     */
2905
  4,                   /* mbmaxlen     */
2906
  0,                   /* min_sort_char */
2907
  0xFFFF,              /* max_sort_char */
2908
  ' ',                 /* pad char      */
2909
  0,                   /* escape_with_backslash_is_dangerous */
2910
  1,                   /* levels_for_compare */
2911
  1,                   /* levels_for_order   */
2912
  &my_charset_utf32_handler,
2913
  &my_collation_utf32_general_ci_handler
2914
};
2915
2916
2917
CHARSET_INFO my_charset_utf32_bin=
2918
{
2919
  61,0,0,              /* number       */
2920
  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
2921
  "utf32",             /* cs name    */
2922
  "utf32_bin",         /* name         */
2923
  "UTF-32 Unicode",    /* comment      */
2924
  NULL,                /* tailoring    */
2925
  NULL,                /* ctype        */
2926
  NULL,                /* to_lower     */
2927
  NULL,                /* to_upper     */
2928
  NULL,                /* sort_order   */
2929
  NULL,                /* contractions */
2930
  NULL,                /* sort_order_big*/
2931
  NULL,                /* tab_to_uni   */
2932
  NULL,                /* tab_from_uni */
2933
  my_unicase_default,  /* caseinfo     */
2934
  NULL,                /* state_map    */
2935
  NULL,                /* ident_map    */
2936
  1,                   /* strxfrm_multiply */
2937
  1,                   /* caseup_multiply  */
2938
  1,                   /* casedn_multiply  */
2939
  4,                   /* mbminlen     */
2940
  4,                   /* mbmaxlen     */
2941
  0,                   /* min_sort_char */
2942
  0xFFFF,              /* max_sort_char */
2943
  ' ',                 /* pad char      */
2944
  0,                   /* escape_with_backslash_is_dangerous */
2945
  1,                   /* levels_for_compare */
2946
  1,                   /* levels_for_order   */
2947
  &my_charset_utf32_handler,
2948
  &my_collation_utf32_bin_handler
2949
};
2950
2951
2952
#endif /* HAVE_CHARSET_utf32 */
2953
2954
2955
#ifdef HAVE_CHARSET_ucs2
2956
2957
/*
  Character class table for ucs2: one leading entry followed by 256 entries.
  Row comments give the array indexes covered by each row.
*/
static uchar ctype_ucs2[] = {
    0,                                                                /* [0]        */
   32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,    /* [1..16]    */
   32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,    /* [17..32]   */
   72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,    /* [33..48]   */
  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,    /* [49..64]   */
   16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1,    /* [65..80]   */
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,    /* [81..96]   */
   16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,    /* [97..112]  */
    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,    /* [113..128] */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,    /* [129..144] */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,    /* [145..160] */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,    /* [161..176] */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,    /* [177..192] */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,    /* [193..208] */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,    /* [209..224] */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,    /* [225..240] */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0     /* [241..256] */
};
2976
2977
/*
  Single-byte lower-case map: identity for every value except 65..90
  ('A'..'Z'), which map to 97..122 ('a'..'z').
  Row comments give the first index of each row.
*/
static uchar to_lower_ucs2[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,    /* 0x00 */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,    /* 0x10 */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,    /* 0x20 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,    /* 0x30 */
   64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,    /* 0x40 */
  112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,    /* 0x50 */
   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,    /* 0x60 */
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,    /* 0x70 */
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,    /* 0x80 */
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,    /* 0x90 */
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,    /* 0xA0 */
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,    /* 0xB0 */
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,    /* 0xC0 */
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,    /* 0xD0 */
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,    /* 0xE0 */
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255     /* 0xF0 */
};
2995
2996
/*
  Single-byte upper-case map: identity for every value except 97..122
  ('a'..'z'), which map to 65..90 ('A'..'Z').
  Row comments give the first index of each row.
*/
static uchar to_upper_ucs2[] = {
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,    /* 0x00 */
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,    /* 0x10 */
   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,    /* 0x20 */
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,    /* 0x30 */
   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,    /* 0x40 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,    /* 0x50 */
   96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,    /* 0x60 */
   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,    /* 0x70 */
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,    /* 0x80 */
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,    /* 0x90 */
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,    /* 0xA0 */
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,    /* 0xB0 */
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,    /* 0xC0 */
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,    /* 0xD0 */
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,    /* 0xE0 */
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255     /* 0xF0 */
};
3014
3015
3016
static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
3017
		       my_wc_t * pwc, const uchar *s, const uchar *e)
3018
{
3019
  if (s+2 > e) /* Need 2 characters */
3020
    return MY_CS_TOOSMALL2;
3021
  
3022
  *pwc= ((uchar)s[0]) * 256  + ((uchar)s[1]);
3023
  return 2;
3024
}
3025
3026
static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
3027
		       my_wc_t wc, uchar *r, uchar *e)
3028
{
3029
  if ( r+2 > e ) 
3030
    return MY_CS_TOOSMALL2;
3031
  
3032
  r[0]= (uchar) (wc >> 8);
3033
  r[1]= (uchar) (wc & 0xFF);
3034
  return 2;
3035
}
3036
3037
3038
/**
  Convert a UCS-2 string to upper case in place.

  Each character is looked up in cs->caseinfo, indexed by its high byte
  and then its low byte; characters whose page is missing are left as is.

  @param cs      Character set providing the caseinfo pages.
  @param src     String to convert (modified in place).
  @param srclen  Length of src in bytes.
  @param dst     Must be the same as src (asserted in debug builds).
  @param dstlen  Must be the same as srclen (asserted in debug builds).

  @return srclen.
*/
static size_t my_caseup_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
                           char *dst __attribute__((unused)),
                           size_t dstlen __attribute__((unused)))
{
  char *srcend= src + srclen;
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
  my_wc_t wc;
  int res;
  DBUG_ASSERT(src == dst && srclen == dstlen);

  for ( ; src < srcend; src+= res)
  {
    MY_UNICASE_INFO *page;

    res= my_ucs2_uni(cs, &wc, (uchar *)src, (uchar*) srcend);
    if (res <= 0)
      break;

    page= uni_plane[(wc >> 8) & 0xFF];
    if (page)
      wc= page[wc & 0xFF].toupper;

    if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
      break;
  }
  return srclen;
}
3059
3060
3061
/**
  Fold the sort weights of a UCS-2 string into a running hash.

  Trailing spaces (00 ' ') are dropped first.  Every remaining character is
  replaced by its sort weight from cs->caseinfo (when the page exists) and
  mixed into the hash low byte first, then high byte.

  @param cs    Character set providing the caseinfo pages.
  @param s     String to hash.
  @param slen  Length of s in bytes.
  @param n1    In/out: hash value.
  @param n2    In/out: hash state, increased by 3 per mixed byte.
*/
static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
                              ulong *n1, ulong *n2)
{
  const uchar *e= s + slen;
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
  my_wc_t wc;
  int res;

  /* Skip trailing spaces */
  while (e > s+1 && e[-1] == ' ' && e[-2] == '\0')
    e-= 2;

  for ( ; s < e; s+= res)
  {
    MY_UNICASE_INFO *page;

    res= my_ucs2_uni(cs, &wc, (uchar *)s, (uchar*)e);
    if (res <= 0)
      break;

    page= uni_plane[(wc >> 8) & 0xFF];
    if (page)
      wc= page[wc & 0xFF].sort;

    n1[0]^= (((n1[0] & 63) + n2[0]) * (wc & 0xFF)) + (n1[0] << 8);
    n2[0]+= 3;
    n1[0]^= (((n1[0] & 63) + n2[0]) * (wc >> 8)) + (n1[0] << 8);
    n2[0]+= 3;
  }
}
3083
3084
3085
/**
  Convert a UCS-2 string to lower case in place.

  Each character is looked up in cs->caseinfo, indexed by its high byte
  and then its low byte; characters whose page is missing are left as is.

  @param cs      Character set providing the caseinfo pages.
  @param src     String to convert (modified in place).
  @param srclen  Length of src in bytes.
  @param dst     Must be the same as src (asserted in debug builds).
  @param dstlen  Must be the same as srclen (asserted in debug builds).

  @return srclen.
*/
static size_t my_casedn_ucs2(CHARSET_INFO *cs, char *src, size_t srclen,
                           char *dst __attribute__((unused)),
                           size_t dstlen __attribute__((unused)))
{
  char *srcend= src + srclen;
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
  my_wc_t wc;
  int res;
  DBUG_ASSERT(src == dst && srclen == dstlen);

  for ( ; src < srcend; src+= res)
  {
    MY_UNICASE_INFO *page;

    res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend);
    if (res <= 0)
      break;

    page= uni_plane[(wc >> 8) & 0xFF];
    if (page)
      wc= page[wc & 0xFF].tolower;

    if (res != my_uni_ucs2(cs, wc, (uchar*) src, (uchar*) srcend))
      break;
  }
  return srclen;
}
3106
3107
3108
static int my_strnncoll_ucs2(CHARSET_INFO *cs, 
3109
			     const uchar *s, size_t slen, 
3110
                             const uchar *t, size_t tlen,
3111
                             my_bool t_is_prefix)
3112
{
3113
  int s_res,t_res;
6 by Brian Aker
Second pass on pthread cleanup
3114
  my_wc_t s_wc= 0,t_wc= 0;
1 by brian
clean slate
3115
  const uchar *se=s+slen;
3116
  const uchar *te=t+tlen;
3117
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
3118
3119
  while ( s < se && t < te )
3120
  {
3121
    int plane;
3122
    s_res=my_ucs2_uni(cs,&s_wc, s, se);
3123
    t_res=my_ucs2_uni(cs,&t_wc, t, te);
3124
    
3125
    if ( s_res <= 0 || t_res <= 0 )
3126
    {
3127
      /* Incorrect string, compare by char value */
3128
      return ((int)s[0]-(int)t[0]); 
3129
    }
3130
    
3131
    plane=(s_wc>>8) & 0xFF;
3132
    s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
3133
    plane=(t_wc>>8) & 0xFF;
3134
    t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
3135
    if ( s_wc != t_wc )
3136
    {
3137
      return  s_wc > t_wc ? 1 : -1;
3138
    }
3139
    
3140
    s+=s_res;
3141
    t+=t_res;
3142
  }
3143
  return (int) (t_is_prefix ? t-te : ((se-s) - (te-t)));
3144
}
3145
3146
3147
/**
3148
  Compare strings, discarding end space
3149
3150
  If one string is shorter as the other, then we space extend the other
3151
  so that the strings have equal length.
3152
3153
  This will ensure that the following things hold:
3154
3155
    "a"  == "a "
3156
    "a\0" < "a"
3157
    "a\0" < "a "
3158
3159
  @param  cs        Character set pinter.
3160
  @param  a         First string to compare.
3161
  @param  a_length  Length of 'a'.
3162
  @param  b         Second string to compare.
3163
  @param  b_length  Length of 'b'.
3164
3165
  IMPLEMENTATION
3166
3167
  @return Comparison result.
3168
    @retval Negative number, if a less than b.
3169
    @retval 0, if a is equal to b
3170
    @retval Positive number, if a > b
3171
*/
3172
3173
static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
3174
                               const uchar *s, size_t slen,
3175
                               const uchar *t, size_t tlen,
3176
                               my_bool diff_if_only_endspace_difference
3177
			       __attribute__((unused)))
3178
{
3179
  const uchar *se, *te;
3180
  size_t minlen;
3181
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
3182
3183
  /* extra safety to make sure the lengths are even numbers */
3184
  slen&= ~1;
3185
  tlen&= ~1;
3186
3187
  se= s + slen;
3188
  te= t + tlen;
3189
3190
  for (minlen= min(slen, tlen); minlen; minlen-= 2)
3191
  {
3192
    int s_wc = uni_plane[s[0]] ? (int) uni_plane[s[0]][s[1]].sort :
3193
                                 (((int) s[0]) << 8) + (int) s[1];
3194
3195
    int t_wc = uni_plane[t[0]] ? (int) uni_plane[t[0]][t[1]].sort : 
3196
                                 (((int) t[0]) << 8) + (int) t[1];
3197
    if ( s_wc != t_wc )
3198
      return  s_wc > t_wc ? 1 : -1;
3199
3200
    s+= 2;
3201
    t+= 2;
3202
  }
3203
3204
  if (slen != tlen)
3205
  {
3206
    int swap= 1;
3207
    if (slen < tlen)
3208
    {
3209
      s= t;
3210
      se= te;
3211
      swap= -1;
3212
    }
3213
3214
    for ( ; s < se ; s+= 2)
3215
    {
3216
      if (s[0] || s[1] != ' ')
3217
        return (s[0] == 0 && s[1] < ' ') ? -swap : swap;
3218
    }
3219
  }
3220
  return 0;
3221
}
3222
3223
3224
static size_t
3225
my_strnxfrm_ucs2(CHARSET_INFO *cs, 
3226
                 uchar *dst, size_t dstlen, uint nweights,
3227
                 const uchar *src, size_t srclen, uint flags)
3228
{
3229
  my_wc_t wc;
3230
  int res;
3231
  int plane;
3232
  uchar *de= dst + dstlen;
3233
  uchar *d0= dst;
3234
  const uchar *se= src + srclen;
3235
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
3236
3237
  for (; src < se && dst < de && nweights; nweights--)
3238
  {
3239
    if ((res=my_ucs2_uni(cs,&wc, src, se))<0)
3240
    {
3241
      break;
3242
    }
3243
    src+=res;
3244
    srclen-=res;
3245
    
3246
    plane=(wc>>8) & 0xFF;
3247
    wc = uni_plane[plane] ? uni_plane[plane][wc & 0xFF].sort : wc;
3248
    
3249
    if ((res=my_uni_ucs2(cs,wc,dst,de)) <0)
3250
    {
3251
      break;
3252
    }
3253
    dst+=res;
3254
  }
3255
  return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
3256
}
3257
3258
3259
/**
  Multi-byte check for UCS-2: always answers 2, whatever the arguments.
*/
static uint
my_ismbchar_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                 const char *b __attribute__((unused)),
                 const char *e __attribute__((unused)))
{
  return 2;
}
3265
3266
3267
static uint my_mbcharlen_ucs2(CHARSET_INFO *cs  __attribute__((unused)) , 
3268
                              uint c __attribute__((unused)))
3269
{
3270
  return 2;
3271
}
3272
3273
3274
static
3275
size_t my_numchars_ucs2(CHARSET_INFO *cs __attribute__((unused)),
3276
                        const char *b, const char *e)
3277
{
3278
  return (size_t) (e-b)/2;
3279
}
3280
3281
3282
static
3283
size_t my_charpos_ucs2(CHARSET_INFO *cs __attribute__((unused)),
3284
                       const char *b, const char *e, size_t pos)
3285
{
3286
  size_t string_length= (size_t) (e - b);
3287
  return pos > string_length ? string_length + 2 : pos * 2;
3288
}
3289
3290
3291
static
3292
size_t my_well_formed_len_ucs2(CHARSET_INFO *cs __attribute__((unused)),
3293
                               const char *b, const char *e,
3294
                               size_t nchars, int *error)
3295
{
3296
  /* Ensure string length is dividable with 2 */
3297
  size_t nbytes= ((size_t) (e-b)) & ~(size_t) 1;
3298
  *error= 0;
3299
  nchars*= 2;
3300
  return min(nbytes, nchars);
3301
}
3302
3303
3304
static
3305
int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
3306
		    const char *str,const char *str_end,
3307
		    const char *wildstr,const char *wildend,
3308
		    int escape, int w_one, int w_many)
3309
{
3310
  MY_UNICASE_INFO **uni_plane= cs->caseinfo;
3311
  return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
3312
                            escape,w_one,w_many,uni_plane); 
3313
}
3314
3315
3316
static
3317
int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
3318
		    const char *str,const char *str_end,
3319
		    const char *wildstr,const char *wildend,
3320
		    int escape, int w_one, int w_many)
3321
{
3322
  return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
3323
                            escape,w_one,w_many,NULL); 
3324
}
3325
3326
3327
static
3328
int my_strnncoll_ucs2_bin(CHARSET_INFO *cs, 
3329
                          const uchar *s, size_t slen,
3330
                          const uchar *t, size_t tlen,
3331
                          my_bool t_is_prefix)
3332
{
3333
  int s_res,t_res;
6 by Brian Aker
Second pass on pthread cleanup
3334
  my_wc_t s_wc= 0,t_wc= 0;
1 by brian
clean slate
3335
  const uchar *se=s+slen;
3336
  const uchar *te=t+tlen;
3337
3338
  while ( s < se && t < te )
3339
  {
3340
    s_res=my_ucs2_uni(cs,&s_wc, s, se);
3341
    t_res=my_ucs2_uni(cs,&t_wc, t, te);
3342
    
3343
    if ( s_res <= 0 || t_res <= 0 )
3344
    {
3345
      /* Incorrect string, compare by char value */
3346
      return ((int)s[0]-(int)t[0]); 
3347
    }
3348
    if ( s_wc != t_wc )
3349
    {
3350
      return  s_wc > t_wc ? 1 : -1;
3351
    }
3352
    
3353
    s+=s_res;
3354
    t+=t_res;
3355
  }
3356
  return (int) (t_is_prefix ? t-te : ((se-s) - (te-t)));
3357
}
3358
3359
static int my_strnncollsp_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)), 
3360
                                   const uchar *s, size_t slen, 
3361
                                   const uchar *t, size_t tlen,
3362
                                   my_bool diff_if_only_endspace_difference
3363
                                   __attribute__((unused)))
3364
{
3365
  const uchar *se, *te;
3366
  size_t minlen;
3367
3368
  /* extra safety to make sure the lengths are even numbers */
3369
  slen= (slen >> 1) << 1;
3370
  tlen= (tlen >> 1) << 1;
3371
3372
  se= s + slen;
3373
  te= t + tlen;
3374
3375
  for (minlen= min(slen, tlen); minlen; minlen-= 2)
3376
  {
3377
    int s_wc= s[0] * 256 + s[1];
3378
    int t_wc= t[0] * 256 + t[1];
3379
    if ( s_wc != t_wc )
3380
      return  s_wc > t_wc ? 1 : -1;
3381
3382
    s+= 2;
3383
    t+= 2;
3384
  }
3385
3386
  if (slen != tlen)
3387
  {
3388
    int swap= 1;
3389
    if (slen < tlen)
3390
    {
3391
      s= t;
3392
      se= te;
3393
      swap= -1;
3394
    }
3395
3396
    for ( ; s < se ; s+= 2)
3397
    {
3398
      if (s[0] || s[1] != ' ')
3399
        return (s[0] == 0 && s[1] < ' ') ? -swap : swap;
3400
    }
3401
  }
3402
  return 0;
3403
}
3404
3405
3406
static
3407
size_t my_strnxfrm_ucs2_bin(CHARSET_INFO *cs,
3408
                            uchar *dst, size_t dstlen, uint nweights,
3409
                            const uchar *src, size_t srclen, uint flags)
3410
{
3411
  uint frmlen;
3412
  if ((frmlen= min(dstlen, nweights * 2)) > srclen)
3413
    frmlen= srclen;
3414
  if (dst != src)
3415
    memcpy(dst, src, frmlen);
3416
  return my_strxfrm_pad_desc_and_reverse(cs, dst, dst + frmlen, dst + dstlen,
3417
                                         nweights - frmlen / 2, flags, 0);
3418
}
3419
3420
3421
static
3422
void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
3423
			   const uchar *key, size_t len,ulong *nr1, ulong *nr2)
3424
{
3425
  const uchar *pos = key;
3426
  
3427
  key+= len;
3428
3429
  while (key > pos+1 && key[-1] == ' ' && key[-2] == '\0')
3430
    key-= 2;
3431
3432
  for (; pos < (uchar*) key ; pos++)
3433
  {
3434
    nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * 
3435
	     ((uint)*pos)) + (nr1[0] << 8);
3436
    nr2[0]+=3;
3437
  }
3438
}
3439
3440
3441
/**
3442
   Calculate min_str and max_str that ranges a LIKE string.
3443
3444
   @param ptr        Pointer to LIKE pattern.
3445
   @param ptr_length Length of LIKE pattern.
3446
   @param escape     Escape character in LIKE.  (Normally '\').
3447
                     All escape characters should be removed
3448
                     from min_str and max_str.
3449
   @param res_length Length of min_str and max_str.
3450
   @param min_str    Smallest case sensitive string that ranges LIKE.
3451
                     Should be space padded to res_length.
3452
   @param max_str    Largest case sensitive string that ranges LIKE.
3453
                     Normally padded with the biggest character sort value.
3454
3455
   @return Optimization status.
3456
     @retval FALSE if LIKE pattern can be optimized
3457
     @rerval TRUE if LIKE can't be optimized.
3458
*/
3459
3460
my_bool my_like_range_ucs2(CHARSET_INFO *cs,
3461
			   const char *ptr, size_t ptr_length,
3462
			   pbool escape, pbool w_one, pbool w_many,
3463
			   size_t res_length,
3464
			   char *min_str,char *max_str,
3465
			   size_t *min_length,size_t *max_length)
3466
{
3467
  const char *end=ptr+ptr_length;
3468
  char *min_org=min_str;
3469
  char *min_end=min_str+res_length;
3470
  size_t charlen= res_length / cs->mbmaxlen;
3471
  const char *contraction_flags= cs->contractions ?
3472
             ((const char*) cs->contractions) + 0x40*0x40 : NULL;
3473
  
3474
  for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
3475
        ; ptr+=2, charlen--)
3476
  {
3477
    if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
3478
    {
3479
      ptr+=2;					/* Skip escape */
3480
      *min_str++= *max_str++ = ptr[0];
3481
      *min_str++= *max_str++ = ptr[1];
3482
      continue;
3483
    }
3484
    if (ptr[0] == '\0' && ptr[1] == w_one)	/* '_' in SQL */
3485
    {
3486
      *min_str++= (char) (cs->min_sort_char >> 8);
3487
      *min_str++= (char) (cs->min_sort_char & 255);
3488
      *max_str++= (char) (cs->max_sort_char >> 8);
3489
      *max_str++= (char) (cs->max_sort_char & 255);
3490
      continue;
3491
    }
3492
    if (ptr[0] == '\0' && ptr[1] == w_many)	/* '%' in SQL */
3493
    {
3494
fill_max_and_min:
3495
      /*
3496
        Calculate length of keys:
3497
        'a\0\0... is the smallest possible string when we have space expand
3498
        a\ff\ff... is the biggest possible string
3499
      */
3500
      *min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
3501
                    res_length);
3502
      *max_length= res_length;
3503
      do {
3504
        *min_str++ = 0;
3505
	*min_str++ = 0;
3506
	*max_str++ = (char) (cs->max_sort_char >> 8);
3507
	*max_str++ = (char) (cs->max_sort_char & 255);
3508
      } while (min_str + 1 < min_end);
3509
      return FALSE;
3510
    }
3511
3512
    if (contraction_flags && ptr + 3 < end &&
3513
        ptr[0] == '\0' && contraction_flags[(uchar) ptr[1]])
3514
    {
3515
      /* Contraction head found */
3516
      if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
3517
      {
3518
        /* Contraction head followed by a wildcard, quit */
3519
        goto fill_max_and_min;
3520
      }
3521
      
3522
      /*
3523
        Check if the second letter can be contraction part,
3524
        and if two letters really produce a contraction.
3525
      */
3526
      if (ptr[2] == '\0' && contraction_flags[(uchar) ptr[3]] &&
3527
          cs->contractions[(ptr[1]-0x40)*0x40 + ptr[3] - 0x40])
3528
      {
3529
        /* Contraction found */
3530
        if (charlen == 1 || min_str + 2 >= min_end)
3531
        {
3532
          /* Full contraction doesn't fit, quit */
3533
          goto fill_max_and_min;
3534
        }
3535
        
3536
        /* Put contraction head */
3537
        *min_str++= *max_str++= *ptr++;
3538
        *min_str++= *max_str++= *ptr++;
3539
        charlen--;
3540
      }
3541
    }
3542
    /* Put contraction tail, or a single character */
3543
    *min_str++= *max_str++ = ptr[0];
3544
    *min_str++= *max_str++ = ptr[1];
3545
  }
3546
3547
  /* Temporary fix for handling w_one at end of string (key compression) */
3548
  {
3549
    char *tmp;
3550
    for (tmp= min_str ; tmp-1 > min_org && tmp[-1] == '\0' && tmp[-2]=='\0';)
3551
    {
3552
      *--tmp=' ';
3553
      *--tmp='\0';
3554
    }
3555
  }
3556
  
3557
  *min_length= *max_length = (size_t) (min_str - min_org);
3558
  while (min_str + 1 < min_end)
3559
  {
3560
    *min_str++ = *max_str++ = '\0';
3561
    *min_str++ = *max_str++ = ' ';      /* Because if key compression */
3562
  }
3563
  return FALSE;
3564
}
3565
3566
3567
static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
3568
{
3569
    NULL,		/* init */
3570
    my_strnncoll_ucs2,
3571
    my_strnncollsp_ucs2,
3572
    my_strnxfrm_ucs2,
3573
    my_strnxfrmlen_simple,
3574
    my_like_range_ucs2,
3575
    my_wildcmp_ucs2_ci,
3576
    my_strcasecmp_mb2_or_mb4,
3577
    my_instr_mb,
3578
    my_hash_sort_ucs2,
3579
    my_propagate_simple
3580
};
3581
3582
3583
static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
3584
{
3585
    NULL,		/* init */
3586
    my_strnncoll_ucs2_bin,
3587
    my_strnncollsp_ucs2_bin,
3588
    my_strnxfrm_ucs2_bin,
3589
    my_strnxfrmlen_simple,
3590
    my_like_range_ucs2,
3591
    my_wildcmp_ucs2_bin,
3592
    my_strcasecmp_mb2_or_mb4,
3593
    my_instr_mb,
3594
    my_hash_sort_ucs2_bin,
3595
    my_propagate_simple
3596
};
3597
3598
3599
MY_CHARSET_HANDLER my_charset_ucs2_handler=
3600
{
3601
    NULL,		/* init */
3602
    my_ismbchar_ucs2,	/* ismbchar     */
3603
    my_mbcharlen_ucs2,	/* mbcharlen    */
3604
    my_numchars_ucs2,
3605
    my_charpos_ucs2,
3606
    my_well_formed_len_ucs2,
3607
    my_lengthsp_mb2,
3608
    my_numcells_mb,
3609
    my_ucs2_uni,	/* mb_wc        */
3610
    my_uni_ucs2,	/* wc_mb        */
3611
    my_mb_ctype_mb,
3612
    my_caseup_str_mb2_or_mb4,
3613
    my_casedn_str_mb2_or_mb4,
3614
    my_caseup_ucs2,
3615
    my_casedn_ucs2,
3616
    my_snprintf_mb2,
3617
    my_l10tostr_mb2_or_mb4,
3618
    my_ll10tostr_mb2_or_mb4,
3619
    my_fill_mb2,
3620
    my_strntol_mb2_or_mb4,
3621
    my_strntoul_mb2_or_mb4,
3622
    my_strntoll_mb2_or_mb4,
3623
    my_strntoull_mb2_or_mb4,
3624
    my_strntod_mb2_or_mb4,
3625
    my_strtoll10_mb2,
3626
    my_strntoull10rnd_mb2_or_mb4,
3627
    my_scan_mb2
3628
};
3629
3630
3631
CHARSET_INFO my_charset_ucs2_general_ci=
3632
{
3633
    35,0,0,		/* number       */
3634
    MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
3635
    "ucs2",		/* cs name    */
3636
    "ucs2_general_ci",	/* name         */
3637
    "UCS-2 Unicode",    /* comment      */
3638
    NULL,		/* tailoring    */
3639
    ctype_ucs2,		/* ctype        */
3640
    to_lower_ucs2,	/* to_lower     */
3641
    to_upper_ucs2,	/* to_upper     */
3642
    to_upper_ucs2,	/* sort_order   */
3643
    NULL,		/* contractions */
3644
    NULL,		/* sort_order_big*/
3645
    NULL,		/* tab_to_uni   */
3646
    NULL,		/* tab_from_uni */
3647
    my_unicase_default, /* caseinfo     */
3648
    NULL,		/* state_map    */
3649
    NULL,		/* ident_map    */
3650
    1,			/* strxfrm_multiply */
3651
    1,                  /* caseup_multiply  */
3652
    1,                  /* casedn_multiply  */
3653
    2,			/* mbminlen     */
3654
    2,			/* mbmaxlen     */
3655
    0,			/* min_sort_char */
3656
    0xFFFF,		/* max_sort_char */
3657
    ' ',                /* pad char      */
3658
    0,                  /* escape_with_backslash_is_dangerous */
3659
    1,                  /* levels_for_compare */
3660
    1,                  /* levels_for_order   */
3661
    &my_charset_ucs2_handler,
3662
    &my_collation_ucs2_general_ci_handler
3663
};
3664
3665
CHARSET_INFO my_charset_ucs2_bin=
3666
{
3667
    90,0,0,		/* number       */
3668
    MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
3669
    "ucs2",		/* cs name    */
3670
    "ucs2_bin",		/* name         */
3671
    "UCS-2 Unicode",    /* comment      */
3672
    NULL,		/* tailoring    */
3673
    ctype_ucs2,		/* ctype        */
3674
    to_lower_ucs2,	/* to_lower     */
3675
    to_upper_ucs2,	/* to_upper     */
3676
    NULL,		/* sort_order   */
3677
    NULL,		/* contractions */
3678
    NULL,		/* sort_order_big*/
3679
    NULL,		/* tab_to_uni   */
3680
    NULL,		/* tab_from_uni */
3681
    my_unicase_default, /* caseinfo     */
3682
    NULL,		/* state_map    */
3683
    NULL,		/* ident_map    */
3684
    1,			/* strxfrm_multiply */
3685
    1,                  /* caseup_multiply  */
3686
    1,                  /* casedn_multiply  */
3687
    2,			/* mbminlen     */
3688
    2,			/* mbmaxlen     */
3689
    0,			/* min_sort_char */
3690
    0xFFFF,		/* max_sort_char */
3691
    ' ',                /* pad char      */
3692
    0,                  /* escape_with_backslash_is_dangerous */
3693
    1,                  /* levels_for_compare */
3694
    1,                  /* levels_for_order   */
3695
    &my_charset_ucs2_handler,
3696
    &my_collation_ucs2_bin_handler
3697
};
3698
3699
3700
#endif /* HAVE_CHARSET_ucs2 */