~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
1802.10.2 by Monty Taylor
Update all of the copyright headers to include the correct address.
14
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
1 by brian
clean slate
15
1241.9.1 by Monty Taylor
Removed global.h. Fixed all the headers.
16
#include "config.h"
1130.3.26 by Monty Taylor
Removed global.h from headers.
17
1241.9.64 by Monty Taylor
Moved remaining non-public portions of mysys and mystrings to drizzled/internal.
18
#include "drizzled/internal/m_string.h"
1241.9.61 by Monty Taylor
No more mystrings in drizzled/
19
#include "drizzled/charset_info.h"
1 by brian
clean slate
20
1067.4.9 by Nathan Williams
Converted all usages of cmin/cmax in mystrings directory to use std::min/max
21
#include <algorithm>
22
23
using namespace std;
24
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
25
namespace drizzled
26
{
1 by brian
clean slate
27
28
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
29
size_t my_caseup_str_mb(const CHARSET_INFO * const  cs, char *str)
1 by brian
clean slate
30
{
205 by Brian Aker
uint32 -> uin32_t
31
  register uint32_t l;
481 by Brian Aker
Remove all of uchar.
32
  register unsigned char *map= cs->to_upper;
1 by brian
clean slate
33
  char *str_orig= str;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
34
1 by brian
clean slate
35
  while (*str)
36
  {
37
    /* Pointing after the '\0' is safe here. */
38
    if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
39
      str+= l;
40
    else
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
41
    {
481 by Brian Aker
Remove all of uchar.
42
      *str= (char) map[(unsigned char)*str];
1 by brian
clean slate
43
      str++;
44
    }
45
  }
46
  return (size_t) (str - str_orig);
47
}
48
49
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
50
size_t my_casedn_str_mb(const CHARSET_INFO * const  cs, char *str)
1 by brian
clean slate
51
{
205 by Brian Aker
uint32 -> uin32_t
52
  register uint32_t l;
481 by Brian Aker
Remove all of uchar.
53
  register unsigned char *map= cs->to_lower;
1 by brian
clean slate
54
  char *str_orig= str;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
55
1 by brian
clean slate
56
  while (*str)
57
  {
58
    /* Pointing after the '\0' is safe here. */
59
    if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
60
      str+= l;
61
    else
62
    {
481 by Brian Aker
Remove all of uchar.
63
      *str= (char) map[(unsigned char)*str];
1 by brian
clean slate
64
      str++;
65
    }
66
  }
67
  return (size_t) (str - str_orig);
68
}
69
70
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
71
size_t my_caseup_mb(const CHARSET_INFO * const  cs, char *src, size_t srclen,
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
72
                    char *dst, size_t dstlen)
1 by brian
clean slate
73
{
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
74
#ifdef NDEBUG
75
  (void)dst;
76
  (void)dstlen;
77
#endif
205 by Brian Aker
uint32 -> uin32_t
78
  register uint32_t l;
1 by brian
clean slate
79
  register char *srcend= src + srclen;
481 by Brian Aker
Remove all of uchar.
80
  register unsigned char *map= cs->to_upper;
1 by brian
clean slate
81
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
82
  assert(src == dst && srclen == dstlen);
1 by brian
clean slate
83
  while (src < srcend)
84
  {
85
    if ((l=my_ismbchar(cs, src, srcend)))
86
      src+= l;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
87
    else
1 by brian
clean slate
88
    {
481 by Brian Aker
Remove all of uchar.
89
      *src=(char) map[(unsigned char) *src];
1 by brian
clean slate
90
      src++;
91
    }
92
  }
93
  return srclen;
94
}
95
96
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
97
size_t my_casedn_mb(const CHARSET_INFO * const  cs, char *src, size_t srclen,
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
98
                    char *dst, size_t dstlen)
1 by brian
clean slate
99
{
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
100
#ifdef NDEBUG
101
  (void)dst;
102
  (void)dstlen;
103
#endif
205 by Brian Aker
uint32 -> uin32_t
104
  register uint32_t l;
1 by brian
clean slate
105
  register char *srcend= src + srclen;
481 by Brian Aker
Remove all of uchar.
106
  register unsigned char *map=cs->to_lower;
1 by brian
clean slate
107
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
108
  assert(src == dst && srclen == dstlen);
1 by brian
clean slate
109
  while (src < srcend)
110
  {
111
    if ((l= my_ismbchar(cs, src, srcend)))
112
      src+= l;
113
    else
114
    {
481 by Brian Aker
Remove all of uchar.
115
      *src= (char) map[(unsigned char)*src];
1 by brian
clean slate
116
      src++;
117
    }
118
  }
119
  return srclen;
120
}
121
122
123
/*
124
  my_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise.
125
 */
126
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
127
int my_strcasecmp_mb(const CHARSET_INFO * const  cs,const char *s, const char *t)
1 by brian
clean slate
128
{
205 by Brian Aker
uint32 -> uin32_t
129
  register uint32_t l;
481 by Brian Aker
Remove all of uchar.
130
  register unsigned char *map=cs->to_upper;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
131
1 by brian
clean slate
132
  while (*s && *t)
133
  {
134
    /* Pointing after the '\0' is safe here. */
135
    if ((l=my_ismbchar(cs, s, s + cs->mbmaxlen)))
136
    {
137
      while (l--)
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
138
        if (*s++ != *t++)
1 by brian
clean slate
139
          return 1;
140
    }
141
    else if (my_mbcharlen(cs, *t) > 1)
142
      return 1;
481 by Brian Aker
Remove all of uchar.
143
    else if (map[(unsigned char) *s++] != map[(unsigned char) *t++])
1 by brian
clean slate
144
      return 1;
145
  }
146
  /* At least one of '*s' and '*t' is zero here. */
147
  return (*t != *s);
148
}
149
150
151
/*
152
** Compare string against string with wildcard
153
**	0 if matched
154
**	-1 if not matched with wildcard
155
**	 1 if matched with wildcard
156
*/
157
158
#define INC_PTR(cs,A,B) A+=(my_ismbchar(cs,A,B) ? my_ismbchar(cs,A,B) : 1)
159
481 by Brian Aker
Remove all of uchar.
160
#define likeconv(s,A) (unsigned char) (s)->sort_order[(unsigned char) (A)]
1 by brian
clean slate
161
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
162
int my_wildcmp_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
163
		  const char *str,const char *str_end,
164
		  const char *wildstr,const char *wildend,
165
		  int escape, int w_one, int w_many)
166
{
167
  int result= -1;				/* Not found, using wildcards */
168
169
  while (wildstr != wildend)
170
  {
171
    while (*wildstr != w_many && *wildstr != w_one)
172
    {
173
      int l;
174
      if (*wildstr == escape && wildstr+1 != wildend)
175
	wildstr++;
176
      if ((l = my_ismbchar(cs, wildstr, wildend)))
177
      {
178
	  if (str+l > str_end || memcmp(str, wildstr, l) != 0)
179
	      return 1;
180
	  str += l;
181
	  wildstr += l;
182
      }
183
      else
184
      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
185
	return(1);				/* No match */
186
      if (wildstr == wildend)
187
	return (str != str_end);		/* Match if both are at end */
188
      result=1;					/* Found an anchor char */
189
    }
190
    if (*wildstr == w_one)
191
    {
192
      do
193
      {
194
	if (str == str_end)			/* Skip one char if possible */
195
	  return (result);
196
	INC_PTR(cs,str,str_end);
197
      } while (++wildstr < wildend && *wildstr == w_one);
198
      if (wildstr == wildend)
199
	break;
200
    }
201
    if (*wildstr == w_many)
202
    {						/* Found w_many */
481 by Brian Aker
Remove all of uchar.
203
      unsigned char cmp;
1 by brian
clean slate
204
      const char* mb = wildstr;
205
      int mb_len=0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
206
1 by brian
clean slate
207
      wildstr++;
208
      /* Remove any '%' and '_' from the wild search string */
209
      for (; wildstr != wildend ; wildstr++)
210
      {
211
	if (*wildstr == w_many)
212
	  continue;
213
	if (*wildstr == w_one)
214
	{
215
	  if (str == str_end)
216
	    return (-1);
217
	  INC_PTR(cs,str,str_end);
218
	  continue;
219
	}
220
	break;					/* Not a wild character */
221
      }
222
      if (wildstr == wildend)
223
	return(0);				/* Ok if w_many is last */
224
      if (str == str_end)
225
	return -1;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
226
1 by brian
clean slate
227
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
228
	cmp= *++wildstr;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
229
1 by brian
clean slate
230
      mb=wildstr;
231
      mb_len= my_ismbchar(cs, wildstr, wildend);
232
      INC_PTR(cs,wildstr,wildend);		/* This is compared trough cmp */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
233
      cmp=likeconv(cs,cmp);
1 by brian
clean slate
234
      do
235
      {
236
        for (;;)
237
        {
238
          if (str >= str_end)
239
            return -1;
240
          if (mb_len)
241
          {
242
            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
243
            {
244
              str += mb_len;
245
              break;
246
            }
247
          }
248
          else if (!my_ismbchar(cs, str, str_end) &&
249
                   likeconv(cs,*str) == cmp)
250
          {
251
            str++;
252
            break;
253
          }
254
          INC_PTR(cs,str, str_end);
255
        }
256
	{
257
	  int tmp=my_wildcmp_mb(cs,str,str_end,wildstr,wildend,escape,w_one,
258
                                w_many);
259
	  if (tmp <= 0)
260
	    return (tmp);
261
	}
262
      } while (str != str_end && wildstr[0] != w_many);
263
      return(-1);
264
    }
265
  }
266
  return (str != str_end ? 1 : 0);
267
}
268
269
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
270
size_t my_numchars_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
271
		      const char *pos, const char *end)
272
{
273
  register size_t count= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
274
  while (pos < end)
1 by brian
clean slate
275
  {
482 by Brian Aker
Remove uint.
276
    uint32_t mb_len;
1 by brian
clean slate
277
    pos+= (mb_len= my_ismbchar(cs,pos,end)) ? mb_len : 1;
278
    count++;
279
  }
280
  return count;
281
}
282
283
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
284
size_t my_charpos_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
285
		     const char *pos, const char *end, size_t length)
286
{
287
  const char *start= pos;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
288
1 by brian
clean slate
289
  while (length && pos < end)
290
  {
482 by Brian Aker
Remove uint.
291
    uint32_t mb_len;
1 by brian
clean slate
292
    pos+= (mb_len= my_ismbchar(cs, pos, end)) ? mb_len : 1;
293
    length--;
294
  }
295
  return (size_t) (length ? end+2-start : pos-start);
296
}
297
298
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
299
size_t my_well_formed_len_mb(const CHARSET_INFO * const cs, const char *b, const char *e,
1 by brian
clean slate
300
                             size_t pos, int *error)
301
{
302
  const char *b_start= b;
303
  *error= 0;
304
  while (pos)
305
  {
306
    my_wc_t wc;
307
    int mb_len;
308
481 by Brian Aker
Remove all of uchar.
309
    if ((mb_len= cs->cset->mb_wc(cs, &wc, (const unsigned char*) b, (const unsigned char*) e)) <= 0)
1 by brian
clean slate
310
    {
311
      *error= b < e ? 1 : 0;
312
      break;
313
    }
314
    b+= mb_len;
315
    pos--;
316
  }
317
  return (size_t) (b - b_start);
318
}
319
320
482 by Brian Aker
Remove uint.
321
uint32_t my_instr_mb(const CHARSET_INFO * const cs,
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
322
                 const char *b, size_t b_length,
1 by brian
clean slate
323
                 const char *s, size_t s_length,
482 by Brian Aker
Remove uint.
324
                 my_match_t *match, uint32_t nmatch)
1 by brian
clean slate
325
{
326
  register const char *end, *b0;
327
  int res= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
328
1 by brian
clean slate
329
  if (s_length <= b_length)
330
  {
331
    if (!s_length)
332
    {
333
      if (nmatch)
334
      {
335
        match->beg= 0;
336
        match->end= 0;
337
        match->mb_len= 0;
338
      }
339
      return 1;		/* Empty string is always found */
340
    }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
341
1 by brian
clean slate
342
    b0= b;
343
    end= b+b_length-s_length+1;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
344
1 by brian
clean slate
345
    while (b < end)
346
    {
347
      int mb_len;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
348
349
      if (!cs->coll->strnncoll(cs, (const unsigned char*) b, s_length,
481 by Brian Aker
Remove all of uchar.
350
                                   (const unsigned char*) s, s_length, 0))
1 by brian
clean slate
351
      {
352
        if (nmatch)
353
        {
354
          match[0].beg= 0;
355
          match[0].end= (size_t) (b-b0);
356
          match[0].mb_len= res;
357
          if (nmatch > 1)
358
          {
359
            match[1].beg= match[0].end;
360
            match[1].end= match[0].end+s_length;
361
            match[1].mb_len= 0;	/* Not computed */
362
          }
363
        }
364
        return 2;
365
      }
366
      mb_len= (mb_len= my_ismbchar(cs, b, end)) ? mb_len : 1;
367
      b+= mb_len;
368
      b_length-= mb_len;
369
      res++;
370
    }
371
  }
372
  return 0;
373
}
374
375
376
/* BINARY collations handlers for MB charsets */
377
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
378
int my_strnncoll_mb_bin(const CHARSET_INFO * const,
481 by Brian Aker
Remove all of uchar.
379
                        const unsigned char *s, size_t slen,
380
                        const unsigned char *t, size_t tlen,
276 by Brian Aker
Cleaned out my_bool from strings.
381
                        bool t_is_prefix)
1 by brian
clean slate
382
{
1067.4.9 by Nathan Williams
Converted all usages of cmin/cmax in mystrings directory to use std::min/max
383
  size_t len= min(slen,tlen);
1 by brian
clean slate
384
  int cmp= memcmp(s,t,len);
385
  return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen);
386
}
387
388
389
/*
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
390
  Compare two strings.
391
1 by brian
clean slate
392
  SYNOPSIS
393
    my_strnncollsp_mb_bin()
394
    cs			Chararacter set
395
    s			String to compare
396
    slen		Length of 's'
397
    t			String to compare
398
    tlen		Length of 't'
399
    diff_if_only_endspace_difference
400
		        Set to 1 if the strings should be regarded as different
401
                        if they only difference in end space
402
403
  NOTE
404
   This function is used for character strings with binary collations.
405
   The shorter string is extended with end space to be as long as the longer
406
   one.
407
408
  RETURN
409
    A negative number if s < t
410
    A positive number if s > t
411
    0 if strings are equal
412
*/
413
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
414
int my_strnncollsp_mb_bin(const CHARSET_INFO * const,
481 by Brian Aker
Remove all of uchar.
415
                          const unsigned char *a, size_t a_length,
416
                          const unsigned char *b, size_t b_length,
276 by Brian Aker
Cleaned out my_bool from strings.
417
                          bool diff_if_only_endspace_difference)
1 by brian
clean slate
418
{
481 by Brian Aker
Remove all of uchar.
419
  const unsigned char *end;
1 by brian
clean slate
420
  size_t length;
421
  int res;
422
423
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
424
  diff_if_only_endspace_difference= 0;
425
#endif
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
426
1067.4.9 by Nathan Williams
Converted all usages of cmin/cmax in mystrings directory to use std::min/max
427
  end= a + (length= min(a_length, b_length));
1 by brian
clean slate
428
  while (a < end)
429
  {
430
    if (*a++ != *b++)
431
      return ((int) a[-1] - (int) b[-1]);
432
  }
433
  res= 0;
434
  if (a_length != b_length)
435
  {
436
    int swap= 1;
437
    if (diff_if_only_endspace_difference)
438
      res= 1;                                   /* Assume 'a' is bigger */
439
    /*
440
      Check the next not space character of the longer key. If it's < ' ',
441
      then it's smaller than the other key.
442
    */
443
    if (a_length < b_length)
444
    {
445
      /* put shorter key in s */
446
      a_length= b_length;
447
      a= b;
448
      swap= -1;					/* swap sign of result */
449
      res= -res;
450
    }
451
    for (end= a + a_length-length; a < end ; a++)
452
    {
453
      if (*a != ' ')
454
	return (*a < ' ') ? -swap : swap;
455
    }
456
  }
457
  return res;
458
}
459
460
461
/*
  Copy one non-ASCII character from "src" to "dst", advancing both.
  "dst" must have enough room for the character.

  sort_order[] is deliberately not consulted in this macro.
  The original authors note this is correct even for case insensitive
  collations, because:
  - basic Latin letters are handled outside this macro;
  - for all other characters sort_order[x] is equal to x.

  The switch falls through on purpose, so a character of N bytes
  copies exactly N bytes; a result of 0 from ismbchar() copies one byte.
*/
#define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se)                  \
{                                                                        \
  switch ((cs)->cset->ismbchar((cs), (const char*) (src),                \
                               (const char*) (se))) {                    \
  case 4:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 3:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 2:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 0:                                                                \
    *(dst)++= *(src)++; /* byte in range 0x80..0xFF, not an MB head */   \
  }                                                                      \
}
485
486
487
/*
488
  For character sets with two or three byte multi-byte
489
  characters having multibyte weights *equal* to their codes:
490
  cp932, euckr, gb2312, sjis, eucjpms, ujis.
491
*/
492
size_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
493
my_strnxfrm_mb(const CHARSET_INFO * const cs,
482 by Brian Aker
Remove uint.
494
               unsigned char *dst, size_t dstlen, uint32_t nweights,
495
               const unsigned char *src, size_t srclen, uint32_t flags)
1 by brian
clean slate
496
{
481 by Brian Aker
Remove all of uchar.
497
  unsigned char *d0= dst;
498
  unsigned char *de= dst + dstlen;
499
  const unsigned char *se= src + srclen;
500
  const unsigned char *sort_order= cs->sort_order;
1 by brian
clean slate
501
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
502
  assert(cs->mbmaxlen <= 4);
1 by brian
clean slate
503
504
  /*
505
    If "srclen" is smaller than both "dstlen" and "nweights"
506
    then we can run a simplified loop -
507
    without checking "nweights" and "de".
508
  */
509
  if (dstlen >= srclen && nweights >= srclen)
510
  {
511
    if (sort_order)
512
    {
513
      /* Optimized version for a case insensitive collation */
514
      for (; src < se; nweights--)
515
      {
516
        if (*src < 128) /* quickly catch ASCII characters */
517
          *dst++= sort_order[*src++];
518
        else
519
          my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
520
      }
521
    }
522
    else
523
    {
524
      /* Optimized version for a case sensitive collation (no sort_order) */
525
      for (; src < se; nweights--)
526
      {
527
        if (*src < 128) /* quickly catch ASCII characters */
528
          *dst++= *src++;
529
        else
530
          my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
531
      }
532
    }
533
    goto pad;
534
  }
535
536
  /*
537
    A thourough loop, checking all possible limits:
538
    "se", "nweights" and "de".
539
  */
540
  for (; src < se && nweights; nweights--)
541
  {
542
    int chlen;
543
    if (*src < 128 ||
544
        !(chlen= cs->cset->ismbchar(cs, (const char*) src, (const char*) se)))
545
    {
546
      /* Single byte character */
547
      if (dst >= de)
548
        break;
549
      *dst++= sort_order ? sort_order[*src++] : *src++;
550
    }
551
    else
552
    {
553
      /* Multi-byte character */
554
      if (dst + chlen > de)
555
        break;
556
      *dst++= *src++;
557
      *dst++= *src++;
558
      if (chlen == 3)
559
        *dst++= *src++;
560
    }
561
  }
562
563
pad:
564
  return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
565
}
566
567
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
568
/*
  strcasecmp handler for binary collations: a plain strcmp() of the two
  NUL-terminated strings.  The charset argument is not used.
*/
int my_strcasecmp_mb_bin(const CHARSET_INFO * const,
                         const char *s, const char *t)
{
  const int order= strcmp(s,t);
  return order;
}
573
574
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
575
void my_hash_sort_mb_bin(const CHARSET_INFO * const,
576
                         const unsigned char *key, size_t len,
577
                         uint32_t *nr1, uint32_t *nr2)
1 by brian
clean slate
578
{
481 by Brian Aker
Remove all of uchar.
579
  const unsigned char *pos = key;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
580
1 by brian
clean slate
581
  /*
582
     Remove trailing spaces. We have to do this to be able to compare
583
    'A ' and 'A' as identical
584
  */
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
585
  key= internal::skip_trailing_space(key, len);
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
586
481 by Brian Aker
Remove all of uchar.
587
  for (; pos < (const unsigned char*) key ; pos++)
1 by brian
clean slate
588
  {
895 by Brian Aker
Completion (?) of uint conversion.
589
    nr1[0]^=(ulong) ((((uint32_t) nr1[0] & 63)+nr2[0]) *
590
	     ((uint32_t)*pos)) + (nr1[0] << 8);
1 by brian
clean slate
591
    nr2[0]+=3;
592
  }
593
}
594
595
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
596
/*
1 by brian
clean slate
597
  Fill the given buffer with 'maximum character' for given charset
598
  SYNOPSIS
599
      pad_max_char()
600
      cs   Character set
601
      str  Start of buffer to fill
602
      end  End of buffer to fill
603
604
  DESCRIPTION
605
      Write max key:
606
      - for non-Unicode character sets:
607
        just set to 255.
608
      - for Unicode character set (utf-8):
609
        create a buffer with multibyte representation of the max_sort_char
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
610
        character, and copy it into max_str in a loop.
1 by brian
clean slate
611
*/
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
612
static void pad_max_char(const CHARSET_INFO * const cs, char *str, char *end)
1 by brian
clean slate
613
{
614
  char buf[10];
615
  char buflen;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
616
1 by brian
clean slate
617
  if (!(cs->state & MY_CS_UNICODE))
618
  {
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
619
    memset(str, 255, end - str);
1 by brian
clean slate
620
    return;
621
  }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
622
481 by Brian Aker
Remove all of uchar.
623
  buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (unsigned char*) buf,
624
                          (unsigned char*) buf + sizeof(buf));
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
625
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
626
  assert(buflen > 0);
1 by brian
clean slate
627
  do
628
  {
629
    if ((str + buflen) < end)
630
    {
631
      /* Enough space for the characer */
632
      memcpy(str, buf, buflen);
633
      str+= buflen;
634
    }
635
    else
636
    {
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
637
      /*
1 by brian
clean slate
638
        There is no space for whole multibyte
639
        character, then add trailing spaces.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
640
      */
1 by brian
clean slate
641
      *str++= ' ';
642
    }
643
  } while (str < end);
644
}
645
646
/*
647
** Calculate min_str and max_str that ranges a LIKE string.
648
** Arguments:
649
** ptr		Pointer to LIKE string.
650
** ptr_length	Length of LIKE string.
651
** escape	Escape character in LIKE.  (Normally '\').
77.1.95 by Monty Taylor
Fixed silly my_bool==char nonsense.
652
**		All escape characters should be removed from
653
**              min_str and max_str
654
** w_one        Single char matching char in LIKE (Normally '_')
655
** w_many       Multiple char matching char in LIKE (Normally '%')
1 by brian
clean slate
656
** res_length	Length of min_str and max_str.
657
** min_str	Smallest case sensitive string that ranges LIKE.
658
**		Should be space padded to res_length.
659
** max_str	Largest case sensitive string that ranges LIKE.
660
**		Normally padded with the biggest character sort value.
661
**
662
** The function should return 0 if ok and 1 if the LIKE string can't be
663
** optimized !
664
*/
665
276 by Brian Aker
Cleaned out my_bool from strings.
666
bool my_like_range_mb(const CHARSET_INFO * const cs,
77.1.95 by Monty Taylor
Fixed silly my_bool==char nonsense.
667
                         const char *ptr,size_t ptr_length,
668
                         char escape, char w_one, char w_many,
669
                         size_t res_length,
670
                         char *min_str,char *max_str,
671
                         size_t *min_length,size_t *max_length)
1 by brian
clean slate
672
{
482 by Brian Aker
Remove uint.
673
  uint32_t mb_len;
1 by brian
clean slate
674
  const char *end= ptr + ptr_length;
675
  char *min_org= min_str;
676
  char *min_end= min_str + res_length;
677
  char *max_end= max_str + res_length;
678
  size_t maxcharlen= res_length / cs->mbmaxlen;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
679
  const char *contraction_flags= cs->contractions ?
1 by brian
clean slate
680
              ((const char*) cs->contractions) + 0x40*0x40 : NULL;
681
682
  for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
683
  {
684
    /* We assume here that escape, w_any, w_namy are one-byte characters */
685
    if (*ptr == escape && ptr+1 != end)
686
      ptr++;                                    /* Skip escape */
687
    else if (*ptr == w_one || *ptr == w_many)   /* '_' and '%' in SQL */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
688
    {
1 by brian
clean slate
689
fill_max_and_min:
690
      /*
691
        Calculate length of keys:
692
        'a\0\0... is the smallest possible string when we have space expand
693
        a\ff\ff... is the biggest possible string
694
      */
695
      *min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
696
                    res_length);
697
      *max_length= res_length;
698
      /* Create min key  */
699
      do
700
      {
701
	*min_str++= (char) cs->min_sort_char;
702
      } while (min_str != min_end);
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
703
704
      /*
1 by brian
clean slate
705
        Write max key: create a buffer with multibyte
706
        representation of the max_sort_char character,
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
707
        and copy it into max_str in a loop.
1 by brian
clean slate
708
      */
709
      *max_length= res_length;
710
      pad_max_char(cs, max_str, max_end);
711
      return 0;
712
    }
713
    if ((mb_len= my_ismbchar(cs, ptr, end)) > 1)
714
    {
715
      if (ptr+mb_len > end || min_str+mb_len > min_end)
716
        break;
717
      while (mb_len--)
718
       *min_str++= *max_str++= *ptr++;
719
    }
720
    else
721
    {
722
      /*
723
        Special case for collations with contractions.
724
        For example, in Chezh, 'ch' is a separate letter
725
        which is sorted between 'h' and 'i'.
726
        If the pattern 'abc%', 'c' at the end can mean:
727
        - letter 'c' itself,
728
        - beginning of the contraction 'ch'.
729
730
        If we simply return this LIKE range:
731
732
         'abc\min\min\min' and 'abc\max\max\max'
733
734
        then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
735
        will only find values starting from 'abc[^h]',
736
        but won't find values starting from 'abch'.
737
738
        We must ignore contraction heads followed by w_one or w_many.
739
        ('Contraction head' means any letter which can be the first
740
        letter in a contraction)
741
742
        For example, for Czech 'abc%', we will return LIKE range,
743
        which is equal to LIKE range for 'ab%':
744
745
        'ab\min\min\min\min' and 'ab\max\max\max\max'.
746
747
      */
748
      if (contraction_flags && ptr + 1 < end &&
481 by Brian Aker
Remove all of uchar.
749
          contraction_flags[(unsigned char) *ptr])
1 by brian
clean slate
750
      {
751
        /* Ptr[0] is a contraction head. */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
752
1 by brian
clean slate
753
        if (ptr[1] == w_one || ptr[1] == w_many)
754
        {
755
          /* Contraction head followed by a wildcard, quit. */
756
          goto fill_max_and_min;
757
        }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
758
1 by brian
clean slate
759
        /*
760
          Some letters can be both contraction heads and contraction tails.
761
          For example, in Danish 'aa' is a separate single letter which
762
          is sorted after 'z'. So 'a' can be both head and tail.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
763
1 by brian
clean slate
764
          If ptr[0]+ptr[1] is a contraction,
765
          then put both letters together.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
766
1 by brian
clean slate
767
          If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
768
          is not a contraction, then we put only ptr[0],
769
          and continue with ptr[1] on the next loop.
770
        */
481 by Brian Aker
Remove all of uchar.
771
        if (contraction_flags[(unsigned char) ptr[1]] &&
1 by brian
clean slate
772
            cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40])
773
        {
774
          /* Contraction found */
775
          if (maxcharlen == 1 || min_str + 1 >= min_end)
776
          {
777
            /* Both contraction parts don't fit, quit */
778
            goto fill_max_and_min;
779
          }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
780
1 by brian
clean slate
781
          /* Put contraction head */
782
          *min_str++= *max_str++= *ptr++;
783
          maxcharlen--;
784
        }
785
      }
786
      /* Put contraction tail, or a single character */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
787
      *min_str++= *max_str++= *ptr++;
1 by brian
clean slate
788
    }
789
  }
790
791
  *min_length= *max_length = (size_t) (min_str - min_org);
792
  while (min_str != min_end)
793
    *min_str++= *max_str++= ' ';           /* Because if key compression */
794
  return 0;
795
}
796
797
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
798
int my_wildcmp_mb_bin(const CHARSET_INFO * const cs,
1 by brian
clean slate
799
                      const char *str,const char *str_end,
800
                      const char *wildstr,const char *wildend,
801
                      int escape, int w_one, int w_many)
802
{
803
  int result= -1;				/* Not found, using wildcards */
804
805
  while (wildstr != wildend)
806
  {
807
    while (*wildstr != w_many && *wildstr != w_one)
808
    {
809
      int l;
810
      if (*wildstr == escape && wildstr+1 != wildend)
811
	wildstr++;
812
      if ((l = my_ismbchar(cs, wildstr, wildend)))
813
      {
814
	  if (str+l > str_end || memcmp(str, wildstr, l) != 0)
815
	      return 1;
816
	  str += l;
817
	  wildstr += l;
818
      }
819
      else
820
      if (str == str_end || *wildstr++ != *str++)
821
	return(1);				/* No match */
822
      if (wildstr == wildend)
823
	return (str != str_end);		/* Match if both are at end */
824
      result=1;					/* Found an anchor char */
825
    }
826
    if (*wildstr == w_one)
827
    {
828
      do
829
      {
830
	if (str == str_end)			/* Skip one char if possible */
831
	  return (result);
832
	INC_PTR(cs,str,str_end);
833
      } while (++wildstr < wildend && *wildstr == w_one);
834
      if (wildstr == wildend)
835
	break;
836
    }
837
    if (*wildstr == w_many)
838
    {						/* Found w_many */
481 by Brian Aker
Remove all of uchar.
839
      unsigned char cmp;
1 by brian
clean slate
840
      const char* mb = wildstr;
841
      int mb_len=0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
842
1 by brian
clean slate
843
      wildstr++;
844
      /* Remove any '%' and '_' from the wild search string */
845
      for (; wildstr != wildend ; wildstr++)
846
      {
847
	if (*wildstr == w_many)
848
	  continue;
849
	if (*wildstr == w_one)
850
	{
851
	  if (str == str_end)
852
	    return (-1);
853
	  INC_PTR(cs,str,str_end);
854
	  continue;
855
	}
856
	break;					/* Not a wild character */
857
      }
858
      if (wildstr == wildend)
859
	return(0);				/* Ok if w_many is last */
860
      if (str == str_end)
861
	return -1;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
862
1 by brian
clean slate
863
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
864
	cmp= *++wildstr;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
865
1 by brian
clean slate
866
      mb=wildstr;
867
      mb_len= my_ismbchar(cs, wildstr, wildend);
868
      INC_PTR(cs,wildstr,wildend);		/* This is compared trough cmp */
869
      do
870
      {
871
        for (;;)
872
        {
873
          if (str >= str_end)
874
            return -1;
875
          if (mb_len)
876
          {
877
            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
878
            {
879
              str += mb_len;
880
              break;
881
            }
882
          }
883
          else if (!my_ismbchar(cs, str, str_end) && *str == cmp)
884
          {
885
            str++;
886
            break;
887
          }
888
          INC_PTR(cs,str, str_end);
889
        }
890
	{
891
	  int tmp=my_wildcmp_mb_bin(cs,str,str_end,wildstr,wildend,escape,w_one,w_many);
892
	  if (tmp <= 0)
893
	    return (tmp);
894
	}
895
      } while (str != str_end && wildstr[0] != w_many);
896
      return(-1);
897
    }
898
  }
899
  return (str != str_end ? 1 : 0);
900
}
901
902
903
/*
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
904
  Data was produced from EastAsianWidth.txt
1 by brian
clean slate
905
  using utr11-dump utility.
906
*/
907
/*
  Per-code-point table for page 0x11 (code points 0x1100..0x11FF), indexed
  by the low byte of the code point. Referenced from utr11_data[0x11];
  my_numcells_mb() adds the selected entry (0 or 1) on top of the one cell
  it counts for every character.
*/
static char pg11[256]=
908
{
909
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
910
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
911
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1,
912
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
913
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
914
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
915
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
916
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
917
};
918
919
/*
  Per-code-point table for page 0x23 (code points 0x2300..0x23FF), indexed
  by the low byte of the code point. Referenced from utr11_data[0x23];
  my_numcells_mb() adds the selected entry (0 or 1) on top of the one cell
  it counts for every character.
*/
static char pg23[256]=
920
{
921
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
922
0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
923
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
924
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
925
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
926
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
927
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
928
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
929
};
930
931
/*
  Per-code-point table for page 0x2E (code points 0x2E00..0x2EFF), indexed
  by the low byte of the code point. Referenced from utr11_data[0x2E];
  my_numcells_mb() adds the selected entry (0 or 1) on top of the one cell
  it counts for every character.
*/
static char pg2E[256]=
932
{
933
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
934
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
935
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
936
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
937
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,
938
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
939
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
940
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0
941
};
942
943
/*
  Per-code-point table for page 0x2F (code points 0x2F00..0x2FFF), indexed
  by the low byte of the code point. Referenced from utr11_data[0x2F];
  my_numcells_mb() adds the selected entry (0 or 1) on top of the one cell
  it counts for every character.
*/
static char pg2F[256]=
944
{
945
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
946
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
947
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
948
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
949
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
950
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
951
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
952
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0
953
};
954
955
/*
  Per-code-point table for page 0x30 (code points 0x3000..0x30FF), indexed
  by the low byte of the code point. Referenced from utr11_data[0x30];
  my_numcells_mb() adds the selected entry (0 or 1) on top of the one cell
  it counts for every character.
*/
static char pg30[256]=
956
{
957
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
958
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
959
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
960
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
961
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,
962
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
963
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
964
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
965
};
966
967
/*
  Per-code-point table for page 0x31 (code points 0x3100..0x31FF), indexed
  by the low byte of the code point. Referenced from utr11_data[0x31];
  my_numcells_mb() adds the selected entry (0 or 1) on top of the one cell
  it counts for every character.
*/
static char pg31[256]=
968
{
969
0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
970
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
971
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
972
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
973
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
974
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
975
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
976
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
977
};
978
979
/*
  Per-code-point table for page 0x32 (code points 0x3200..0x32FF), indexed
  by the low byte of the code point. Referenced from utr11_data[0x32];
  my_numcells_mb() adds the selected entry (0 or 1) on top of the one cell
  it counts for every character.
*/
static char pg32[256]=
980
{
981
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
982
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
983
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
984
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,
985
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
986
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
987
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
988
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
989
};
990
991
/*
  Per-code-point table for page 0x4D (code points 0x4D00..0x4DFF), indexed
  by the low byte of the code point. Referenced from utr11_data[0x4D];
  my_numcells_mb() adds the selected entry (0 or 1) on top of the one cell
  it counts for every character.
*/
static char pg4D[256]=
992
{
993
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
994
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
995
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
996
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
997
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
998
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
999
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1000
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1001
};
1002
1003
/*
  Per-code-point table for page 0x9F (code points 0x9F00..0x9FFF), indexed
  by the low byte of the code point. Referenced from utr11_data[0x9F];
  my_numcells_mb() adds the selected entry (0 or 1) on top of the one cell
  it counts for every character.
*/
static char pg9F[256]=
1004
{
1005
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1006
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1007
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1008
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1009
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1010
1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1011
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1012
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1013
};
1014
1015
/*
  Per-code-point table for page 0xA4 (code points 0xA400..0xA4FF), indexed
  by the low byte of the code point. Referenced from utr11_data[0xA4];
  my_numcells_mb() adds the selected entry (0 or 1) on top of the one cell
  it counts for every character.
*/
static char pgA4[256]=
1016
{
1017
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1018
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1019
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1020
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1021
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1022
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1023
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1024
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1025
};
1026
1027
/*
  Per-code-point table for page 0xD7 (code points 0xD700..0xD7FF), indexed
  by the low byte of the code point. Referenced from utr11_data[0xD7];
  my_numcells_mb() adds the selected entry (0 or 1) on top of the one cell
  it counts for every character.
*/
static char pgD7[256]=
1028
{
1029
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1030
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1031
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1032
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1033
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1034
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1035
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1036
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1037
};
1038
1039
/*
  Per-code-point table for page 0xFA (code points 0xFA00..0xFAFF), indexed
  by the low byte of the code point. Referenced from utr11_data[0xFA];
  my_numcells_mb() adds the selected entry (0 or 1) on top of the one cell
  it counts for every character.
*/
static char pgFA[256]=
1040
{
1041
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1042
1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1043
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1044
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1045
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1046
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1047
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1048
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1049
};
1050
1051
/*
  Per-code-point table for page 0xFE (code points 0xFE00..0xFEFF), indexed
  by the low byte of the code point. Referenced from utr11_data[0xFE];
  my_numcells_mb() adds the selected entry (0 or 1) on top of the one cell
  it counts for every character.
*/
static char pgFE[256]=
1052
{
1053
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1054
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1055
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,
1056
1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1057
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1058
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1059
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1060
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1061
};
1062
1063
/*
  Per-code-point table for page 0xFF (code points 0xFF00..0xFFFF), indexed
  by the low byte of the code point. Referenced from utr11_data[0xFF];
  my_numcells_mb() adds the selected entry (0 or 1) on top of the one cell
  it counts for every character.
*/
static char pgFF[256]=
1064
{
1065
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1066
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1067
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1068
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1069
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1070
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1071
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1072
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1073
};
1074
1810.2.1 by tdavies
Modified File: ../drizzled/ctype-mb.cc at line 1075 converted the static C struct which instatiats the utr11_data array to a C++ class
1075
/*
  Page index used by my_numcells_mb(): one entry per value of the high
  byte of a code point (0x00..0xFF). For each character the function adds
  either p[low byte] (when p is set) or page (when p is NULL) to the cell
  count, on top of the one cell it always counts.
*/
static class {
1076
public:
1077
  /* Value used for every code point of the page when p is NULL */
  int page; 
1078
  /* Optional 256-entry table with one value per code point of the page */
  char *p;
1079
} 
1080
  utr11_data[256]=
1 by brian
clean slate
1081
{
1082
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1083
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1084
{0,NULL},{0,pg11},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1085
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1086
{0,NULL},{0,NULL},{0,NULL},{0,pg23},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1087
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,pg2E},{0,pg2F},
1088
{0,pg30},{0,pg31},{0,pg32},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1089
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1090
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1091
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg4D},{1,NULL},{1,NULL},
1092
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1093
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1094
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1095
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1096
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1097
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1098
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1099
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1100
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1101
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg9F},
1102
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgA4},{0,NULL},{0,NULL},{0,NULL},
1103
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1104
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1105
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1106
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1107
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1108
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgD7},
1109
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1110
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1111
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1112
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1113
{0,NULL},{1,NULL},{0,pgFA},{0,NULL},{0,NULL},{0,NULL},{0,pgFE},{0,pgFF}
1114
};
1115
1116
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1117
size_t my_numcells_mb(const CHARSET_INFO * const cs, const char *b, const char *e)
1 by brian
clean slate
1118
{
1119
  my_wc_t wc;
1120
  size_t clen= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
1121
1 by brian
clean slate
1122
  while (b < e)
1123
  {
1124
    int mb_len;
482 by Brian Aker
Remove uint.
1125
    uint32_t pg;
481 by Brian Aker
Remove all of uchar.
1126
    if ((mb_len= cs->cset->mb_wc(cs, &wc, (unsigned char*) b, (unsigned char*) e)) <= 0 ||
1 by brian
clean slate
1127
        wc > 0xFFFF)
1128
    {
1129
      /*
1130
        Let's think a wrong sequence takes 1 dysplay cell.
1131
        Also, consider supplementary characters as taking one cell.
1132
      */
1133
      mb_len= 1;
1134
      b++;
1135
      continue;
1136
    }
1137
    b+= mb_len;
1138
    pg= (wc >> 8) & 0xFF;
1139
    clen+= utr11_data[pg].p ? utr11_data[pg].p[wc & 0xFF] : utr11_data[pg].page;
1140
    clen++;
1141
  }
1142
  return clen;
1143
}
1144
1145
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
1146
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1147
int my_mb_ctype_mb(const CHARSET_INFO * const cs, int *ctype,
481 by Brian Aker
Remove all of uchar.
1148
                   const unsigned char *s, const unsigned char *e)
1 by brian
clean slate
1149
{
1150
  my_wc_t wc;
1151
  int res= cs->cset->mb_wc(cs, &wc, s, e);
1152
  if (res <= 0 || wc > 0xFFFF)
1153
    *ctype= 0;
1154
  else
1155
    *ctype= my_uni_ctype[wc>>8].ctype ?
1156
            my_uni_ctype[wc>>8].ctype[wc&0xFF] :
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
1157
            my_uni_ctype[wc>>8].pctype;
1 by brian
clean slate
1158
  return res;
1159
}
1160
1161
1162
/*
  Collation handler table named for multi-byte binary (mb_bin) collations.
  The init slot is NULL; the remaining slots point at the functions listed
  below, in the order in which MY_COLLATION_HANDLER declares its members
  (the declaration is not part of this file section).
*/
MY_COLLATION_HANDLER my_collation_mb_bin_handler =
1163
{
1164
    NULL,              /* init */
1165
    my_strnncoll_mb_bin,
1166
    my_strnncollsp_mb_bin,
1167
    my_strnxfrm_mb,
1168
    my_strnxfrmlen_simple,
1169
    my_like_range_mb,
1170
    my_wildcmp_mb_bin,
1171
    my_strcasecmp_mb_bin,
1172
    my_instr_mb,
1173
    my_hash_sort_mb_bin,
1174
    my_propagate_simple
1175
};
1176
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
1177
} /* namespace drizzled */