~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
1802.10.2 by Monty Taylor
Update all of the copyright headers to include the correct address.
14
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
1 by brian
clean slate
15
1241.9.1 by Monty Taylor
Removed global.h. Fixed all the headers.
16
#include "config.h"
1130.3.26 by Monty Taylor
Removed global.h from headers.
17
1241.9.64 by Monty Taylor
Moved remaining non-public portions of mysys and mystrings to drizzled/internal.
18
#include "drizzled/internal/m_string.h"
1241.9.61 by Monty Taylor
No more mystrings in drizzled/
19
#include "drizzled/charset_info.h"
1 by brian
clean slate
20
1067.4.9 by Nathan Williams
Converted all usages of cmin/cmax in mystrings directory to use std::min/max
21
#include <algorithm>
22
23
using namespace std;
24
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
25
namespace drizzled
26
{
1 by brian
clean slate
27
28
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
29
size_t my_caseup_str_mb(const CHARSET_INFO * const  cs, char *str)
1 by brian
clean slate
30
{
205 by Brian Aker
uint32 -> uin32_t
31
  register uint32_t l;
481 by Brian Aker
Remove all of uchar.
32
  register unsigned char *map= cs->to_upper;
1 by brian
clean slate
33
  char *str_orig= str;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
34
1 by brian
clean slate
35
  while (*str)
36
  {
37
    /* Pointing after the '\0' is safe here. */
38
    if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
39
      str+= l;
40
    else
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
41
    {
481 by Brian Aker
Remove all of uchar.
42
      *str= (char) map[(unsigned char)*str];
1 by brian
clean slate
43
      str++;
44
    }
45
  }
46
  return (size_t) (str - str_orig);
47
}
48
49
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
50
size_t my_casedn_str_mb(const CHARSET_INFO * const  cs, char *str)
1 by brian
clean slate
51
{
205 by Brian Aker
uint32 -> uin32_t
52
  register uint32_t l;
481 by Brian Aker
Remove all of uchar.
53
  register unsigned char *map= cs->to_lower;
1 by brian
clean slate
54
  char *str_orig= str;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
55
1 by brian
clean slate
56
  while (*str)
57
  {
58
    /* Pointing after the '\0' is safe here. */
59
    if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
60
      str+= l;
61
    else
62
    {
481 by Brian Aker
Remove all of uchar.
63
      *str= (char) map[(unsigned char)*str];
1 by brian
clean slate
64
      str++;
65
    }
66
  }
67
  return (size_t) (str - str_orig);
68
}
69
70
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
71
size_t my_caseup_mb(const CHARSET_INFO * const  cs, char *src, size_t srclen,
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
72
                    char *dst, size_t dstlen)
1 by brian
clean slate
73
{
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
74
#ifdef NDEBUG
75
  (void)dst;
76
  (void)dstlen;
77
#endif
205 by Brian Aker
uint32 -> uin32_t
78
  register uint32_t l;
1 by brian
clean slate
79
  register char *srcend= src + srclen;
481 by Brian Aker
Remove all of uchar.
80
  register unsigned char *map= cs->to_upper;
1 by brian
clean slate
81
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
82
  assert(src == dst && srclen == dstlen);
1 by brian
clean slate
83
  while (src < srcend)
84
  {
85
    if ((l=my_ismbchar(cs, src, srcend)))
86
      src+= l;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
87
    else
1 by brian
clean slate
88
    {
481 by Brian Aker
Remove all of uchar.
89
      *src=(char) map[(unsigned char) *src];
1 by brian
clean slate
90
      src++;
91
    }
92
  }
93
  return srclen;
94
}
95
96
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
97
size_t my_casedn_mb(const CHARSET_INFO * const  cs, char *src, size_t srclen,
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
98
                    char *dst, size_t dstlen)
1 by brian
clean slate
99
{
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
100
#ifdef NDEBUG
101
  (void)dst;
102
  (void)dstlen;
103
#endif
205 by Brian Aker
uint32 -> uin32_t
104
  register uint32_t l;
1 by brian
clean slate
105
  register char *srcend= src + srclen;
481 by Brian Aker
Remove all of uchar.
106
  register unsigned char *map=cs->to_lower;
1 by brian
clean slate
107
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
108
  assert(src == dst && srclen == dstlen);
1 by brian
clean slate
109
  while (src < srcend)
110
  {
111
    if ((l= my_ismbchar(cs, src, srcend)))
112
      src+= l;
113
    else
114
    {
481 by Brian Aker
Remove all of uchar.
115
      *src= (char) map[(unsigned char)*src];
1 by brian
clean slate
116
      src++;
117
    }
118
  }
119
  return srclen;
120
}
121
122
123
/*
124
  my_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise.
125
 */
126
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
127
int my_strcasecmp_mb(const CHARSET_INFO * const  cs,const char *s, const char *t)
1 by brian
clean slate
128
{
205 by Brian Aker
uint32 -> uin32_t
129
  register uint32_t l;
481 by Brian Aker
Remove all of uchar.
130
  register unsigned char *map=cs->to_upper;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
131
1 by brian
clean slate
132
  while (*s && *t)
133
  {
134
    /* Pointing after the '\0' is safe here. */
135
    if ((l=my_ismbchar(cs, s, s + cs->mbmaxlen)))
136
    {
137
      while (l--)
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
138
        if (*s++ != *t++)
1 by brian
clean slate
139
          return 1;
140
    }
141
    else if (my_mbcharlen(cs, *t) > 1)
142
      return 1;
481 by Brian Aker
Remove all of uchar.
143
    else if (map[(unsigned char) *s++] != map[(unsigned char) *t++])
1 by brian
clean slate
144
      return 1;
145
  }
146
  /* At least one of '*s' and '*t' is zero here. */
147
  return (*t != *s);
148
}
149
150
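/*
** Compare string against string with wildcard
**	 0 if matched
**	 1 or -1 if not matched; while scanning for the character that
**	   follows w_many, the recursive call's result is returned as-is
**	   when it is <= 0 and the scan continues when it is 1
*/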
156
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
157
inline static const char* inc_ptr(const charset_info_st *cs, const char *str, const char* str_end)
158
{
159
  return str + (my_ismbchar(cs, str, str_end) ? my_ismbchar(cs, str, str_end) : 1);
160
}
161
162
inline static int likeconv(const charset_info_st *cs, const char c) 
163
{
164
  return (unsigned char) cs->sort_order[(unsigned char) c];
165
}
166
    
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
167
int my_wildcmp_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
168
		  const char *str,const char *str_end,
169
		  const char *wildstr,const char *wildend,
170
		  int escape, int w_one, int w_many)
171
{
172
  int result= -1;				/* Not found, using wildcards */
173
174
  while (wildstr != wildend)
175
  {
176
    while (*wildstr != w_many && *wildstr != w_one)
177
    {
178
      int l;
179
      if (*wildstr == escape && wildstr+1 != wildend)
180
	wildstr++;
181
      if ((l = my_ismbchar(cs, wildstr, wildend)))
182
      {
183
	  if (str+l > str_end || memcmp(str, wildstr, l) != 0)
184
	      return 1;
185
	  str += l;
186
	  wildstr += l;
187
      }
188
      else
189
      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
190
	return(1);				/* No match */
191
      if (wildstr == wildend)
192
	return (str != str_end);		/* Match if both are at end */
193
      result=1;					/* Found an anchor char */
194
    }
195
    if (*wildstr == w_one)
196
    {
197
      do
198
      {
199
	if (str == str_end)			/* Skip one char if possible */
200
	  return (result);
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
201
	inc_ptr(cs,str,str_end);
1 by brian
clean slate
202
      } while (++wildstr < wildend && *wildstr == w_one);
203
      if (wildstr == wildend)
204
	break;
205
    }
206
    if (*wildstr == w_many)
207
    {						/* Found w_many */
481 by Brian Aker
Remove all of uchar.
208
      unsigned char cmp;
1 by brian
clean slate
209
      const char* mb = wildstr;
210
      int mb_len=0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
211
1 by brian
clean slate
212
      wildstr++;
213
      /* Remove any '%' and '_' from the wild search string */
214
      for (; wildstr != wildend ; wildstr++)
215
      {
216
	if (*wildstr == w_many)
217
	  continue;
218
	if (*wildstr == w_one)
219
	{
220
	  if (str == str_end)
221
	    return (-1);
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
222
	  inc_ptr(cs,str,str_end);
1 by brian
clean slate
223
	  continue;
224
	}
225
	break;					/* Not a wild character */
226
      }
227
      if (wildstr == wildend)
228
	return(0);				/* Ok if w_many is last */
229
      if (str == str_end)
230
	return -1;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
231
1 by brian
clean slate
232
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
233
	cmp= *++wildstr;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
234
1 by brian
clean slate
235
      mb=wildstr;
236
      mb_len= my_ismbchar(cs, wildstr, wildend);
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
237
      inc_ptr(cs,wildstr,wildend);		/* This is compared trough cmp */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
238
      cmp=likeconv(cs,cmp);
1 by brian
clean slate
239
      do
240
      {
241
        for (;;)
242
        {
243
          if (str >= str_end)
244
            return -1;
245
          if (mb_len)
246
          {
247
            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
248
            {
249
              str += mb_len;
250
              break;
251
            }
252
          }
253
          else if (!my_ismbchar(cs, str, str_end) &&
254
                   likeconv(cs,*str) == cmp)
255
          {
256
            str++;
257
            break;
258
          }
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
259
          inc_ptr(cs,str, str_end);
1 by brian
clean slate
260
        }
261
	{
262
	  int tmp=my_wildcmp_mb(cs,str,str_end,wildstr,wildend,escape,w_one,
263
                                w_many);
264
	  if (tmp <= 0)
265
	    return (tmp);
266
	}
267
      } while (str != str_end && wildstr[0] != w_many);
268
      return(-1);
269
    }
270
  }
271
  return (str != str_end ? 1 : 0);
272
}
273
274
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
275
size_t my_numchars_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
276
		      const char *pos, const char *end)
277
{
278
  register size_t count= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
279
  while (pos < end)
1 by brian
clean slate
280
  {
482 by Brian Aker
Remove uint.
281
    uint32_t mb_len;
1 by brian
clean slate
282
    pos+= (mb_len= my_ismbchar(cs,pos,end)) ? mb_len : 1;
283
    count++;
284
  }
285
  return count;
286
}
287
288
779.1.27 by Monty Taylor
Got rid of __attribute__((unused)) and the like from the .cc files.
289
size_t my_charpos_mb(const CHARSET_INFO * const cs,
1 by brian
clean slate
290
		     const char *pos, const char *end, size_t length)
291
{
292
  const char *start= pos;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
293
1 by brian
clean slate
294
  while (length && pos < end)
295
  {
482 by Brian Aker
Remove uint.
296
    uint32_t mb_len;
1 by brian
clean slate
297
    pos+= (mb_len= my_ismbchar(cs, pos, end)) ? mb_len : 1;
298
    length--;
299
  }
300
  return (size_t) (length ? end+2-start : pos-start);
301
}
302
303
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
304
size_t my_well_formed_len_mb(const CHARSET_INFO * const cs, const char *b, const char *e,
1 by brian
clean slate
305
                             size_t pos, int *error)
306
{
307
  const char *b_start= b;
308
  *error= 0;
309
  while (pos)
310
  {
311
    my_wc_t wc;
312
    int mb_len;
313
481 by Brian Aker
Remove all of uchar.
314
    if ((mb_len= cs->cset->mb_wc(cs, &wc, (const unsigned char*) b, (const unsigned char*) e)) <= 0)
1 by brian
clean slate
315
    {
316
      *error= b < e ? 1 : 0;
317
      break;
318
    }
319
    b+= mb_len;
320
    pos--;
321
  }
322
  return (size_t) (b - b_start);
323
}
324
325
482 by Brian Aker
Remove uint.
326
uint32_t my_instr_mb(const CHARSET_INFO * const cs,
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
327
                 const char *b, size_t b_length,
1 by brian
clean slate
328
                 const char *s, size_t s_length,
482 by Brian Aker
Remove uint.
329
                 my_match_t *match, uint32_t nmatch)
1 by brian
clean slate
330
{
331
  register const char *end, *b0;
332
  int res= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
333
1 by brian
clean slate
334
  if (s_length <= b_length)
335
  {
336
    if (!s_length)
337
    {
338
      if (nmatch)
339
      {
340
        match->beg= 0;
341
        match->end= 0;
342
        match->mb_len= 0;
343
      }
344
      return 1;		/* Empty string is always found */
345
    }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
346
1 by brian
clean slate
347
    b0= b;
348
    end= b+b_length-s_length+1;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
349
1 by brian
clean slate
350
    while (b < end)
351
    {
352
      int mb_len;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
353
354
      if (!cs->coll->strnncoll(cs, (const unsigned char*) b, s_length,
481 by Brian Aker
Remove all of uchar.
355
                                   (const unsigned char*) s, s_length, 0))
1 by brian
clean slate
356
      {
357
        if (nmatch)
358
        {
359
          match[0].beg= 0;
360
          match[0].end= (size_t) (b-b0);
361
          match[0].mb_len= res;
362
          if (nmatch > 1)
363
          {
364
            match[1].beg= match[0].end;
365
            match[1].end= match[0].end+s_length;
366
            match[1].mb_len= 0;	/* Not computed */
367
          }
368
        }
369
        return 2;
370
      }
371
      mb_len= (mb_len= my_ismbchar(cs, b, end)) ? mb_len : 1;
372
      b+= mb_len;
373
      b_length-= mb_len;
374
      res++;
375
    }
376
  }
377
  return 0;
378
}
379
380
381
/* BINARY collations handlers for MB charsets */
382
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
383
/*
  Binary comparison of two length-delimited strings.

  The common prefix is compared with memcmp(). If it is equal, the result
  is the length difference; with t_is_prefix set, 's' is treated as being
  no longer than the common prefix.
*/
int my_strnncoll_mb_bin(const CHARSET_INFO * const,
                        const unsigned char *s, size_t slen,
                        const unsigned char *t, size_t tlen,
                        bool t_is_prefix)
{
  const size_t common= (tlen < slen) ? tlen : slen;
  const int byte_diff= memcmp(s, t, common);
  if (byte_diff)
    return byte_diff;

  const size_t effective_slen= t_is_prefix ? common : slen;
  return (int) (effective_slen - tlen);
}
392
393
394
/*
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
395
  Compare two strings.
396
1 by brian
clean slate
397
  SYNOPSIS
398
    my_strnncollsp_mb_bin()
399
    cs			Chararacter set
400
    s			String to compare
401
    slen		Length of 's'
402
    t			String to compare
403
    tlen		Length of 't'
404
    diff_if_only_endspace_difference
405
		        Set to 1 if the strings should be regarded as different
406
                        if they only difference in end space
407
408
  NOTE
409
   This function is used for character strings with binary collations.
410
   The shorter string is extended with end space to be as long as the longer
411
   one.
412
413
  RETURN
414
    A negative number if s < t
415
    A positive number if s > t
416
    0 if strings are equal
417
*/
418
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
419
int my_strnncollsp_mb_bin(const CHARSET_INFO * const,
481 by Brian Aker
Remove all of uchar.
420
                          const unsigned char *a, size_t a_length,
421
                          const unsigned char *b, size_t b_length,
276 by Brian Aker
Cleaned out my_bool from strings.
422
                          bool diff_if_only_endspace_difference)
1 by brian
clean slate
423
{
481 by Brian Aker
Remove all of uchar.
424
  const unsigned char *end;
1 by brian
clean slate
425
  size_t length;
426
  int res;
427
428
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
429
  diff_if_only_endspace_difference= 0;
430
#endif
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
431
1067.4.9 by Nathan Williams
Converted all usages of cmin/cmax in mystrings directory to use std::min/max
432
  end= a + (length= min(a_length, b_length));
1 by brian
clean slate
433
  while (a < end)
434
  {
435
    if (*a++ != *b++)
436
      return ((int) a[-1] - (int) b[-1]);
437
  }
438
  res= 0;
439
  if (a_length != b_length)
440
  {
441
    int swap= 1;
442
    if (diff_if_only_endspace_difference)
443
      res= 1;                                   /* Assume 'a' is bigger */
444
    /*
445
      Check the next not space character of the longer key. If it's < ' ',
446
      then it's smaller than the other key.
447
    */
448
    if (a_length < b_length)
449
    {
450
      /* put shorter key in s */
451
      a_length= b_length;
452
      a= b;
453
      swap= -1;					/* swap sign of result */
454
      res= -res;
455
    }
456
    for (end= a + a_length-length; a < end ; a++)
457
    {
458
      if (*a != ' ')
459
	return (*a < ' ') ? -swap : swap;
460
    }
461
  }
462
  return res;
463
}
464
465
466
/*
  Copy one non-ASCII character from "src" to "dst", advancing both.
  "dst" must have enough room for the character.

  The number of bytes copied follows cs->cset->ismbchar(): 4, 3 or 2 bytes
  for a multi-byte character, one byte when it returns 0.

  sort_order[] is deliberately not used in this macro.
  This is correct even for case insensitive collations:
  - basic Latin letters are processed outside this macro;
  - for other characters sort_order[x] is equal to x.
*/
#define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se)                      \
{                                                                            \
  switch ((cs)->cset->ismbchar((cs), (const char*) (src),                    \
                               (const char*) (se)))                          \
  {                                                                          \
  case 4:                                                                    \
    *(dst)++= *(src)++;                                                      \
    /* fall through */                                                       \
  case 3:                                                                    \
    *(dst)++= *(src)++;                                                      \
    /* fall through */                                                       \
  case 2:                                                                    \
    *(dst)++= *(src)++;                                                      \
    /* fall through */                                                       \
  case 0:                                                                    \
    *(dst)++= *(src)++; /* byte in range 0x80..0xFF which is not MB head */  \
  }                                                                          \
}
490
491
492
/*
493
  For character sets with two or three byte multi-byte
494
  characters having multibyte weights *equal* to their codes:
495
  cp932, euckr, gb2312, sjis, eucjpms, ujis.
496
*/
497
size_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
498
my_strnxfrm_mb(const CHARSET_INFO * const cs,
482 by Brian Aker
Remove uint.
499
               unsigned char *dst, size_t dstlen, uint32_t nweights,
500
               const unsigned char *src, size_t srclen, uint32_t flags)
1 by brian
clean slate
501
{
481 by Brian Aker
Remove all of uchar.
502
  unsigned char *d0= dst;
503
  unsigned char *de= dst + dstlen;
504
  const unsigned char *se= src + srclen;
505
  const unsigned char *sort_order= cs->sort_order;
1 by brian
clean slate
506
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
507
  assert(cs->mbmaxlen <= 4);
1 by brian
clean slate
508
509
  /*
510
    If "srclen" is smaller than both "dstlen" and "nweights"
511
    then we can run a simplified loop -
512
    without checking "nweights" and "de".
513
  */
514
  if (dstlen >= srclen && nweights >= srclen)
515
  {
516
    if (sort_order)
517
    {
518
      /* Optimized version for a case insensitive collation */
519
      for (; src < se; nweights--)
520
      {
521
        if (*src < 128) /* quickly catch ASCII characters */
522
          *dst++= sort_order[*src++];
523
        else
524
          my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
525
      }
526
    }
527
    else
528
    {
529
      /* Optimized version for a case sensitive collation (no sort_order) */
530
      for (; src < se; nweights--)
531
      {
532
        if (*src < 128) /* quickly catch ASCII characters */
533
          *dst++= *src++;
534
        else
535
          my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
536
      }
537
    }
538
    goto pad;
539
  }
540
541
  /*
542
    A thourough loop, checking all possible limits:
543
    "se", "nweights" and "de".
544
  */
545
  for (; src < se && nweights; nweights--)
546
  {
547
    int chlen;
548
    if (*src < 128 ||
549
        !(chlen= cs->cset->ismbchar(cs, (const char*) src, (const char*) se)))
550
    {
551
      /* Single byte character */
552
      if (dst >= de)
553
        break;
554
      *dst++= sort_order ? sort_order[*src++] : *src++;
555
    }
556
    else
557
    {
558
      /* Multi-byte character */
559
      if (dst + chlen > de)
560
        break;
561
      *dst++= *src++;
562
      *dst++= *src++;
563
      if (chlen == 3)
564
        *dst++= *src++;
565
    }
566
  }
567
568
pad:
569
  return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
570
}
571
572
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
573
/*
  Comparison of two NUL-terminated strings for binary collations:
  a plain strcmp(), with no case folding. The charset argument is unused.
*/
int my_strcasecmp_mb_bin(const CHARSET_INFO * const,
                         const char *s, const char *t)
{
  const int order= strcmp(s, t);
  return order;
}
578
579
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
580
void my_hash_sort_mb_bin(const CHARSET_INFO * const,
581
                         const unsigned char *key, size_t len,
582
                         uint32_t *nr1, uint32_t *nr2)
1 by brian
clean slate
583
{
481 by Brian Aker
Remove all of uchar.
584
  const unsigned char *pos = key;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
585
1 by brian
clean slate
586
  /*
587
     Remove trailing spaces. We have to do this to be able to compare
588
    'A ' and 'A' as identical
589
  */
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
590
  key= internal::skip_trailing_space(key, len);
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
591
481 by Brian Aker
Remove all of uchar.
592
  for (; pos < (const unsigned char*) key ; pos++)
1 by brian
clean slate
593
  {
895 by Brian Aker
Completion (?) of uint conversion.
594
    nr1[0]^=(ulong) ((((uint32_t) nr1[0] & 63)+nr2[0]) *
595
	     ((uint32_t)*pos)) + (nr1[0] << 8);
1 by brian
clean slate
596
    nr2[0]+=3;
597
  }
598
}
599
600
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
601
/*
1 by brian
clean slate
602
  Fill the given buffer with 'maximum character' for given charset
603
  SYNOPSIS
604
      pad_max_char()
605
      cs   Character set
606
      str  Start of buffer to fill
607
      end  End of buffer to fill
608
609
  DESCRIPTION
610
      Write max key:
611
      - for non-Unicode character sets:
612
        just set to 255.
613
      - for Unicode character set (utf-8):
614
        create a buffer with multibyte representation of the max_sort_char
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
615
        character, and copy it into max_str in a loop.
1 by brian
clean slate
616
*/
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
617
static void pad_max_char(const CHARSET_INFO * const cs, char *str, char *end)
1 by brian
clean slate
618
{
619
  char buf[10];
620
  char buflen;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
621
1 by brian
clean slate
622
  if (!(cs->state & MY_CS_UNICODE))
623
  {
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
624
    memset(str, 255, end - str);
1 by brian
clean slate
625
    return;
626
  }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
627
481 by Brian Aker
Remove all of uchar.
628
  buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (unsigned char*) buf,
629
                          (unsigned char*) buf + sizeof(buf));
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
630
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
631
  assert(buflen > 0);
1 by brian
clean slate
632
  do
633
  {
634
    if ((str + buflen) < end)
635
    {
636
      /* Enough space for the characer */
637
      memcpy(str, buf, buflen);
638
      str+= buflen;
639
    }
640
    else
641
    {
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
642
      /*
1 by brian
clean slate
643
        There is no space for whole multibyte
644
        character, then add trailing spaces.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
645
      */
1 by brian
clean slate
646
      *str++= ' ';
647
    }
648
  } while (str < end);
649
}
650
651
/*
652
** Calculate min_str and max_str that ranges a LIKE string.
653
** Arguments:
654
** ptr		Pointer to LIKE string.
655
** ptr_length	Length of LIKE string.
656
** escape	Escape character in LIKE.  (Normally '\').
77.1.95 by Monty Taylor
Fixed silly my_bool==char nonsense.
657
**		All escape characters should be removed from
658
**              min_str and max_str
659
** w_one        Single char matching char in LIKE (Normally '_')
660
** w_many       Multiple char matching char in LIKE (Normally '%')
1 by brian
clean slate
661
** res_length	Length of min_str and max_str.
662
** min_str	Smallest case sensitive string that ranges LIKE.
663
**		Should be space padded to res_length.
664
** max_str	Largest case sensitive string that ranges LIKE.
665
**		Normally padded with the biggest character sort value.
666
**
667
** The function should return 0 if ok and 1 if the LIKE string can't be
668
** optimized !
669
*/
670
276 by Brian Aker
Cleaned out my_bool from strings.
671
bool my_like_range_mb(const CHARSET_INFO * const cs,
77.1.95 by Monty Taylor
Fixed silly my_bool==char nonsense.
672
                         const char *ptr,size_t ptr_length,
673
                         char escape, char w_one, char w_many,
674
                         size_t res_length,
675
                         char *min_str,char *max_str,
676
                         size_t *min_length,size_t *max_length)
1 by brian
clean slate
677
{
482 by Brian Aker
Remove uint.
678
  uint32_t mb_len;
1 by brian
clean slate
679
  const char *end= ptr + ptr_length;
680
  char *min_org= min_str;
681
  char *min_end= min_str + res_length;
682
  char *max_end= max_str + res_length;
683
  size_t maxcharlen= res_length / cs->mbmaxlen;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
684
  const char *contraction_flags= cs->contractions ?
1 by brian
clean slate
685
              ((const char*) cs->contractions) + 0x40*0x40 : NULL;
686
687
  for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
688
  {
689
    /* We assume here that escape, w_any, w_namy are one-byte characters */
690
    if (*ptr == escape && ptr+1 != end)
691
      ptr++;                                    /* Skip escape */
692
    else if (*ptr == w_one || *ptr == w_many)   /* '_' and '%' in SQL */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
693
    {
1 by brian
clean slate
694
fill_max_and_min:
695
      /*
696
        Calculate length of keys:
697
        'a\0\0... is the smallest possible string when we have space expand
698
        a\ff\ff... is the biggest possible string
699
      */
700
      *min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
701
                    res_length);
702
      *max_length= res_length;
703
      /* Create min key  */
704
      do
705
      {
706
	*min_str++= (char) cs->min_sort_char;
707
      } while (min_str != min_end);
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
708
709
      /*
1 by brian
clean slate
710
        Write max key: create a buffer with multibyte
711
        representation of the max_sort_char character,
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
712
        and copy it into max_str in a loop.
1 by brian
clean slate
713
      */
714
      *max_length= res_length;
715
      pad_max_char(cs, max_str, max_end);
716
      return 0;
717
    }
718
    if ((mb_len= my_ismbchar(cs, ptr, end)) > 1)
719
    {
720
      if (ptr+mb_len > end || min_str+mb_len > min_end)
721
        break;
722
      while (mb_len--)
723
       *min_str++= *max_str++= *ptr++;
724
    }
725
    else
726
    {
727
      /*
728
        Special case for collations with contractions.
729
        For example, in Chezh, 'ch' is a separate letter
730
        which is sorted between 'h' and 'i'.
731
        If the pattern 'abc%', 'c' at the end can mean:
732
        - letter 'c' itself,
733
        - beginning of the contraction 'ch'.
734
735
        If we simply return this LIKE range:
736
737
         'abc\min\min\min' and 'abc\max\max\max'
738
739
        then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
740
        will only find values starting from 'abc[^h]',
741
        but won't find values starting from 'abch'.
742
743
        We must ignore contraction heads followed by w_one or w_many.
744
        ('Contraction head' means any letter which can be the first
745
        letter in a contraction)
746
747
        For example, for Czech 'abc%', we will return LIKE range,
748
        which is equal to LIKE range for 'ab%':
749
750
        'ab\min\min\min\min' and 'ab\max\max\max\max'.
751
752
      */
753
      if (contraction_flags && ptr + 1 < end &&
481 by Brian Aker
Remove all of uchar.
754
          contraction_flags[(unsigned char) *ptr])
1 by brian
clean slate
755
      {
756
        /* Ptr[0] is a contraction head. */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
757
1 by brian
clean slate
758
        if (ptr[1] == w_one || ptr[1] == w_many)
759
        {
760
          /* Contraction head followed by a wildcard, quit. */
761
          goto fill_max_and_min;
762
        }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
763
1 by brian
clean slate
764
        /*
765
          Some letters can be both contraction heads and contraction tails.
766
          For example, in Danish 'aa' is a separate single letter which
767
          is sorted after 'z'. So 'a' can be both head and tail.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
768
1 by brian
clean slate
769
          If ptr[0]+ptr[1] is a contraction,
770
          then put both letters together.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
771
1 by brian
clean slate
772
          If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
773
          is not a contraction, then we put only ptr[0],
774
          and continue with ptr[1] on the next loop.
775
        */
481 by Brian Aker
Remove all of uchar.
776
        if (contraction_flags[(unsigned char) ptr[1]] &&
1 by brian
clean slate
777
            cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40])
778
        {
779
          /* Contraction found */
780
          if (maxcharlen == 1 || min_str + 1 >= min_end)
781
          {
782
            /* Both contraction parts don't fit, quit */
783
            goto fill_max_and_min;
784
          }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
785
1 by brian
clean slate
786
          /* Put contraction head */
787
          *min_str++= *max_str++= *ptr++;
788
          maxcharlen--;
789
        }
790
      }
791
      /* Put contraction tail, or a single character */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
792
      *min_str++= *max_str++= *ptr++;
1 by brian
clean slate
793
    }
794
  }
795
796
  *min_length= *max_length = (size_t) (min_str - min_org);
797
  while (min_str != min_end)
798
    *min_str++= *max_str++= ' ';           /* Because if key compression */
799
  return 0;
800
}
801
802
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
803
int my_wildcmp_mb_bin(const CHARSET_INFO * const cs,
1 by brian
clean slate
804
                      const char *str,const char *str_end,
805
                      const char *wildstr,const char *wildend,
806
                      int escape, int w_one, int w_many)
807
{
808
  int result= -1;				/* Not found, using wildcards */
809
810
  while (wildstr != wildend)
811
  {
812
    while (*wildstr != w_many && *wildstr != w_one)
813
    {
814
      int l;
815
      if (*wildstr == escape && wildstr+1 != wildend)
816
	wildstr++;
817
      if ((l = my_ismbchar(cs, wildstr, wildend)))
818
      {
819
	  if (str+l > str_end || memcmp(str, wildstr, l) != 0)
820
	      return 1;
821
	  str += l;
822
	  wildstr += l;
823
      }
824
      else
825
      if (str == str_end || *wildstr++ != *str++)
826
	return(1);				/* No match */
827
      if (wildstr == wildend)
828
	return (str != str_end);		/* Match if both are at end */
829
      result=1;					/* Found an anchor char */
830
    }
831
    if (*wildstr == w_one)
832
    {
833
      do
834
      {
835
	if (str == str_end)			/* Skip one char if possible */
836
	  return (result);
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
837
	inc_ptr(cs,str,str_end);
1 by brian
clean slate
838
      } while (++wildstr < wildend && *wildstr == w_one);
839
      if (wildstr == wildend)
840
	break;
841
    }
842
    if (*wildstr == w_many)
843
    {						/* Found w_many */
481 by Brian Aker
Remove all of uchar.
844
      unsigned char cmp;
1 by brian
clean slate
845
      const char* mb = wildstr;
846
      int mb_len=0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
847
1 by brian
clean slate
848
      wildstr++;
849
      /* Remove any '%' and '_' from the wild search string */
850
      for (; wildstr != wildend ; wildstr++)
851
      {
852
	if (*wildstr == w_many)
853
	  continue;
854
	if (*wildstr == w_one)
855
	{
856
	  if (str == str_end)
857
	    return (-1);
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
858
	  inc_ptr(cs,str,str_end);
1 by brian
clean slate
859
	  continue;
860
	}
861
	break;					/* Not a wild character */
862
      }
863
      if (wildstr == wildend)
864
	return(0);				/* Ok if w_many is last */
865
      if (str == str_end)
866
	return -1;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
867
1 by brian
clean slate
868
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
869
	cmp= *++wildstr;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
870
1 by brian
clean slate
871
      mb=wildstr;
872
      mb_len= my_ismbchar(cs, wildstr, wildend);
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
873
      inc_ptr(cs,wildstr,wildend);		/* This is compared trough cmp */
1 by brian
clean slate
874
      do
875
      {
876
        for (;;)
877
        {
878
          if (str >= str_end)
879
            return -1;
880
          if (mb_len)
881
          {
882
            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
883
            {
884
              str += mb_len;
885
              break;
886
            }
887
          }
888
          else if (!my_ismbchar(cs, str, str_end) && *str == cmp)
889
          {
890
            str++;
891
            break;
892
          }
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
893
          inc_ptr(cs,str, str_end);
1 by brian
clean slate
894
        }
895
	{
896
	  int tmp=my_wildcmp_mb_bin(cs,str,str_end,wildstr,wildend,escape,w_one,w_many);
897
	  if (tmp <= 0)
898
	    return (tmp);
899
	}
900
      } while (str != str_end && wildstr[0] != w_many);
901
      return(-1);
902
    }
903
  }
904
  return (str != str_end ? 1 : 0);
905
}
906
907
908
/*
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
909
  Data was produced from EastAsianWidth.txt
1 by brian
clean slate
910
  using the utr11-dump utility.
911
*/
912
/*
  Lookup table installed at utr11_data[0x11], i.e. for code points whose
  high byte is 0x11; indexed by the low byte of the code point.
  my_numcells_mb adds the entry (0 or 1) on top of the one cell it counts
  for every character.
*/
static char pg11[256]=
913
{
914
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
915
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
916
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1,
917
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
918
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
919
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
920
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
921
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
922
};
923
924
/*
  Lookup table installed at utr11_data[0x23], i.e. for code points whose
  high byte is 0x23; indexed by the low byte of the code point.
  my_numcells_mb adds the entry (0 or 1) on top of the one cell it counts
  for every character.
*/
static char pg23[256]=
925
{
926
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
927
0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
928
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
929
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
930
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
931
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
932
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
933
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
934
};
935
936
/*
  Lookup table installed at utr11_data[0x2E], i.e. for code points whose
  high byte is 0x2E; indexed by the low byte of the code point.
  my_numcells_mb adds the entry (0 or 1) on top of the one cell it counts
  for every character.
*/
static char pg2E[256]=
937
{
938
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
939
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
940
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
941
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
942
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,
943
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
944
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
945
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0
946
};
947
948
/*
  Lookup table installed at utr11_data[0x2F], i.e. for code points whose
  high byte is 0x2F; indexed by the low byte of the code point.
  my_numcells_mb adds the entry (0 or 1) on top of the one cell it counts
  for every character.
*/
static char pg2F[256]=
949
{
950
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
951
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
952
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
953
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
954
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
955
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
956
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
957
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0
958
};
959
960
/*
  Lookup table installed at utr11_data[0x30], i.e. for code points whose
  high byte is 0x30; indexed by the low byte of the code point.
  my_numcells_mb adds the entry (0 or 1) on top of the one cell it counts
  for every character.
*/
static char pg30[256]=
961
{
962
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
963
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
964
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
965
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
966
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,
967
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
968
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
969
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
970
};
971
972
/*
  Lookup table installed at utr11_data[0x31], i.e. for code points whose
  high byte is 0x31; indexed by the low byte of the code point.
  my_numcells_mb adds the entry (0 or 1) on top of the one cell it counts
  for every character.
*/
static char pg31[256]=
973
{
974
0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
975
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
976
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
977
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
978
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
979
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
980
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
981
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
982
};
983
984
/*
  Lookup table installed at utr11_data[0x32], i.e. for code points whose
  high byte is 0x32; indexed by the low byte of the code point.
  my_numcells_mb adds the entry (0 or 1) on top of the one cell it counts
  for every character.
*/
static char pg32[256]=
985
{
986
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
987
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
988
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
989
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,
990
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
991
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
992
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
993
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
994
};
995
996
/*
  Lookup table installed at utr11_data[0x4D], i.e. for code points whose
  high byte is 0x4D; indexed by the low byte of the code point.
  my_numcells_mb adds the entry (0 or 1) on top of the one cell it counts
  for every character.
*/
static char pg4D[256]=
997
{
998
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
999
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1000
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1001
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1002
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1003
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
1004
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1005
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1006
};
1007
1008
/*
  Lookup table installed at utr11_data[0x9F], i.e. for code points whose
  high byte is 0x9F; indexed by the low byte of the code point.
  my_numcells_mb adds the entry (0 or 1) on top of the one cell it counts
  for every character.
*/
static char pg9F[256]=
1009
{
1010
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1011
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1012
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1013
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1014
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1015
1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1016
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1017
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1018
};
1019
1020
/*
  Lookup table installed at utr11_data[0xA4], i.e. for code points whose
  high byte is 0xA4; indexed by the low byte of the code point.
  my_numcells_mb adds the entry (0 or 1) on top of the one cell it counts
  for every character.
*/
static char pgA4[256]=
1021
{
1022
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1023
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1024
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1025
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1026
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1027
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1028
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1029
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1030
};
1031
1032
/*
  Lookup table installed at utr11_data[0xD7], i.e. for code points whose
  high byte is 0xD7; indexed by the low byte of the code point.
  my_numcells_mb adds the entry (0 or 1) on top of the one cell it counts
  for every character.
*/
static char pgD7[256]=
1033
{
1034
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1035
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1036
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1037
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1038
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1039
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1040
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1041
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1042
};
1043
1044
/*
  Lookup table installed at utr11_data[0xFA], i.e. for code points whose
  high byte is 0xFA; indexed by the low byte of the code point.
  my_numcells_mb adds the entry (0 or 1) on top of the one cell it counts
  for every character.
*/
static char pgFA[256]=
1045
{
1046
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1047
1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1048
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1049
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1050
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1051
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1052
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1053
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1054
};
1055
1056
/*
  Lookup table installed at utr11_data[0xFE], i.e. for code points whose
  high byte is 0xFE; indexed by the low byte of the code point.
  my_numcells_mb adds the entry (0 or 1) on top of the one cell it counts
  for every character.
*/
static char pgFE[256]=
1057
{
1058
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1059
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1060
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,
1061
1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1062
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1063
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1064
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1065
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1066
};
1067
1068
/*
  Lookup table installed at utr11_data[0xFF], i.e. for code points whose
  high byte is 0xFF; indexed by the low byte of the code point.
  my_numcells_mb adds the entry (0 or 1) on top of the one cell it counts
  for every character.
*/
static char pgFF[256]=
1069
{
1070
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1071
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1072
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1073
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1074
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1075
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1076
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1077
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1078
};
1079
1810.2.1 by tdavies
Modified File: ../drizzled/ctype-mb.cc at line 1075 converted the static C struct which instatiats the utr11_data array to a C++ class
1080
/*
  Per-page data used by my_numcells_mb, one entry for each possible high
  byte of a 16-bit code point (index is (wc >> 8) & 0xFF).

  If p is not NULL it points to a 256-entry table that is indexed by the
  low byte of the code point (wc & 0xFF); otherwise the value of page
  applies to every code point on that page. Whichever value is selected
  is added by my_numcells_mb to the one cell it counts per character.
*/
static class {
1081
public:
1082
  int page; 
1083
  char *p;
1084
} 
1085
  utr11_data[256]=
1 by brian
clean slate
1086
{
1087
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1088
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1089
{0,NULL},{0,pg11},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1090
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1091
{0,NULL},{0,NULL},{0,NULL},{0,pg23},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1092
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,pg2E},{0,pg2F},
1093
{0,pg30},{0,pg31},{0,pg32},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1094
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1095
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1096
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg4D},{1,NULL},{1,NULL},
1097
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1098
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1099
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1100
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1101
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1102
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1103
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1104
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1105
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1106
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg9F},
1107
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgA4},{0,NULL},{0,NULL},{0,NULL},
1108
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1109
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1110
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1111
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1112
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1113
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgD7},
1114
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1115
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1116
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1117
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1118
{0,NULL},{1,NULL},{0,pgFA},{0,NULL},{0,NULL},{0,NULL},{0,pgFE},{0,pgFF}
1119
};
1120
1121
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1122
size_t my_numcells_mb(const CHARSET_INFO * const cs, const char *b, const char *e)
1 by brian
clean slate
1123
{
1124
  my_wc_t wc;
1125
  size_t clen= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
1126
1 by brian
clean slate
1127
  while (b < e)
1128
  {
1129
    int mb_len;
482 by Brian Aker
Remove uint.
1130
    uint32_t pg;
481 by Brian Aker
Remove all of uchar.
1131
    if ((mb_len= cs->cset->mb_wc(cs, &wc, (unsigned char*) b, (unsigned char*) e)) <= 0 ||
1 by brian
clean slate
1132
        wc > 0xFFFF)
1133
    {
1134
      /*
1135
        Let's think a wrong sequence takes 1 dysplay cell.
1136
        Also, consider supplementary characters as taking one cell.
1137
      */
1138
      mb_len= 1;
1139
      b++;
1140
      continue;
1141
    }
1142
    b+= mb_len;
1143
    pg= (wc >> 8) & 0xFF;
1144
    clen+= utr11_data[pg].p ? utr11_data[pg].p[wc & 0xFF] : utr11_data[pg].page;
1145
    clen++;
1146
  }
1147
  return clen;
1148
}
1149
1150
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
1151
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1152
int my_mb_ctype_mb(const CHARSET_INFO * const cs, int *ctype,
481 by Brian Aker
Remove all of uchar.
1153
                   const unsigned char *s, const unsigned char *e)
1 by brian
clean slate
1154
{
1155
  my_wc_t wc;
1156
  int res= cs->cset->mb_wc(cs, &wc, s, e);
1157
  if (res <= 0 || wc > 0xFFFF)
1158
    *ctype= 0;
1159
  else
1160
    *ctype= my_uni_ctype[wc>>8].ctype ?
1161
            my_uni_ctype[wc>>8].ctype[wc&0xFF] :
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
1162
            my_uni_ctype[wc>>8].pctype;
1 by brian
clean slate
1163
  return res;
1164
}
1165
1166
1167
/*
  Collation handler table built from the multi-byte binary routines
  (the *_mb_bin functions) plus a few shared *_mb / *_simple ones.
  The initializers are positional, so their order has to follow the member
  order of MY_COLLATION_HANDLER, which is declared outside this file.
*/
MY_COLLATION_HANDLER my_collation_mb_bin_handler =
1168
{
1169
    NULL,              /* init */
1170
    my_strnncoll_mb_bin,
1171
    my_strnncollsp_mb_bin,
1172
    my_strnxfrm_mb,
1173
    my_strnxfrmlen_simple,
1174
    my_like_range_mb,
1175
    my_wildcmp_mb_bin,
1176
    my_strcasecmp_mb_bin,
1177
    my_instr_mb,
1178
    my_hash_sort_mb_bin,
1179
    my_propagate_simple
1180
};
1181
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
1182
} /* namespace drizzled */