~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
1802.10.2 by Monty Taylor
Update all of the copyright headers to include the correct address.
14
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
1 by brian
clean slate
15
2173.2.1 by Monty Taylor
Fixes incorrect usage of include
16
#include <config.h>
1130.3.26 by Monty Taylor
Removed global.h from headers.
17
2173.2.1 by Monty Taylor
Fixes incorrect usage of include
18
#include <drizzled/internal/m_string.h>
2281.5.1 by Muhammad Umair
Merged charset declarations of global_charset_info.h and charset_info.h into charset.h header file.
19
#include <drizzled/charset.h>
1 by brian
clean slate
20
1067.4.9 by Nathan Williams
Converted all usages of cmin/cmax in mystrings directory to use std::min/max
21
#include <algorithm>
22
23
using namespace std;
24
2318.2.13 by Olaf van der Spek
Refactor
25
namespace drizzled {
1 by brian
clean slate
26
27
/*
28
  my_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise.
29
 */
30
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
31
int my_strcasecmp_mb(const charset_info_st * const  cs,const char *s, const char *t)
1 by brian
clean slate
32
{
2170.5.1 by Olaf van der Spek
Remove register keyword
33
  uint32_t l;
34
  unsigned char *map=cs->to_upper;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
35
1 by brian
clean slate
36
  while (*s && *t)
37
  {
38
    /* Pointing after the '\0' is safe here. */
39
    if ((l=my_ismbchar(cs, s, s + cs->mbmaxlen)))
40
    {
41
      while (l--)
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
42
        if (*s++ != *t++)
1 by brian
clean slate
43
          return 1;
44
    }
45
    else if (my_mbcharlen(cs, *t) > 1)
46
      return 1;
481 by Brian Aker
Remove all of uchar.
47
    else if (map[(unsigned char) *s++] != map[(unsigned char) *t++])
1 by brian
clean slate
48
      return 1;
49
  }
50
  /* At least one of '*s' and '*t' is zero here. */
51
  return (*t != *s);
52
}
53
54
/*
55
** Compare string against string with wildcard
56
**	0 if matched
57
**	-1 if not matched with wildcard
58
**	 1 if not matched (mismatch found outside a w_many scan)
59
*/
60
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
61
inline static const char* inc_ptr(const charset_info_st *cs, const char *str, const char* str_end)
62
{
63
  return str + (my_ismbchar(cs, str, str_end) ? my_ismbchar(cs, str, str_end) : 1);
64
}
65
66
inline static int likeconv(const charset_info_st *cs, const char c) 
67
{
68
  return (unsigned char) cs->sort_order[(unsigned char) c];
69
}
70
    
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
71
int my_wildcmp_mb(const charset_info_st * const cs,
1 by brian
clean slate
72
		  const char *str,const char *str_end,
73
		  const char *wildstr,const char *wildend,
74
		  int escape, int w_one, int w_many)
75
{
76
  int result= -1;				/* Not found, using wildcards */
77
78
  while (wildstr != wildend)
79
  {
80
    while (*wildstr != w_many && *wildstr != w_one)
81
    {
82
      int l;
83
      if (*wildstr == escape && wildstr+1 != wildend)
84
	wildstr++;
85
      if ((l = my_ismbchar(cs, wildstr, wildend)))
86
      {
87
	  if (str+l > str_end || memcmp(str, wildstr, l) != 0)
88
	      return 1;
89
	  str += l;
90
	  wildstr += l;
91
      }
92
      else
93
      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
2318.6.77 by Olaf van der Spek
Refactor
94
	return 1;				/* No match */
1 by brian
clean slate
95
      if (wildstr == wildend)
96
	return (str != str_end);		/* Match if both are at end */
97
      result=1;					/* Found an anchor char */
98
    }
99
    if (*wildstr == w_one)
100
    {
101
      do
102
      {
103
	if (str == str_end)			/* Skip one char if possible */
104
	  return (result);
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
105
	inc_ptr(cs,str,str_end);
1 by brian
clean slate
106
      } while (++wildstr < wildend && *wildstr == w_one);
107
      if (wildstr == wildend)
108
	break;
109
    }
110
    if (*wildstr == w_many)
111
    {						/* Found w_many */
481 by Brian Aker
Remove all of uchar.
112
      unsigned char cmp;
1 by brian
clean slate
113
      const char* mb = wildstr;
114
      int mb_len=0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
115
1 by brian
clean slate
116
      wildstr++;
117
      /* Remove any '%' and '_' from the wild search string */
118
      for (; wildstr != wildend ; wildstr++)
119
      {
120
	if (*wildstr == w_many)
121
	  continue;
122
	if (*wildstr == w_one)
123
	{
124
	  if (str == str_end)
125
	    return (-1);
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
126
	  inc_ptr(cs,str,str_end);
1 by brian
clean slate
127
	  continue;
128
	}
129
	break;					/* Not a wild character */
130
      }
131
      if (wildstr == wildend)
2318.6.58 by Olaf van der Spek
Refactor
132
	return 0;				/* Ok if w_many is last */
1 by brian
clean slate
133
      if (str == str_end)
134
	return -1;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
135
1 by brian
clean slate
136
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
137
	cmp= *++wildstr;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
138
1 by brian
clean slate
139
      mb=wildstr;
140
      mb_len= my_ismbchar(cs, wildstr, wildend);
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
141
      inc_ptr(cs,wildstr,wildend);		/* This is compared trough cmp */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
142
      cmp=likeconv(cs,cmp);
1 by brian
clean slate
143
      do
144
      {
145
        for (;;)
146
        {
147
          if (str >= str_end)
148
            return -1;
149
          if (mb_len)
150
          {
151
            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
152
            {
153
              str += mb_len;
154
              break;
155
            }
156
          }
157
          else if (!my_ismbchar(cs, str, str_end) &&
158
                   likeconv(cs,*str) == cmp)
159
          {
160
            str++;
161
            break;
162
          }
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
163
          inc_ptr(cs,str, str_end);
1 by brian
clean slate
164
        }
165
	{
166
	  int tmp=my_wildcmp_mb(cs,str,str_end,wildstr,wildend,escape,w_one,
167
                                w_many);
168
	  if (tmp <= 0)
169
	    return (tmp);
170
	}
171
      } while (str != str_end && wildstr[0] != w_many);
172
      return(-1);
173
    }
174
  }
175
  return (str != str_end ? 1 : 0);
176
}
177
178
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
179
size_t my_numchars_mb(const charset_info_st * const cs,
1 by brian
clean slate
180
		      const char *pos, const char *end)
181
{
2170.5.1 by Olaf van der Spek
Remove register keyword
182
  size_t count= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
183
  while (pos < end)
1 by brian
clean slate
184
  {
482 by Brian Aker
Remove uint.
185
    uint32_t mb_len;
1 by brian
clean slate
186
    pos+= (mb_len= my_ismbchar(cs,pos,end)) ? mb_len : 1;
187
    count++;
188
  }
189
  return count;
190
}
191
192
2440.2.14 by Olaf van der Spek
Refactor
193
size_t my_charpos_mb(const charset_info_st * const cs, const char *pos, const char *end, size_t length)
1 by brian
clean slate
194
{
195
  const char *start= pos;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
196
1 by brian
clean slate
197
  while (length && pos < end)
198
  {
482 by Brian Aker
Remove uint.
199
    uint32_t mb_len;
1 by brian
clean slate
200
    pos+= (mb_len= my_ismbchar(cs, pos, end)) ? mb_len : 1;
201
    length--;
202
  }
203
  return (size_t) (length ? end+2-start : pos-start);
204
}
205
206
2440.2.14 by Olaf van der Spek
Refactor
207
size_t my_well_formed_len_mb(const charset_info_st& cs, str_ref str, size_t pos, int *error)
1 by brian
clean slate
208
{
2440.2.14 by Olaf van der Spek
Refactor
209
  const char *b= str.begin();
210
  const char *e= str.end();
1 by brian
clean slate
211
  const char *b_start= b;
212
  *error= 0;
213
  while (pos)
214
  {
215
    my_wc_t wc;
2440.2.14 by Olaf van der Spek
Refactor
216
    int mb_len= cs.cset->mb_wc(&cs, &wc, (const unsigned char*) b, (const unsigned char*) e);
217
    if (mb_len <= 0)
1 by brian
clean slate
218
    {
219
      *error= b < e ? 1 : 0;
220
      break;
221
    }
222
    b+= mb_len;
223
    pos--;
224
  }
225
  return (size_t) (b - b_start);
226
}
227
228
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
229
uint32_t my_instr_mb(const charset_info_st * const cs,
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
230
                 const char *b, size_t b_length,
1 by brian
clean slate
231
                 const char *s, size_t s_length,
482 by Brian Aker
Remove uint.
232
                 my_match_t *match, uint32_t nmatch)
1 by brian
clean slate
233
{
2170.5.1 by Olaf van der Spek
Remove register keyword
234
  const char *end, *b0;
1 by brian
clean slate
235
  int res= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
236
1 by brian
clean slate
237
  if (s_length <= b_length)
238
  {
239
    if (!s_length)
240
    {
241
      if (nmatch)
242
      {
243
        match->beg= 0;
244
        match->end= 0;
245
        match->mb_len= 0;
246
      }
247
      return 1;		/* Empty string is always found */
248
    }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
249
1 by brian
clean slate
250
    b0= b;
251
    end= b+b_length-s_length+1;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
252
1 by brian
clean slate
253
    while (b < end)
254
    {
255
      int mb_len;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
256
257
      if (!cs->coll->strnncoll(cs, (const unsigned char*) b, s_length,
481 by Brian Aker
Remove all of uchar.
258
                                   (const unsigned char*) s, s_length, 0))
1 by brian
clean slate
259
      {
260
        if (nmatch)
261
        {
262
          match[0].beg= 0;
263
          match[0].end= (size_t) (b-b0);
264
          match[0].mb_len= res;
265
          if (nmatch > 1)
266
          {
267
            match[1].beg= match[0].end;
268
            match[1].end= match[0].end+s_length;
269
            match[1].mb_len= 0;	/* Not computed */
270
          }
271
        }
272
        return 2;
273
      }
274
      mb_len= (mb_len= my_ismbchar(cs, b, end)) ? mb_len : 1;
275
      b+= mb_len;
276
      b_length-= mb_len;
277
      res++;
278
    }
279
  }
280
  return 0;
281
}
282
283
284
/* BINARY collations handlers for MB charsets */
285
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
286
/*
  Binary comparison of two byte strings.

  The common prefix is compared with memcmp(); if it is equal the result is
  the length difference. With t_is_prefix set, "s" is treated as if it were
  no longer than the common prefix.
*/
int my_strnncoll_mb_bin(const charset_info_st * const,
                        const unsigned char *s, size_t slen,
                        const unsigned char *t, size_t tlen,
                        bool t_is_prefix)
{
  const size_t common= std::min(slen, tlen);
  const int diff= memcmp(s, t, common);
  if (diff)
    return diff;

  const size_t effective_slen= t_is_prefix ? common : slen;
  return (int) (effective_slen - tlen);
}
295
296
297
/*
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
298
  Compare two strings.
299
1 by brian
clean slate
300
  SYNOPSIS
301
    my_strnncollsp_mb_bin()
302
    cs			Character set
303
    s			String to compare
304
    slen		Length of 's'
305
    t			String to compare
306
    tlen		Length of 't'
307
    diff_if_only_endspace_difference
308
		        Set to 1 if the strings should be regarded as different
309
                        if they differ only in end space
310
311
  NOTE
312
   This function is used for character strings with binary collations.
313
   The shorter string is extended with end space to be as long as the longer
314
   one.
315
316
  RETURN
317
    A negative number if s < t
318
    A positive number if s > t
319
    0 if strings are equal
320
*/
321
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
322
int my_strnncollsp_mb_bin(const charset_info_st * const,
                          const unsigned char *a, size_t a_length,
                          const unsigned char *b, size_t b_length,
                          bool diff_if_only_endspace_difference)
{
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
  diff_if_only_endspace_difference= 0;
#endif

  /* Compare the part both strings have in common, byte by byte. */
  const size_t common= std::min(a_length, b_length);
  for (const unsigned char *stop= a + common; a < stop; a++, b++)
  {
    if (*a != *b)
      return ((int) *a - (int) *b);
  }

  if (a_length == b_length)
    return 0;

  /*
    Only the tail of the longer string is left. Compare it against an
    implicit run of spaces: the first non-space byte decides the order.
  */
  int sign= 1;                                  /* 'a' is the longer string */
  int only_space_result= diff_if_only_endspace_difference ? 1 : 0;
  const unsigned char *tail= a;
  size_t tail_length= a_length - common;
  if (a_length < b_length)
  {
    /* 'b' is the longer string: scan its tail and flip the sign. */
    tail= b;
    tail_length= b_length - common;
    sign= -1;
    only_space_result= -only_space_result;
  }

  for (const unsigned char *tail_end= tail + tail_length; tail < tail_end; tail++)
  {
    if (*tail != ' ')
      return (*tail < ' ') ? -sign : sign;
  }
  return only_space_result;
}
367
368
369
/*
370
  Copy one non-ascii character.
371
  "dst" must have enough room for the character.
372
  Note, we don't use sort_order[] in this macro.
373
  This is correct even for case insensitive collations:
374
  - basic Latin letters are processed outside this macro;
375
  - for other characters sort_order[x] is equal to x.
376
*/
377
/*
  Copy one character from "src" to "dst", advancing both pointers.
  The byte count is chosen by cs->cset->ismbchar(): 4, 3 or 2 bytes for a
  multi-byte character, one byte when it returns 0.

  Wrapped in do { } while (0) so that the macro followed by ';' is a single
  statement (safe in an unbraced if/else); arguments are parenthesized.
  "dst" and "src" must be modifiable pointer lvalues.
*/
#define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se)                  \
do                                                                       \
{                                                                        \
  switch ((cs)->cset->ismbchar((cs), (const char*) (src),                \
                               (const char*) (se))) {                    \
  case 4:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 3:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 2:                                                                \
    *(dst)++= *(src)++;                                                  \
    /* fall through */                                                   \
  case 0:                                                                \
    *(dst)++= *(src)++; /* byte in range 0x80..0xFF, not an MB head */   \
  }                                                                      \
} while (0)
393
394
395
/*
396
  For character sets with two or three byte multi-byte
397
  characters having multibyte weights *equal* to their codes:
398
  cp932, euckr, gb2312, sjis, eucjpms, ujis.
399
*/
400
size_t
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
401
my_strnxfrm_mb(const charset_info_st * const cs,
482 by Brian Aker
Remove uint.
402
               unsigned char *dst, size_t dstlen, uint32_t nweights,
403
               const unsigned char *src, size_t srclen, uint32_t flags)
1 by brian
clean slate
404
{
481 by Brian Aker
Remove all of uchar.
405
  unsigned char *d0= dst;
406
  unsigned char *de= dst + dstlen;
407
  const unsigned char *se= src + srclen;
408
  const unsigned char *sort_order= cs->sort_order;
1 by brian
clean slate
409
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
410
  assert(cs->mbmaxlen <= 4);
1 by brian
clean slate
411
412
  /*
413
    If "srclen" is smaller than both "dstlen" and "nweights"
414
    then we can run a simplified loop -
415
    without checking "nweights" and "de".
416
  */
417
  if (dstlen >= srclen && nweights >= srclen)
418
  {
419
    if (sort_order)
420
    {
421
      /* Optimized version for a case insensitive collation */
422
      for (; src < se; nweights--)
423
      {
424
        if (*src < 128) /* quickly catch ASCII characters */
425
          *dst++= sort_order[*src++];
426
        else
427
          my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
428
      }
429
    }
430
    else
431
    {
432
      /* Optimized version for a case sensitive collation (no sort_order) */
433
      for (; src < se; nweights--)
434
      {
435
        if (*src < 128) /* quickly catch ASCII characters */
436
          *dst++= *src++;
437
        else
438
          my_strnxfrm_mb_non_ascii_char(cs, dst, src, se);
439
      }
440
    }
441
    goto pad;
442
  }
443
444
  /*
445
    A thourough loop, checking all possible limits:
446
    "se", "nweights" and "de".
447
  */
448
  for (; src < se && nweights; nweights--)
449
  {
450
    int chlen;
451
    if (*src < 128 ||
452
        !(chlen= cs->cset->ismbchar(cs, (const char*) src, (const char*) se)))
453
    {
454
      /* Single byte character */
455
      if (dst >= de)
456
        break;
457
      *dst++= sort_order ? sort_order[*src++] : *src++;
458
    }
459
    else
460
    {
461
      /* Multi-byte character */
462
      if (dst + chlen > de)
463
        break;
464
      *dst++= *src++;
465
      *dst++= *src++;
466
      if (chlen == 3)
467
        *dst++= *src++;
468
    }
469
  }
470
471
pad:
472
  return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
473
}
474
475
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
476
/* Binary collation: the comparison is a plain strcmp(); the charset is unused. */
int my_strcasecmp_mb_bin(const charset_info_st * const,
                         const char *s, const char *t)
{
  const int order= strcmp(s, t);
  return order;
}
481
482
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
483
void my_hash_sort_mb_bin(const charset_info_st * const,
632.1.10 by Monty Taylor
Got rid of Sun Studio warnings.
484
                         const unsigned char *key, size_t len,
485
                         uint32_t *nr1, uint32_t *nr2)
1 by brian
clean slate
486
{
481 by Brian Aker
Remove all of uchar.
487
  const unsigned char *pos = key;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
488
1 by brian
clean slate
489
  /*
490
     Remove trailing spaces. We have to do this to be able to compare
491
    'A ' and 'A' as identical
492
  */
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
493
  key= internal::skip_trailing_space(key, len);
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
494
481 by Brian Aker
Remove all of uchar.
495
  for (; pos < (const unsigned char*) key ; pos++)
1 by brian
clean slate
496
  {
895 by Brian Aker
Completion (?) of uint conversion.
497
    nr1[0]^=(ulong) ((((uint32_t) nr1[0] & 63)+nr2[0]) *
498
	     ((uint32_t)*pos)) + (nr1[0] << 8);
1 by brian
clean slate
499
    nr2[0]+=3;
500
  }
501
}
502
503
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
504
/*
1 by brian
clean slate
505
  Fill the given buffer with 'maximum character' for given charset
506
  SYNOPSIS
507
      pad_max_char()
508
      cs   Character set
509
      str  Start of buffer to fill
510
      end  End of buffer to fill
511
512
  DESCRIPTION
513
      Write max key:
514
      - for non-Unicode character sets:
515
        just set to 255.
516
      - for Unicode character set (utf-8):
517
        create a buffer with multibyte representation of the max_sort_char
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
518
        character, and copy it into max_str in a loop.
1 by brian
clean slate
519
*/
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
520
static void pad_max_char(const charset_info_st * const cs, char *str, char *end)
1 by brian
clean slate
521
{
522
  char buf[10];
523
  char buflen;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
524
1 by brian
clean slate
525
  if (!(cs->state & MY_CS_UNICODE))
526
  {
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
527
    memset(str, 255, end - str);
1 by brian
clean slate
528
    return;
529
  }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
530
481 by Brian Aker
Remove all of uchar.
531
  buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (unsigned char*) buf,
532
                          (unsigned char*) buf + sizeof(buf));
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
533
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
534
  assert(buflen > 0);
1 by brian
clean slate
535
  do
536
  {
537
    if ((str + buflen) < end)
538
    {
539
      /* Enough space for the characer */
540
      memcpy(str, buf, buflen);
541
      str+= buflen;
542
    }
543
    else
544
    {
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
545
      /*
1 by brian
clean slate
546
        There is no space for whole multibyte
547
        character, then add trailing spaces.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
548
      */
1 by brian
clean slate
549
      *str++= ' ';
550
    }
551
  } while (str < end);
552
}
553
554
/*
555
** Calculate min_str and max_str that ranges a LIKE string.
556
** Arguments:
557
** ptr		Pointer to LIKE string.
558
** ptr_length	Length of LIKE string.
559
** escape	Escape character in LIKE.  (Normally '\').
77.1.95 by Monty Taylor
Fixed silly my_bool==char nonsense.
560
**		All escape characters should be removed from
561
**              min_str and max_str
562
** w_one        Single char matching char in LIKE (Normally '_')
563
** w_many       Multiple char matching char in LIKE (Normally '%')
1 by brian
clean slate
564
** res_length	Length of min_str and max_str.
565
** min_str	Smallest case sensitive string that ranges LIKE.
566
**		Should be space padded to res_length.
567
** max_str	Largest case sensitive string that ranges LIKE.
568
**		Normally padded with the biggest character sort value.
569
**
570
** The function should return 0 if ok and 1 if the LIKE string can't be
571
** optimized !
572
*/
573
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
574
bool my_like_range_mb(const charset_info_st * const cs,
77.1.95 by Monty Taylor
Fixed silly my_bool==char nonsense.
575
                         const char *ptr,size_t ptr_length,
576
                         char escape, char w_one, char w_many,
577
                         size_t res_length,
578
                         char *min_str,char *max_str,
579
                         size_t *min_length,size_t *max_length)
1 by brian
clean slate
580
{
482 by Brian Aker
Remove uint.
581
  uint32_t mb_len;
1 by brian
clean slate
582
  const char *end= ptr + ptr_length;
583
  char *min_org= min_str;
584
  char *min_end= min_str + res_length;
585
  char *max_end= max_str + res_length;
586
  size_t maxcharlen= res_length / cs->mbmaxlen;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
587
  const char *contraction_flags= cs->contractions ?
1 by brian
clean slate
588
              ((const char*) cs->contractions) + 0x40*0x40 : NULL;
589
590
  for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
591
  {
592
    /* We assume here that escape, w_any, w_namy are one-byte characters */
593
    if (*ptr == escape && ptr+1 != end)
594
      ptr++;                                    /* Skip escape */
595
    else if (*ptr == w_one || *ptr == w_many)   /* '_' and '%' in SQL */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
596
    {
1 by brian
clean slate
597
fill_max_and_min:
598
      /*
599
        Calculate length of keys:
600
        'a\0\0... is the smallest possible string when we have space expand
601
        a\ff\ff... is the biggest possible string
602
      */
603
      *min_length= ((cs->state & MY_CS_BINSORT) ? (size_t) (min_str - min_org) :
604
                    res_length);
605
      *max_length= res_length;
606
      /* Create min key  */
607
      do
608
      {
609
	*min_str++= (char) cs->min_sort_char;
610
      } while (min_str != min_end);
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
611
612
      /*
1 by brian
clean slate
613
        Write max key: create a buffer with multibyte
614
        representation of the max_sort_char character,
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
615
        and copy it into max_str in a loop.
1 by brian
clean slate
616
      */
617
      *max_length= res_length;
618
      pad_max_char(cs, max_str, max_end);
619
      return 0;
620
    }
621
    if ((mb_len= my_ismbchar(cs, ptr, end)) > 1)
622
    {
623
      if (ptr+mb_len > end || min_str+mb_len > min_end)
624
        break;
625
      while (mb_len--)
626
       *min_str++= *max_str++= *ptr++;
627
    }
628
    else
629
    {
630
      /*
631
        Special case for collations with contractions.
632
        For example, in Chezh, 'ch' is a separate letter
633
        which is sorted between 'h' and 'i'.
634
        If the pattern 'abc%', 'c' at the end can mean:
635
        - letter 'c' itself,
636
        - beginning of the contraction 'ch'.
637
638
        If we simply return this LIKE range:
639
640
         'abc\min\min\min' and 'abc\max\max\max'
641
642
        then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
643
        will only find values starting from 'abc[^h]',
644
        but won't find values starting from 'abch'.
645
646
        We must ignore contraction heads followed by w_one or w_many.
647
        ('Contraction head' means any letter which can be the first
648
        letter in a contraction)
649
650
        For example, for Czech 'abc%', we will return LIKE range,
651
        which is equal to LIKE range for 'ab%':
652
653
        'ab\min\min\min\min' and 'ab\max\max\max\max'.
654
655
      */
656
      if (contraction_flags && ptr + 1 < end &&
481 by Brian Aker
Remove all of uchar.
657
          contraction_flags[(unsigned char) *ptr])
1 by brian
clean slate
658
      {
659
        /* Ptr[0] is a contraction head. */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
660
1 by brian
clean slate
661
        if (ptr[1] == w_one || ptr[1] == w_many)
662
        {
663
          /* Contraction head followed by a wildcard, quit. */
664
          goto fill_max_and_min;
665
        }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
666
1 by brian
clean slate
667
        /*
668
          Some letters can be both contraction heads and contraction tails.
669
          For example, in Danish 'aa' is a separate single letter which
670
          is sorted after 'z'. So 'a' can be both head and tail.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
671
1 by brian
clean slate
672
          If ptr[0]+ptr[1] is a contraction,
673
          then put both letters together.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
674
1 by brian
clean slate
675
          If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
676
          is not a contraction, then we put only ptr[0],
677
          and continue with ptr[1] on the next loop.
678
        */
481 by Brian Aker
Remove all of uchar.
679
        if (contraction_flags[(unsigned char) ptr[1]] &&
1 by brian
clean slate
680
            cs->contractions[(*ptr-0x40)*0x40 + ptr[1] - 0x40])
681
        {
682
          /* Contraction found */
683
          if (maxcharlen == 1 || min_str + 1 >= min_end)
684
          {
685
            /* Both contraction parts don't fit, quit */
686
            goto fill_max_and_min;
687
          }
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
688
1 by brian
clean slate
689
          /* Put contraction head */
690
          *min_str++= *max_str++= *ptr++;
691
          maxcharlen--;
692
        }
693
      }
694
      /* Put contraction tail, or a single character */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
695
      *min_str++= *max_str++= *ptr++;
1 by brian
clean slate
696
    }
697
  }
698
699
  *min_length= *max_length = (size_t) (min_str - min_org);
700
  while (min_str != min_end)
701
    *min_str++= *max_str++= ' ';           /* Because if key compression */
702
  return 0;
703
}
704
705
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
706
int my_wildcmp_mb_bin(const charset_info_st * const cs,
1 by brian
clean slate
707
                      const char *str,const char *str_end,
708
                      const char *wildstr,const char *wildend,
709
                      int escape, int w_one, int w_many)
710
{
711
  int result= -1;				/* Not found, using wildcards */
712
713
  while (wildstr != wildend)
714
  {
715
    while (*wildstr != w_many && *wildstr != w_one)
716
    {
717
      int l;
718
      if (*wildstr == escape && wildstr+1 != wildend)
719
	wildstr++;
720
      if ((l = my_ismbchar(cs, wildstr, wildend)))
721
      {
722
	  if (str+l > str_end || memcmp(str, wildstr, l) != 0)
723
	      return 1;
724
	  str += l;
725
	  wildstr += l;
726
      }
727
      else
728
      if (str == str_end || *wildstr++ != *str++)
2318.6.77 by Olaf van der Spek
Refactor
729
	return 1;				/* No match */
1 by brian
clean slate
730
      if (wildstr == wildend)
731
	return (str != str_end);		/* Match if both are at end */
732
      result=1;					/* Found an anchor char */
733
    }
734
    if (*wildstr == w_one)
735
    {
736
      do
737
      {
738
	if (str == str_end)			/* Skip one char if possible */
739
	  return (result);
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
740
	inc_ptr(cs,str,str_end);
1 by brian
clean slate
741
      } while (++wildstr < wildend && *wildstr == w_one);
742
      if (wildstr == wildend)
743
	break;
744
    }
745
    if (*wildstr == w_many)
746
    {						/* Found w_many */
481 by Brian Aker
Remove all of uchar.
747
      unsigned char cmp;
1 by brian
clean slate
748
      const char* mb = wildstr;
749
      int mb_len=0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
750
1 by brian
clean slate
751
      wildstr++;
752
      /* Remove any '%' and '_' from the wild search string */
753
      for (; wildstr != wildend ; wildstr++)
754
      {
755
	if (*wildstr == w_many)
756
	  continue;
757
	if (*wildstr == w_one)
758
	{
759
	  if (str == str_end)
760
	    return (-1);
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
761
	  inc_ptr(cs,str,str_end);
1 by brian
clean slate
762
	  continue;
763
	}
764
	break;					/* Not a wild character */
765
      }
766
      if (wildstr == wildend)
2318.6.58 by Olaf van der Spek
Refactor
767
	return 0;				/* Ok if w_many is last */
1 by brian
clean slate
768
      if (str == str_end)
769
	return -1;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
770
1 by brian
clean slate
771
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
772
	cmp= *++wildstr;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
773
1 by brian
clean slate
774
      mb=wildstr;
775
      mb_len= my_ismbchar(cs, wildstr, wildend);
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
776
      inc_ptr(cs,wildstr,wildend);		/* This is compared trough cmp */
1 by brian
clean slate
777
      do
778
      {
779
        for (;;)
780
        {
781
          if (str >= str_end)
782
            return -1;
783
          if (mb_len)
784
          {
785
            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
786
            {
787
              str += mb_len;
788
              break;
789
            }
790
          }
791
          else if (!my_ismbchar(cs, str, str_end) && *str == cmp)
792
          {
793
            str++;
794
            break;
795
          }
1892.6.1 by Gustaf Thorslund
Replaced some more macro functions.
796
          inc_ptr(cs,str, str_end);
1 by brian
clean slate
797
        }
798
	{
799
	  int tmp=my_wildcmp_mb_bin(cs,str,str_end,wildstr,wildend,escape,w_one,w_many);
800
	  if (tmp <= 0)
801
	    return (tmp);
802
	}
803
      } while (str != str_end && wildstr[0] != w_many);
804
      return(-1);
805
    }
806
  }
807
  return (str != str_end ? 1 : 0);
808
}
809
810
811
/*
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
812
  Data was produced from EastAsianWidth.txt
1 by brian
clean slate
813
  using utt11-dump utility.
814
*/
815
static char pg11[256]=
816
{
817
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
818
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
819
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1,
820
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
821
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
822
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
823
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
824
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
825
};
826
827
/*
  Per-code-point flags for page 0x23 (code points 0x2300..0x23FF), reached
  through utr11_data[0x23].p and indexed by the low byte of the code point.
  A 1 makes my_numcells_mb() count the character as two display cells.
*/
static char pg23[256]=
828
{
829
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
830
0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
831
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
832
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
833
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
834
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
835
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
836
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
837
};
838
839
/*
  Per-code-point flags for page 0x2E (code points 0x2E00..0x2EFF), reached
  through utr11_data[0x2E].p and indexed by the low byte of the code point.
  A 1 makes my_numcells_mb() count the character as two display cells.
*/
static char pg2E[256]=
840
{
841
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
842
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
843
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
844
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
845
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,
846
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
847
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
848
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0
849
};
850
851
/*
  Per-code-point flags for page 0x2F (code points 0x2F00..0x2FFF), reached
  through utr11_data[0x2F].p and indexed by the low byte of the code point.
  A 1 makes my_numcells_mb() count the character as two display cells.
*/
static char pg2F[256]=
852
{
853
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
854
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
855
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
856
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
857
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
858
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
859
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
860
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0
861
};
862
863
/*
  Per-code-point flags for page 0x30 (code points 0x3000..0x30FF), reached
  through utr11_data[0x30].p and indexed by the low byte of the code point.
  A 1 makes my_numcells_mb() count the character as two display cells.
*/
static char pg30[256]=
864
{
865
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
866
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
867
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
868
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
869
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,
870
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
871
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
872
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
873
};
874
875
/*
  Per-code-point flags for page 0x31 (code points 0x3100..0x31FF), reached
  through utr11_data[0x31].p and indexed by the low byte of the code point.
  A 1 makes my_numcells_mb() count the character as two display cells.
*/
static char pg31[256]=
876
{
877
0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
878
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
879
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
880
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
881
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
882
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
883
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
884
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
885
};
886
887
/*
  Per-code-point flags for page 0x32 (code points 0x3200..0x32FF), reached
  through utr11_data[0x32].p and indexed by the low byte of the code point.
  A 1 makes my_numcells_mb() count the character as two display cells.
*/
static char pg32[256]=
888
{
889
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
890
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
891
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
892
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,
893
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
894
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
895
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
896
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
897
};
898
899
/*
  Per-code-point flags for page 0x4D (code points 0x4D00..0x4DFF), reached
  through utr11_data[0x4D].p and indexed by the low byte of the code point.
  A 1 makes my_numcells_mb() count the character as two display cells.
*/
static char pg4D[256]=
900
{
901
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
902
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
903
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
904
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
905
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
906
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
907
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
908
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
909
};
910
911
/*
  Per-code-point flags for page 0x9F (code points 0x9F00..0x9FFF), reached
  through utr11_data[0x9F].p and indexed by the low byte of the code point.
  A 1 makes my_numcells_mb() count the character as two display cells.
*/
static char pg9F[256]=
912
{
913
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
914
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
915
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
916
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
917
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
918
1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
919
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
920
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
921
};
922
923
/*
  Per-code-point flags for page 0xA4 (code points 0xA400..0xA4FF), reached
  through utr11_data[0xA4].p and indexed by the low byte of the code point.
  A 1 makes my_numcells_mb() count the character as two display cells.
*/
static char pgA4[256]=
924
{
925
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
926
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
927
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
928
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
929
1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
930
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
931
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
932
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
933
};
934
935
/*
  Per-code-point flags for page 0xD7 (code points 0xD700..0xD7FF), reached
  through utr11_data[0xD7].p and indexed by the low byte of the code point.
  A 1 makes my_numcells_mb() count the character as two display cells.
*/
static char pgD7[256]=
936
{
937
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
938
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
939
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
940
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
941
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
942
1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
943
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
944
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
945
};
946
947
/*
  Per-code-point flags for page 0xFA (code points 0xFA00..0xFAFF), reached
  through utr11_data[0xFA].p and indexed by the low byte of the code point.
  A 1 makes my_numcells_mb() count the character as two display cells.
*/
static char pgFA[256]=
948
{
949
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
950
1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
951
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
952
1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
953
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
954
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
955
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
956
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
957
};
958
959
/*
  Per-code-point flags for page 0xFE (code points 0xFE00..0xFEFF), reached
  through utr11_data[0xFE].p and indexed by the low byte of the code point.
  A 1 makes my_numcells_mb() count the character as two display cells.
*/
static char pgFE[256]=
960
{
961
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
962
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
963
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,
964
1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
965
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
966
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
967
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
968
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
969
};
970
971
/*
  Per-code-point flags for page 0xFF (code points 0xFF00..0xFFFF), reached
  through utr11_data[0xFF].p and indexed by the low byte of the code point.
  A 1 makes my_numcells_mb() count the character as two display cells.
*/
static char pgFF[256]=
972
{
973
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
974
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
975
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
976
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
977
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
978
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
979
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
980
1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
981
};
982
1810.2.1 by tdavies
Modified File: ../drizzled/ctype-mb.cc at line 1075 converted the static C struct which instatiats the utr11_data array to a C++ class
983
/*
  Page dispatch table used by my_numcells_mb(), indexed by the high byte of
  a code point in the range 0..0xFFFF ((wc >> 8) & 0xFF).

  For each page exactly one of the two members is consulted:
    - p != NULL: p points at a 256-entry per-code-point table (pgXX above),
      indexed by the low byte of the code point;
    - p == NULL: `page` is the value applied to every code point of the page.
  In both cases the value (0 or 1) is added on top of the single cell that
  my_numcells_mb() always counts for a decoded character.
*/
static class {
984
public:
985
  /* Whole-page value; only read when p is NULL. */
  int page; 
986
  /* Optional per-code-point table for this page, or NULL. */
  char *p;
987
} 
988
  utr11_data[256]=
1 by brian
clean slate
989
{
990
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
991
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
992
{0,NULL},{0,pg11},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
993
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
994
{0,NULL},{0,NULL},{0,NULL},{0,pg23},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
995
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,pg2E},{0,pg2F},
996
{0,pg30},{0,pg31},{0,pg32},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
997
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
998
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
999
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg4D},{1,NULL},{1,NULL},
1000
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1001
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1002
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1003
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1004
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1005
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1006
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1007
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1008
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1009
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg9F},
1010
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgA4},{0,NULL},{0,NULL},{0,NULL},
1011
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1012
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1013
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1014
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1015
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
1016
{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgD7},
1017
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1018
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1019
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1020
{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
1021
{0,NULL},{1,NULL},{0,pgFA},{0,NULL},{0,NULL},{0,NULL},{0,pgFE},{0,pgFF}
1022
};
1023
1024
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
1025
size_t my_numcells_mb(const charset_info_st * const cs, const char *b, const char *e)
1 by brian
clean slate
1026
{
1027
  my_wc_t wc;
1028
  size_t clen= 0;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
1029
1 by brian
clean slate
1030
  while (b < e)
1031
  {
1032
    int mb_len;
482 by Brian Aker
Remove uint.
1033
    uint32_t pg;
481 by Brian Aker
Remove all of uchar.
1034
    if ((mb_len= cs->cset->mb_wc(cs, &wc, (unsigned char*) b, (unsigned char*) e)) <= 0 ||
1 by brian
clean slate
1035
        wc > 0xFFFF)
1036
    {
1037
      /*
1038
        Let's think a wrong sequence takes 1 dysplay cell.
1039
        Also, consider supplementary characters as taking one cell.
1040
      */
1041
      mb_len= 1;
1042
      b++;
1043
      continue;
1044
    }
1045
    b+= mb_len;
1046
    pg= (wc >> 8) & 0xFF;
1047
    clen+= utr11_data[pg].p ? utr11_data[pg].p[wc & 0xFF] : utr11_data[pg].page;
1048
    clen++;
1049
  }
1050
  return clen;
1051
}
1052
1053
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
1054
2254 by Brian Aker
Shift CHARSET_INFO to charset_info_st
1055
int my_mb_ctype_mb(const charset_info_st * const cs, int *ctype,
481 by Brian Aker
Remove all of uchar.
1056
                   const unsigned char *s, const unsigned char *e)
1 by brian
clean slate
1057
{
1058
  my_wc_t wc;
1059
  int res= cs->cset->mb_wc(cs, &wc, s, e);
1060
  if (res <= 0 || wc > 0xFFFF)
1061
    *ctype= 0;
1062
  else
2440.2.14 by Olaf van der Spek
Refactor
1063
    *ctype= my_uni_ctype[wc>>8].ctype ? my_uni_ctype[wc>>8].ctype[wc&0xFF] : my_uni_ctype[wc>>8].pctype;
1 by brian
clean slate
1064
  return res;
1065
}
1066
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
1067
} /* namespace drizzled */