~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2002 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
#include "m_string.h"
17
#include "m_ctype.h"
18
#include <errno.h>
19
20
#include "stdarg.h"
21
22
/*
23
  Returns the number of bytes required for strnxfrm().
24
*/
25
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
26
size_t my_strnxfrmlen_simple(const CHARSET_INFO * const cs, size_t len)
1 by brian
clean slate
27
{
28
  return len * (cs->strxfrm_multiply ? cs->strxfrm_multiply : 1);
29
}
30
31
32
/*
33
  Converts a string into its sort key.
34
  
35
  SYNOPSIS
36
     my_strnxfrm_xxx()
37
     
38
  IMPLEMENTATION
39
     
40
     The my_strxfrm_xxx() function transforms a string pointed to by
41
     'src' with length 'srclen' according to the charset+collation 
42
     pair 'cs' and copies the result key into 'dest'.
43
     
44
     Comparing two strings using memcmp() after my_strnxfrm_xxx()
45
     is equal to comparing two original strings with my_strnncollsp_xxx().
46
     
47
     Not more than 'dstlen' bytes are written into 'dst'.
48
     To guarantee that the whole string is transformed, 'dstlen' must be
49
     at least srclen*cs->strxfrm_multiply bytes long. Otherwise,
50
     consequent memcmp() may return a non-accurate result.
51
     
52
     If the source string is too short to fill whole 'dstlen' bytes,
53
     then the 'dest' string is padded up to 'dstlen', ensuring that:
54
     
55
       "a"  == "a "
56
       "a\0" < "a"
57
       "a\0" < "a "
58
     
59
     my_strnxfrm_simple() is implemented for 8bit charsets and
60
     simple collations with one-to-one string->key transformation.
61
     
62
     See also implementations for various charsets/collations in  
63
     other ctype-xxx.c files.
64
     
65
  RETURN
66
  
67
    Target len 'dstlen'.
68
  
69
*/
70
71
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
72
size_t my_strnxfrm_simple(const CHARSET_INFO * const  cs,
1 by brian
clean slate
73
                       uchar *dst, size_t dstlen, uint nweights,
74
                       const uchar *src, size_t srclen, uint flags)
75
{
76
  uchar *map= cs->sort_order;
77
  uchar *d0= dst;
78
  uint frmlen;
79
  if ((frmlen= min(dstlen, nweights)) > srclen)
80
    frmlen= srclen;
81
  if (dst != src)
82
  {
83
    const uchar *end;
84
    for (end= src + frmlen; src < end;)
85
      *dst++= map[*src++];
86
  }
87
  else
88
  {
89
    const uchar *end;
90
    for (end= dst + frmlen; dst < end; dst++)
91
      *dst= map[(uchar) *dst];
92
  }
93
  return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, d0 + dstlen,
94
                                         nweights - frmlen, flags, 0);
95
}
96
97
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
98
int my_strnncoll_simple(const CHARSET_INFO * const  cs, const uchar *s, size_t slen, 
1 by brian
clean slate
99
                        const uchar *t, size_t tlen,
276 by Brian Aker
Cleaned out my_bool from strings.
100
                        bool t_is_prefix)
1 by brian
clean slate
101
{
102
  size_t len = ( slen > tlen ) ? tlen : slen;
103
  uchar *map= cs->sort_order;
104
  if (t_is_prefix && slen > tlen)
105
    slen=tlen;
106
  while (len--)
107
  {
108
    if (map[*s++] != map[*t++])
109
      return ((int) map[s[-1]] - (int) map[t[-1]]);
110
  }
111
  /*
112
    We can't use (slen - tlen) here as the result may be outside of the
113
    precision of a signed int
114
  */
115
  return slen > tlen ? 1 : slen < tlen ? -1 : 0 ;
116
}
117
118
119
/*
120
  Compare strings, discarding end space
121
122
  SYNOPSIS
123
    my_strnncollsp_simple()
124
    cs			character set handler
125
    a			First string to compare
126
    a_length		Length of 'a'
127
    b			Second string to compare
128
    b_length		Length of 'b'
129
    diff_if_only_endspace_difference
130
		        Set to 1 if the strings should be regarded as different
131
                        if they differ only in end space
132
133
  IMPLEMENTATION
134
    If one string is shorter than the other, then we space extend the other
135
    so that the strings have equal length.
136
137
    This will ensure that the following things hold:
138
139
    "a"  == "a "
140
    "a\0" < "a"
141
    "a\0" < "a "
142
143
  RETURN
144
    < 0	 a <  b
145
    = 0	 a == b
146
    > 0	 a > b
147
*/
148
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
149
int my_strnncollsp_simple(const CHARSET_INFO * const  cs, const uchar *a, size_t a_length, 
1 by brian
clean slate
150
			  const uchar *b, size_t b_length,
276 by Brian Aker
Cleaned out my_bool from strings.
151
                          bool diff_if_only_endspace_difference)
1 by brian
clean slate
152
{
153
  const uchar *map= cs->sort_order, *end;
154
  size_t length;
155
  int res;
156
157
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
158
  diff_if_only_endspace_difference= 0;
159
#endif
160
161
  end= a + (length= min(a_length, b_length));
162
  while (a < end)
163
  {
164
    if (map[*a++] != map[*b++])
165
      return ((int) map[a[-1]] - (int) map[b[-1]]);
166
  }
167
  res= 0;
168
  if (a_length != b_length)
169
  {
170
    int swap= 1;
171
    if (diff_if_only_endspace_difference)
172
      res= 1;                                   /* Assume 'a' is bigger */
173
    /*
174
      Check the next not space character of the longer key. If it's < ' ',
175
      then it's smaller than the other key.
176
    */
177
    if (a_length < b_length)
178
    {
179
      /* put shorter key in s */
180
      a_length= b_length;
181
      a= b;
182
      swap= -1;                                 /* swap sign of result */
183
      res= -res;
184
    }
185
    for (end= a + a_length-length; a < end ; a++)
186
    {
187
      if (map[*a] != ' ')
188
	return (map[*a] < ' ') ? -swap : swap;
189
    }
190
  }
191
  return res;
192
}
193
194
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
195
size_t my_caseup_str_8bit(const CHARSET_INFO * const  cs,char *str)
1 by brian
clean slate
196
{
197
  register uchar *map= cs->to_upper;
198
  char *str_orig= str;
199
  while ((*str= (char) map[(uchar) *str]) != 0)
200
    str++;
201
  return (size_t) (str - str_orig);
202
}
203
204
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
205
size_t my_casedn_str_8bit(const CHARSET_INFO * const  cs,char *str)
1 by brian
clean slate
206
{
207
  register uchar *map= cs->to_lower;
208
  char *str_orig= str;
209
  while ((*str= (char) map[(uchar) *str]) != 0)
210
    str++;
211
  return (size_t) (str - str_orig);
212
}
213
214
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
215
size_t my_caseup_8bit(const CHARSET_INFO * const  cs, char *src, size_t srclen,
1 by brian
clean slate
216
                      char *dst __attribute__((unused)),
217
                      size_t dstlen __attribute__((unused)))
218
{
219
  char *end= src + srclen;
220
  register uchar *map= cs->to_upper;
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
221
  assert(src == dst && srclen == dstlen);
1 by brian
clean slate
222
  for ( ; src != end ; src++)
223
    *src= (char) map[(uchar) *src];
224
  return srclen;
225
}
226
227
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
228
size_t my_casedn_8bit(const CHARSET_INFO * const  cs, char *src, size_t srclen,
1 by brian
clean slate
229
                      char *dst __attribute__((unused)),
230
                      size_t dstlen __attribute__((unused)))
231
{
232
  char *end= src + srclen;
233
  register uchar *map=cs->to_lower;
51.3.9 by Jay Pipes
Removal of DBUG from strings/ library
234
  assert(src == dst && srclen == dstlen);
1 by brian
clean slate
235
  for ( ; src != end ; src++)
236
    *src= (char) map[(uchar) *src];
237
  return srclen;
238
}
239
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
240
int my_strcasecmp_8bit(const CHARSET_INFO * const  cs,const char *s, const char *t)
1 by brian
clean slate
241
{
242
  register uchar *map=cs->to_upper;
243
  while (map[(uchar) *s] == map[(uchar) *t++])
244
    if (!*s++) return 0;
245
  return ((int) map[(uchar) s[0]] - (int) map[(uchar) t[-1]]);
246
}
247
248
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
249
int my_mb_wc_8bit(const CHARSET_INFO * const cs,my_wc_t *wc,
1 by brian
clean slate
250
		  const uchar *str,
251
		  const uchar *end __attribute__((unused)))
252
{
253
  if (str >= end)
254
    return MY_CS_TOOSMALL;
255
  
256
  *wc=cs->tab_to_uni[*str];
257
  return (!wc[0] && str[0]) ? -1 : 1;
258
}
259
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
260
int my_wc_mb_8bit(const CHARSET_INFO * const cs,my_wc_t wc,
1 by brian
clean slate
261
		  uchar *str,
262
		  uchar *end)
263
{
264
  MY_UNI_IDX *idx;
265
266
  if (str >= end)
267
    return MY_CS_TOOSMALL;
268
  
269
  for (idx=cs->tab_from_uni; idx->tab ; idx++)
270
  {
271
    if (idx->from <= wc && idx->to >= wc)
272
    {
273
      str[0]= idx->tab[wc - idx->from];
274
      return (!str[0] && wc) ? MY_CS_ILUNI : 1;
275
    }
276
  }
277
  return MY_CS_ILUNI;
278
}
279
280
281
/* 
282
   We can't use vsprintf here as it's not guaranteed to return
283
   the length on all operating systems.
284
   This function is also not called in a safe environment, so the
285
   end buffer must be checked.
286
*/
287
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
288
/*
  Format into 'to' (at most 'n' bytes including the terminating NUL) and
  return the number of bytes actually stored, not counting the NUL.

  vsnprintf() returns a negative value on error and the *would-be* length
  when the output is truncated.  Passing either straight through as a
  size_t would report a huge or too-large length, so the result is
  clamped to what really fits in the buffer.
*/
size_t my_snprintf_8bit(const CHARSET_INFO * const cs  __attribute__((unused)),
                        char* to, size_t n,
                        const char* fmt, ...)
{
  va_list args;
  int result;

  va_start(args,fmt);
  result= vsnprintf(to, n, fmt, args);
  va_end(args);

  if (result < 0)
    return 0;                           /* formatting error */
  if ((size_t) result >= n)
    return n ? n - 1 : 0;               /* output was truncated */
  return (size_t) result;
}
299
300
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
301
void my_hash_sort_simple(const CHARSET_INFO * const cs,
1 by brian
clean slate
302
			 const uchar *key, size_t len,
290 by Brian Aker
Update for ulong change over.
303
			 uint32_t *nr1, uint32_t *nr2)
1 by brian
clean slate
304
{
305
  register uchar *sort_order=cs->sort_order;
306
  const uchar *end;
307
  
308
  /*
309
    Remove end space. We have to do this to be able to compare
310
    'A ' and 'A' as identical
311
  */
312
  end= skip_trailing_space(key, len);
313
  
266.7.9 by Andy Lester
removed unnecessary cast
314
  for (; key < end ; key++)
1 by brian
clean slate
315
  {
316
    nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * 
317
	     ((uint) sort_order[(uint) *key])) + (nr1[0] << 8);
318
    nr2[0]+=3;
319
  }
320
}
321
322
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
323
long my_strntol_8bit(const CHARSET_INFO * const cs,
1 by brian
clean slate
324
		     const char *nptr, size_t l, int base,
325
		     char **endptr, int *err)
326
{
327
  int negative;
163 by Brian Aker
Merge Monty's code.
328
  register uint32_t cutoff;
1 by brian
clean slate
329
  register uint cutlim;
163 by Brian Aker
Merge Monty's code.
330
  register uint32_t i;
1 by brian
clean slate
331
  register const char *s;
332
  register uchar c;
333
  const char *save, *e;
334
  int overflow;
335
336
  *err= 0;				/* Initialize error indicator */
337
#ifdef NOT_USED
338
  if (base < 0 || base == 1 || base > 36)
339
    base = 10;
340
#endif
341
342
  s = nptr;
343
  e = nptr+l;
344
  
345
  for ( ; s<e && my_isspace(cs, *s) ; s++);
346
  
347
  if (s == e)
348
  {
349
    goto noconv;
350
  }
351
  
352
  /* Check for a sign.	*/
353
  if (*s == '-')
354
  {
355
    negative = 1;
356
    ++s;
357
  }
358
  else if (*s == '+')
359
  {
360
    negative = 0;
361
    ++s;
362
  }
363
  else
364
    negative = 0;
365
366
#ifdef NOT_USED
367
  if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
368
    s += 2;
369
#endif
370
371
#ifdef NOT_USED
372
  if (base == 0)
373
  {
374
    if (*s == '0')
375
    {
376
      if (s[1]=='X' || s[1]=='x')
377
      {
378
	s += 2;
379
	base = 16;
380
      }
381
      else
382
	base = 8;
383
    }
384
    else
385
      base = 10;
386
  }
387
#endif
388
389
  save = s;
163 by Brian Aker
Merge Monty's code.
390
  cutoff = ((uint32_t)~0L) / (uint32_t) base;
391
  cutlim = (uint) (((uint32_t)~0L) % (uint32_t) base);
1 by brian
clean slate
392
393
  overflow = 0;
394
  i = 0;
395
  for (c = *s; s != e; c = *++s)
396
  {
397
    if (c>='0' && c<='9')
398
      c -= '0';
399
    else if (c>='A' && c<='Z')
400
      c = c - 'A' + 10;
401
    else if (c>='a' && c<='z')
402
      c = c - 'a' + 10;
403
    else
404
      break;
405
    if (c >= base)
406
      break;
407
    if (i > cutoff || (i == cutoff && c > cutlim))
408
      overflow = 1;
409
    else
410
    {
163 by Brian Aker
Merge Monty's code.
411
      i *= (uint32_t) base;
1 by brian
clean slate
412
      i += c;
413
    }
414
  }
415
  
416
  if (s == save)
417
    goto noconv;
418
  
419
  if (endptr != NULL)
420
    *endptr = (char *) s;
421
  
422
  if (negative)
423
  {
163 by Brian Aker
Merge Monty's code.
424
    if (i  > (uint32_t) INT32_MIN)
1 by brian
clean slate
425
      overflow = 1;
426
  }
163 by Brian Aker
Merge Monty's code.
427
  else if (i > INT32_MAX)
1 by brian
clean slate
428
    overflow = 1;
429
  
430
  if (overflow)
431
  {
432
    err[0]= ERANGE;
163 by Brian Aker
Merge Monty's code.
433
    return negative ? INT32_MIN : INT32_MAX;
1 by brian
clean slate
434
  }
435
  
436
  return (negative ? -((long) i) : (long) i);
437
438
noconv:
439
  err[0]= EDOM;
440
  if (endptr != NULL)
441
    *endptr = (char *) nptr;
442
  return 0L;
443
}
444
445
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
446
ulong my_strntoul_8bit(const CHARSET_INFO * const cs,
1 by brian
clean slate
447
		       const char *nptr, size_t l, int base,
448
		       char **endptr, int *err)
449
{
450
  int negative;
163 by Brian Aker
Merge Monty's code.
451
  register uint32_t cutoff;
1 by brian
clean slate
452
  register uint cutlim;
163 by Brian Aker
Merge Monty's code.
453
  register uint32_t i;
1 by brian
clean slate
454
  register const char *s;
455
  register uchar c;
456
  const char *save, *e;
457
  int overflow;
458
459
  *err= 0;				/* Initialize error indicator */
460
#ifdef NOT_USED
461
  if (base < 0 || base == 1 || base > 36)
462
    base = 10;
463
#endif
464
465
  s = nptr;
466
  e = nptr+l;
467
  
468
  for( ; s<e && my_isspace(cs, *s); s++);
469
  
470
  if (s==e)
471
  {
472
    goto noconv;
473
  }
474
475
  if (*s == '-')
476
  {
477
    negative = 1;
478
    ++s;
479
  }
480
  else if (*s == '+')
481
  {
482
    negative = 0;
483
    ++s;
484
  }
485
  else
486
    negative = 0;
487
488
#ifdef NOT_USED
489
  if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
490
    s += 2;
491
#endif
492
493
#ifdef NOT_USED
494
  if (base == 0)
495
  {
496
    if (*s == '0')
497
    {
498
      if (s[1]=='X' || s[1]=='x')
499
      {
500
	s += 2;
501
	base = 16;
502
      }
503
      else
504
	base = 8;
505
    }
506
    else
507
      base = 10;
508
  }
509
#endif
510
511
  save = s;
163 by Brian Aker
Merge Monty's code.
512
  cutoff = ((uint32_t)~0L) / (uint32_t) base;
513
  cutlim = (uint) (((uint32_t)~0L) % (uint32_t) base);
1 by brian
clean slate
514
  overflow = 0;
515
  i = 0;
516
  
517
  for (c = *s; s != e; c = *++s)
518
  {
519
    if (c>='0' && c<='9')
520
      c -= '0';
521
    else if (c>='A' && c<='Z')
522
      c = c - 'A' + 10;
523
    else if (c>='a' && c<='z')
524
      c = c - 'a' + 10;
525
    else
526
      break;
527
    if (c >= base)
528
      break;
529
    if (i > cutoff || (i == cutoff && c > cutlim))
530
      overflow = 1;
531
    else
532
    {
163 by Brian Aker
Merge Monty's code.
533
      i *= (uint32_t) base;
1 by brian
clean slate
534
      i += c;
535
    }
536
  }
537
538
  if (s == save)
539
    goto noconv;
540
541
  if (endptr != NULL)
542
    *endptr = (char *) s;
543
544
  if (overflow)
545
  {
546
    err[0]= ERANGE;
163 by Brian Aker
Merge Monty's code.
547
    return (~(uint32_t) 0);
1 by brian
clean slate
548
  }
549
  
550
  return (negative ? -((long) i) : (long) i);
551
  
552
noconv:
553
  err[0]= EDOM;
554
  if (endptr != NULL)
555
    *endptr = (char *) nptr;
556
  return 0L;
557
}
558
559
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
560
int64_t my_strntoll_8bit(const CHARSET_INFO * const cs __attribute__((unused)),
1 by brian
clean slate
561
			  const char *nptr, size_t l, int base,
562
			  char **endptr,int *err)
563
{
564
  int negative;
151 by Brian Aker
Ulonglong to uint64_t
565
  register uint64_t cutoff;
1 by brian
clean slate
566
  register uint cutlim;
151 by Brian Aker
Ulonglong to uint64_t
567
  register uint64_t i;
1 by brian
clean slate
568
  register const char *s, *e;
569
  const char *save;
570
  int overflow;
571
572
  *err= 0;				/* Initialize error indicator */
573
#ifdef NOT_USED
574
  if (base < 0 || base == 1 || base > 36)
575
    base = 10;
576
#endif
577
578
  s = nptr;
579
  e = nptr+l;
580
581
  for(; s<e && my_isspace(cs,*s); s++);
582
583
  if (s == e)
584
  {
585
    goto noconv;
586
  }
587
588
  if (*s == '-')
589
  {
590
    negative = 1;
591
    ++s;
592
  }
593
  else if (*s == '+')
594
  {
595
    negative = 0;
596
    ++s;
597
  }
598
  else
599
    negative = 0;
600
601
#ifdef NOT_USED
602
  if (base == 16 && s[0] == '0' && (s[1]=='X'|| s[1]=='x'))
603
    s += 2;
604
#endif
605
606
#ifdef NOT_USED
607
  if (base == 0)
608
  {
609
    if (*s == '0')
610
    {
611
      if (s[1]=='X' || s[1]=='x')
612
      {
613
	s += 2;
614
	base = 16;
615
      }
616
      else
617
	base = 8;
618
    }
619
    else
620
      base = 10;
621
  }
622
#endif
623
624
  save = s;
625
151 by Brian Aker
Ulonglong to uint64_t
626
  cutoff = (~(uint64_t) 0) / (unsigned long int) base;
627
  cutlim = (uint) ((~(uint64_t) 0) % (unsigned long int) base);
1 by brian
clean slate
628
629
  overflow = 0;
630
  i = 0;
631
  for ( ; s != e; s++)
632
  {
633
    register uchar c= *s;
634
    if (c>='0' && c<='9')
635
      c -= '0';
636
    else if (c>='A' && c<='Z')
637
      c = c - 'A' + 10;
638
    else if (c>='a' && c<='z')
639
      c = c - 'a' + 10;
640
    else
641
      break;
642
    if (c >= base)
643
      break;
644
    if (i > cutoff || (i == cutoff && c > cutlim))
645
      overflow = 1;
646
    else
647
    {
151 by Brian Aker
Ulonglong to uint64_t
648
      i *= (uint64_t) base;
1 by brian
clean slate
649
      i += c;
650
    }
651
  }
652
653
  if (s == save)
654
    goto noconv;
655
656
  if (endptr != NULL)
657
    *endptr = (char *) s;
658
659
  if (negative)
660
  {
163 by Brian Aker
Merge Monty's code.
661
    if (i  > (uint64_t) INT64_MIN)
1 by brian
clean slate
662
      overflow = 1;
663
  }
163 by Brian Aker
Merge Monty's code.
664
  else if (i > (uint64_t) INT64_MAX)
1 by brian
clean slate
665
    overflow = 1;
666
667
  if (overflow)
668
  {
669
    err[0]= ERANGE;
163 by Brian Aker
Merge Monty's code.
670
    return negative ? INT64_MIN : INT64_MAX;
1 by brian
clean slate
671
  }
672
152 by Brian Aker
longlong replacement
673
  return (negative ? -((int64_t) i) : (int64_t) i);
1 by brian
clean slate
674
675
noconv:
676
  err[0]= EDOM;
677
  if (endptr != NULL)
678
    *endptr = (char *) nptr;
679
  return 0L;
680
}
681
682
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
683
uint64_t my_strntoull_8bit(const CHARSET_INFO * const cs,
1 by brian
clean slate
684
			   const char *nptr, size_t l, int base,
685
			   char **endptr, int *err)
686
{
687
  int negative;
151 by Brian Aker
Ulonglong to uint64_t
688
  register uint64_t cutoff;
1 by brian
clean slate
689
  register uint cutlim;
151 by Brian Aker
Ulonglong to uint64_t
690
  register uint64_t i;
1 by brian
clean slate
691
  register const char *s, *e;
692
  const char *save;
693
  int overflow;
694
695
  *err= 0;				/* Initialize error indicator */
696
#ifdef NOT_USED
697
  if (base < 0 || base == 1 || base > 36)
698
    base = 10;
699
#endif
700
701
  s = nptr;
702
  e = nptr+l;
703
704
  for(; s<e && my_isspace(cs,*s); s++);
705
706
  if (s == e)
707
  {
708
    goto noconv;
709
  }
710
711
  if (*s == '-')
712
  {
713
    negative = 1;
714
    ++s;
715
  }
716
  else if (*s == '+')
717
  {
718
    negative = 0;
719
    ++s;
720
  }
721
  else
722
    negative = 0;
723
724
#ifdef NOT_USED
725
  if (base == 16 && s[0] == '0' && (s[1]=='X' || s[1]=='x'))
726
    s += 2;
727
#endif
728
729
#ifdef NOT_USED
730
  if (base == 0)
731
  {
732
    if (*s == '0')
733
    {
734
      if (s[1]=='X' || s[1]=='x')
735
      {
736
	s += 2;
737
	base = 16;
738
      }
739
      else
740
	base = 8;
741
    }
742
    else
743
      base = 10;
744
  }
745
#endif
746
747
  save = s;
748
151 by Brian Aker
Ulonglong to uint64_t
749
  cutoff = (~(uint64_t) 0) / (unsigned long int) base;
750
  cutlim = (uint) ((~(uint64_t) 0) % (unsigned long int) base);
1 by brian
clean slate
751
752
  overflow = 0;
753
  i = 0;
754
  for ( ; s != e; s++)
755
  {
756
    register uchar c= *s;
757
758
    if (c>='0' && c<='9')
759
      c -= '0';
760
    else if (c>='A' && c<='Z')
761
      c = c - 'A' + 10;
762
    else if (c>='a' && c<='z')
763
      c = c - 'a' + 10;
764
    else
765
      break;
766
    if (c >= base)
767
      break;
768
    if (i > cutoff || (i == cutoff && c > cutlim))
769
      overflow = 1;
770
    else
771
    {
151 by Brian Aker
Ulonglong to uint64_t
772
      i *= (uint64_t) base;
1 by brian
clean slate
773
      i += c;
774
    }
775
  }
776
777
  if (s == save)
778
    goto noconv;
779
780
  if (endptr != NULL)
781
    *endptr = (char *) s;
782
783
  if (overflow)
784
  {
785
    err[0]= ERANGE;
151 by Brian Aker
Ulonglong to uint64_t
786
    return (~(uint64_t) 0);
1 by brian
clean slate
787
  }
788
152 by Brian Aker
longlong replacement
789
  return (negative ? -((int64_t) i) : (int64_t) i);
1 by brian
clean slate
790
791
noconv:
792
  err[0]= EDOM;
793
  if (endptr != NULL)
794
    *endptr = (char *) nptr;
795
  return 0L;
796
}
797
798
799
/*
800
  Read double from string
801
802
  SYNOPSIS:
803
    my_strntod_8bit()
804
    cs		Character set information
805
    str		String to convert to double
806
    length	Optional length for string.
807
    end		result pointer to end of converted string
808
    err		Error number if failed conversion
809
    
810
  NOTES:
163 by Brian Aker
Merge Monty's code.
811
    If length is not INT32_MAX or str[length] != 0 then the given str must
1 by brian
clean slate
812
    be writeable
163 by Brian Aker
Merge Monty's code.
813
    If length == INT32_MAX the str must be \0 terminated.
1 by brian
clean slate
814
815
    It's implemented this way to save a buffer allocation and a memory copy.
816
817
  RETURN
818
    Value of number in string
819
*/
820
821
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
822
double my_strntod_8bit(const CHARSET_INFO * const cs __attribute__((unused)),
                       char *str, size_t length,
                       char **end, int *err)
{
  /* INT32_MAX stands for "length not given"; 65535 should be big enough. */
  const size_t span= (length == INT32_MAX) ? 65535 : length;

  /* *end is passed to my_strtod() already pointing at the scan limit. */
  *end= str + span;
  return my_strtod(str, end, err);
}
831
832
833
/*
834
  This is a fast version optimized for the case of radix 10 / -10
835
836
  Assume len >= 1
837
*/
838
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
839
size_t my_long10_to_str_8bit(const CHARSET_INFO * const cs __attribute__((unused)),
1 by brian
clean slate
840
                             char *dst, size_t len, int radix, long int val)
841
{
842
  char buffer[66];
843
  register char *p, *e;
844
  long int new_val;
845
  uint sign=0;
846
  unsigned long int uval = (unsigned long int) val;
847
848
  e = p = &buffer[sizeof(buffer)-1];
849
  *p= 0;
850
  
851
  if (radix < 0)
852
  {
853
    if (val < 0)
854
    {
163 by Brian Aker
Merge Monty's code.
855
      /* Avoid integer overflow in (-val) for INT64_MIN (BUG#31799). */
1 by brian
clean slate
856
      uval= (unsigned long int)0 - uval;
857
      *dst++= '-';
858
      len--;
859
      sign= 1;
860
    }
861
  }
862
  
863
  new_val = (long) (uval / 10);
864
  *--p    = '0'+ (char) (uval - (unsigned long) new_val * 10);
865
  val     = new_val;
866
  
867
  while (val != 0)
868
  {
869
    new_val=val/10;
870
    *--p = '0' + (char) (val-new_val*10);
871
    val= new_val;
872
  }
873
  
874
  len= min(len, (size_t) (e-p));
875
  memcpy(dst, p, len);
876
  return len+sign;
877
}
878
879
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
880
size_t my_int64_t10_to_str_8bit(const CHARSET_INFO * const cs __attribute__((unused)),
1 by brian
clean slate
881
                                 char *dst, size_t len, int radix,
152 by Brian Aker
longlong replacement
882
                                 int64_t val)
1 by brian
clean slate
883
{
884
  char buffer[65];
885
  register char *p, *e;
886
  long long_val;
887
  uint sign= 0;
151 by Brian Aker
Ulonglong to uint64_t
888
  uint64_t uval = (uint64_t)val;
1 by brian
clean slate
889
  
890
  if (radix < 0)
891
  {
892
    if (val < 0)
893
    {
163 by Brian Aker
Merge Monty's code.
894
      /* Avoid integer overflow in (-val) for INT64_MIN (BUG#31799). */
151 by Brian Aker
Ulonglong to uint64_t
895
      uval = (uint64_t)0 - uval;
1 by brian
clean slate
896
      *dst++= '-';
897
      len--;
898
      sign= 1;
899
    }
900
  }
901
  
902
  e = p = &buffer[sizeof(buffer)-1];
903
  *p= 0;
904
  
905
  if (uval == 0)
906
  {
907
    *--p= '0';
908
    len= 1;
909
    goto cnv;
910
  }
911
  
151 by Brian Aker
Ulonglong to uint64_t
912
  while (uval > (uint64_t) LONG_MAX)
1 by brian
clean slate
913
  {
151 by Brian Aker
Ulonglong to uint64_t
914
    uint64_t quo= uval/(uint) 10;
1 by brian
clean slate
915
    uint rem= (uint) (uval- quo* (uint) 10);
916
    *--p = '0' + rem;
917
    uval= quo;
918
  }
919
  
920
  long_val= (long) uval;
921
  while (long_val != 0)
922
  {
923
    long quo= long_val/10;
924
    *--p = (char) ('0' + (long_val - quo*10));
925
    long_val= quo;
926
  }
927
  
928
  len= min(len, (size_t) (e-p));
929
cnv:
930
  memcpy(dst, p, len);
931
  return len+sign;
932
}
933
934
935
/*
936
** Compare string against string with wildcard
937
**	0 if matched
938
**	-1 if not matched with wildcard
939
**	 1 if matched with wildcard
940
*/
941
942
#ifdef LIKE_CMP_TOUPPER
943
#define likeconv(s,A) (uchar) my_toupper(s,A)
944
#else
945
#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
946
#endif
947
948
#define INC_PTR(cs,A,B) (A)++
949
950
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
951
int my_wildcmp_8bit(const CHARSET_INFO * const cs,
1 by brian
clean slate
952
		    const char *str,const char *str_end,
953
		    const char *wildstr,const char *wildend,
954
		    int escape, int w_one, int w_many)
955
{
956
  int result= -1;			/* Not found, using wildcards */
957
958
  while (wildstr != wildend)
959
  {
960
    while (*wildstr != w_many && *wildstr != w_one)
961
    {
962
      if (*wildstr == escape && wildstr+1 != wildend)
963
	wildstr++;
964
965
      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
966
	return(1);				/* No match */
967
      if (wildstr == wildend)
968
	return(str != str_end);		/* Match if both are at end */
969
      result=1;					/* Found an anchor char     */
970
    }
971
    if (*wildstr == w_one)
972
    {
973
      do
974
      {
975
	if (str == str_end)			/* Skip one char if possible */
976
	  return(result);
977
	INC_PTR(cs,str,str_end);
978
      } while (++wildstr < wildend && *wildstr == w_one);
979
      if (wildstr == wildend)
980
	break;
981
    }
982
    if (*wildstr == w_many)
983
    {						/* Found w_many */
984
      uchar cmp;
985
      
986
      wildstr++;
987
      /* Remove any '%' and '_' from the wild search string */
988
      for (; wildstr != wildend ; wildstr++)
989
      {
990
	if (*wildstr == w_many)
991
	  continue;
992
	if (*wildstr == w_one)
993
	{
994
	  if (str == str_end)
995
	    return(-1);
996
	  INC_PTR(cs,str,str_end);
997
	  continue;
998
	}
999
	break;					/* Not a wild character */
1000
      }
1001
      if (wildstr == wildend)
1002
	return(0);				/* Ok if w_many is last */
1003
      if (str == str_end)
1004
	return(-1);
1005
      
1006
      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
1007
	cmp= *++wildstr;
1008
1009
      INC_PTR(cs,wildstr,wildend);	/* This is compared trough cmp */
1010
      cmp=likeconv(cs,cmp);
1011
      do
1012
      {
1013
	while (str != str_end && (uchar) likeconv(cs,*str) != cmp)
1014
	  str++;
1015
	if (str++ == str_end) return(-1);
1016
	{
1017
	  int tmp=my_wildcmp_8bit(cs,str,str_end,wildstr,wildend,escape,w_one,
1018
				  w_many);
1019
	  if (tmp <= 0)
1020
	    return(tmp);
1021
	}
1022
      } while (str != str_end && wildstr[0] != w_many);
1023
      return(-1);
1024
    }
1025
  }
1026
  return(str != str_end ? 1 : 0);
1027
}
1028
1029
1030
/*
1031
** Calculate min_str and max_str that ranges a LIKE string.
1032
** Arguments:
1033
** ptr		Pointer to LIKE string.
1034
** ptr_length	Length of LIKE string.
1035
** escape	Escape character in LIKE.  (Normally '\').
77.1.95 by Monty Taylor
Fixed silly my_bool==char nonsense.
1036
**		All escape characters should be removed from 
1037
**              min_str and max_str
1 by brian
clean slate
1038
** res_length	Length of min_str and max_str.
1039
** min_str	Smallest case sensitive string that ranges LIKE.
1040
**		Should be space padded to res_length.
1041
** max_str	Largest case sensitive string that ranges LIKE.
1042
**		Normally padded with the biggest character sort value.
1043
**
1044
** The function should return 0 if ok and 1 if the LIKE string can't be
1045
** optimized !
1046
*/
1047
276 by Brian Aker
Cleaned out my_bool from strings.
1048
bool my_like_range_simple(const CHARSET_INFO * const cs,
77.1.95 by Monty Taylor
Fixed silly my_bool==char nonsense.
1049
                             const char *ptr, size_t ptr_length,
1050
                             char escape, char w_one, char w_many,
1051
                             size_t res_length,
1052
                             char *min_str,char *max_str,
1053
                             size_t *min_length, size_t *max_length)
1 by brian
clean slate
1054
{
1055
  const char *end= ptr + ptr_length;
1056
  char *min_org=min_str;
1057
  char *min_end=min_str+res_length;
1058
  size_t charlen= res_length / cs->mbmaxlen;
1059
1060
  for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--)
1061
  {
1062
    if (*ptr == escape && ptr+1 != end)
1063
    {
1064
      ptr++;					/* Skip escape */
1065
      *min_str++= *max_str++ = *ptr;
1066
      continue;
1067
    }
1068
    if (*ptr == w_one)				/* '_' in SQL */
1069
    {
1070
      *min_str++='\0';				/* This should be min char */
1071
      *max_str++= (char) cs->max_sort_char;
1072
      continue;
1073
    }
1074
    if (*ptr == w_many)				/* '%' in SQL */
1075
    {
1076
      /* Calculate length of keys */
1077
      *min_length= ((cs->state & MY_CS_BINSORT) ?
1078
                    (size_t) (min_str - min_org) :
1079
                    res_length);
1080
      *max_length= res_length;
1081
      do
1082
      {
1083
	*min_str++= 0;
1084
	*max_str++= (char) cs->max_sort_char;
1085
      } while (min_str != min_end);
1086
      return 0;
1087
    }
1088
    *min_str++= *max_str++ = *ptr;
1089
  }
1090
1091
 *min_length= *max_length = (size_t) (min_str - min_org);
1092
  while (min_str != min_end)
1093
    *min_str++= *max_str++ = ' ';      /* Because if key compression */
1094
  return 0;
1095
}
1096
1097
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1098
size_t my_scan_8bit(const CHARSET_INFO * const cs, const char *str, const char *end, int sq)
1 by brian
clean slate
1099
{
1100
  const char *str0= str;
1101
  switch (sq)
1102
  {
1103
  case MY_SEQ_INTTAIL:
1104
    if (*str == '.')
1105
    {
1106
      for(str++ ; str != end && *str == '0' ; str++);
1107
      return (size_t) (str - str0);
1108
    }
1109
    return 0;
1110
1111
  case MY_SEQ_SPACES:
1112
    for ( ; str < end ; str++)
1113
    {
1114
      if (!my_isspace(cs,*str))
1115
        break;
1116
    }
1117
    return (size_t) (str - str0);
1118
  default:
1119
    return 0;
1120
  }
1121
}
1122
1123
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1124
/*
  Fill l bytes starting at s with the byte value 'fill'.
  The character set argument is not used.
*/
void my_fill_8bit(const CHARSET_INFO * const cs __attribute__((unused)),
                  char *s, size_t l, int fill)
{
  void *dst= s;

  memset(dst, fill, l);
}
1129
1130
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1131
/*
  Number of characters in [b, e): computed as the byte distance e - b.
  The character set argument is not used.
*/
size_t my_numchars_8bit(const CHARSET_INFO * const cs __attribute__((unused)),
                        const char *b, const char *e)
{
  size_t nbytes= (size_t) (e - b);

  return nbytes;
}
1136
1137
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1138
/*
  Number of cells for [b, e): computed as the byte distance e - b.
  The character set argument is not used.
*/
size_t my_numcells_8bit(const CHARSET_INFO * const cs __attribute__((unused)),
                        const char *b, const char *e)
{
  size_t nbytes= (size_t) (e - b);

  return nbytes;
}
1143
1144
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1145
/*
  Byte offset of character number 'pos'.
  This implementation returns 'pos' unchanged and ignores the buffer
  bounds b and e as well as the character set.
*/
size_t my_charpos_8bit(const CHARSET_INFO * const cs __attribute__((unused)),
                       const char *b  __attribute__((unused)),
                       const char *e  __attribute__((unused)),
                       size_t pos)
{
  size_t byte_offset= pos;

  return byte_offset;
}
1152
1153
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1154
/*
  Length of the prefix of [start, end) that holds at most nchars
  characters.

  *error is always set to 0.  The result is the smaller of the byte
  length of the range and nchars.
*/
size_t my_well_formed_len_8bit(const CHARSET_INFO * const cs __attribute__((unused)),
                               const char *start, const char *end,
                               size_t nchars, int *error)
{
  size_t available= (size_t) (end - start);

  *error= 0;
  if (available < nchars)
    return available;
  return nchars;
}
1162
1163
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1164
size_t my_lengthsp_8bit(const CHARSET_INFO * const cs __attribute__((unused)),
1 by brian
clean slate
1165
                        const char *ptr, size_t length)
1166
{
1167
  const char *end;
1168
  end= (const char *) skip_trailing_space((const uchar *)ptr, length);
1169
  return (size_t) (end-ptr);
1170
}
1171
1172
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1173
uint my_instr_simple(const CHARSET_INFO * const cs,
1 by brian
clean slate
1174
                     const char *b, size_t b_length, 
1175
                     const char *s, size_t s_length,
1176
                     my_match_t *match, uint nmatch)
1177
{
1178
  register const uchar *str, *search, *end, *search_end;
1179
  
1180
  if (s_length <= b_length)
1181
  {
1182
    if (!s_length)
1183
    {
1184
      if (nmatch)
1185
      {
1186
        match->beg= 0;
1187
        match->end= 0;
1188
        match->mb_len= 0;
1189
      }
1190
      return 1;		/* Empty string is always found */
1191
    }
1192
    
1193
    str= (const uchar*) b;
1194
    search= (const uchar*) s;
1195
    end= (const uchar*) b+b_length-s_length+1;
1196
    search_end= (const uchar*) s + s_length;
1197
    
1198
skip:
1199
    while (str != end)
1200
    {
1201
      if (cs->sort_order[*str++] == cs->sort_order[*search])
1202
      {
1203
	register const uchar *i,*j;
1204
	
1205
	i= str; 
1206
	j= search+1;
1207
	
1208
	while (j != search_end)
1209
	  if (cs->sort_order[*i++] != cs->sort_order[*j++]) 
1210
            goto skip;
1211
        
1212
	if (nmatch > 0)
1213
	{
1214
	  match[0].beg= 0;
1215
	  match[0].end= (size_t) (str- (const uchar*)b-1);
1216
	  match[0].mb_len= match[0].end;
1217
	  
1218
	  if (nmatch > 1)
1219
	  {
1220
	    match[1].beg= match[0].end;
1221
	    match[1].end= match[0].end+s_length;
1222
	    match[1].mb_len= match[1].end-match[1].beg;
1223
	  }
1224
	}
1225
	return 2;
1226
      }
1227
    }
1228
  }
1229
  return 0;
1230
}
1231
1232
1233
typedef struct
1234
{
1235
  int		nchars;
1236
  MY_UNI_IDX	uidx;
1237
} uni_idx;
1238
1239
/* A plane groups the code points that share the same bits above the low byte */
#define PLANE_SIZE 0x100
#define PLANE_NUM  0x100
/* Plane index of code point x: drop the low 8 bits, wrap at PLANE_NUM */
#define PLANE_NUMBER(x) (((x) >> 8) % PLANE_NUM)
1242
1243
static int pcmp(const void * f, const void * s)
1244
{
1245
  const uni_idx *F= (const uni_idx*) f;
1246
  const uni_idx *S= (const uni_idx*) s;
1247
  int res;
1248
1249
  if (!(res=((S->nchars)-(F->nchars))))
1250
    res=((F->uidx.from)-(S->uidx.to));
1251
  return res;
1252
}
1253
276 by Brian Aker
Cleaned out my_bool from strings.
1254
/*
  Build cs->tab_from_uni, the Unicode -> 8-bit reverse mapping, from
  cs->tab_to_uni.

  The 256 entries of tab_to_uni are grouped by plane, the planes are
  sorted with pcmp(), and for every non-empty plane a table covering
  its from..to code point range is allocated with 'alloc' and filled
  in.  The resulting list is terminated by a zeroed MY_UNI_IDX entry.

  Returns true when tab_to_uni is missing or an allocation fails,
  false on success.
*/
static bool create_fromuni(CHARSET_INFO *cs, void *(*alloc)(size_t))
{
  uni_idx planes[PLANE_NUM];
  int code, used, k;

  /*
    The Unicode map can be absent when the collation is listed in
    Index.xml but not specified in the character set specific XML file.
  */
  if (cs->tab_to_uni == NULL)
    return true;

  /* Start with empty statistics for every plane */
  memset(planes, 0, sizeof(planes));

  /* Pass 1: per plane, count characters and track the code point range */
  for (code= 0; code < 0x100; code++)
  {
    uint16_t wc= cs->tab_to_uni[code];
    uni_idx *plane= &planes[PLANE_NUMBER(wc)];

    /* A zero mapping only counts for character 0 itself */
    if (!wc && code)
      continue;

    if (plane->nchars == 0)
    {
      plane->uidx.from= wc;
      plane->uidx.to= wc;
    }
    else
    {
      if (wc < plane->uidx.from)
        plane->uidx.from= wc;
      if (wc > plane->uidx.to)
        plane->uidx.to= wc;
    }
    plane->nchars++;
  }

  /* Most populated planes first; empty planes end up at the tail */
  qsort(planes, PLANE_NUM, sizeof(uni_idx), pcmp);

  /* Pass 2: allocate and fill the reverse table of each used plane */
  for (used= 0; used < PLANE_NUM && planes[used].nchars; used++)
  {
    MY_UNI_IDX *range= &planes[used].uidx;
    int span= range->to - range->from + 1;
    int ch;

    range->tab= (uchar*) alloc(span * sizeof(*range->tab));
    if (range->tab == NULL)
      return true;

    memset(range->tab, 0, span * sizeof(*range->tab));

    for (ch= 1; ch < PLANE_SIZE; ch++)
    {
      uint16_t wc= cs->tab_to_uni[ch];

      if (wc && wc >= range->from && wc <= range->to)
        range->tab[wc - range->from]= ch;
    }
  }

  /* Publish the used planes followed by an end-of-list marker */
  cs->tab_from_uni= (MY_UNI_IDX*) alloc(sizeof(MY_UNI_IDX) * (used + 1));
  if (cs->tab_from_uni == NULL)
    return true;

  for (k= 0; k < used; k++)
    cs->tab_from_uni[k]= planes[k].uidx;

  memset(&cs->tab_from_uni[used], 0, sizeof(MY_UNI_IDX));
  return false;
}
1332
276 by Brian Aker
Cleaned out my_bool from strings.
1333
/*
  Character set initialiser for 8-bit character sets.

  Sets both case-conversion multipliers to 1 and the pad character to
  a space, then builds the Unicode reverse map.  Returns the result of
  create_fromuni().
*/
static bool my_cset_init_8bit(CHARSET_INFO *cs, void *(*alloc)(size_t))
{
  bool failed;

  cs->pad_char= ' ';
  cs->caseup_multiply= 1;
  cs->casedn_multiply= 1;

  failed= create_fromuni(cs, alloc);
  return failed;
}
1340
1341
/*
  Point cs->max_sort_char at the character with the greatest weight in
  cs->sort_order.

  The weight of the current max_sort_char is the starting candidate;
  it is replaced only by a strictly greater weight, so on ties the
  earlier character is kept.  Does nothing when there is no sort_order
  table.
*/
static void set_max_sort_char(CHARSET_INFO *cs)
{
  uchar heaviest;
  uint  code;

  if (cs->sort_order == NULL)
    return;

  heaviest= cs->sort_order[(uchar) cs->max_sort_char];
  for (code= 0; code < 256; code++)
  {
    uchar weight= (uchar) cs->sort_order[code];

    if (weight <= heaviest)
      continue;

    heaviest= weight;
    cs->max_sort_char= code;
  }
}
1359
276 by Brian Aker
Cleaned out my_bool from strings.
1360
/*
  Collation initialiser for simple 8-bit collations.

  Recomputes cs->max_sort_char from the sort order table.  The
  allocator is not used.  Always returns false.
*/
static bool my_coll_init_simple(CHARSET_INFO *cs,
                                void *(*alloc)(size_t) __attribute__((unused)))
{
  const bool failed= false;

  set_max_sort_char(cs);
  return failed;
}
1366
1367
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1368
int64_t my_strtoll10_8bit(const CHARSET_INFO * const cs __attribute__((unused)),
1 by brian
clean slate
1369
                           const char *nptr, char **endptr, int *error)
1370
{
1371
  return my_strtoll10(nptr, endptr, error);
1372
}
1373
1374
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1375
int my_mb_ctype_8bit(const CHARSET_INFO * const cs, int *ctype,
1 by brian
clean slate
1376
                   const uchar *s, const uchar *e)
1377
{
1378
  if (s >= e)
1379
  {
1380
    *ctype= 0;
1381
    return MY_CS_TOOSMALL;
1382
  }
1383
  *ctype= cs->ctype[*s + 1];
1384
  return 1;
1385
}
1386
1387
163 by Brian Aker
Merge Monty's code.
1388
/* Use a definition of UINT64_MAX that does not depend on the platform headers */
#undef  UINT64_MAX
#define UINT64_MAX           (~(uint64_t) 0)

/*
  Overflow guards for "value * 10 + digit" in 64-bit unsigned
  arithmetic: the step is safe when value < CUTOFF, or when
  value == CUTOFF and digit <= CUTLIM.
*/
#define CUTOFF  (UINT64_MAX / 10)
#define CUTLIM  (UINT64_MAX % 10)
#define DIGITS_IN_ULONGLONG 20

/* Powers of ten: d10[n] == 10^n for n in 0..DIGITS_IN_ULONGLONG-1 (read-only) */
static const uint64_t d10[DIGITS_IN_ULONGLONG]=
{
  1,
  10,
  100,
  1000,
  10000,
  100000,
  1000000,
  10000000,
  100000000,
  1000000000,
  10000000000ULL,
  100000000000ULL,
  1000000000000ULL,
  10000000000000ULL,
  100000000000000ULL,
  1000000000000000ULL,
  10000000000000000ULL,
  100000000000000000ULL,
  1000000000000000000ULL,
  10000000000000000000ULL
};
1418
1419
1420
/*
1421
1422
  Convert a string to unsigned long long integer value
1423
  with rounding.
1424
  
1425
  SYNOPSIS
1426
    my_strntoull10_8bit()
1427
      cs              in      pointer to character set
1428
      str             in      pointer to the string to be converted
1429
      length          in      string length
1430
      unsigned_flag   in      whether the number is unsigned
1431
      endptr          out     pointer to the stop character
1432
      error           out     returned error code
1433
1434
  DESCRIPTION
1435
    This function takes the decimal representation of integer number
1436
    from string str and converts it to a signed or unsigned
1437
    long long integer value.
1438
    Space characters and tab are ignored.
1439
    A sign character might precede the digit characters.
1440
    The number may have any number of pre-zero digits.
1441
    The number may have decimal point and exponent.
1442
    Rounding is always done in "away from zero" style:
1443
      0.5  ->   1
1444
     -0.5  ->  -1
1445
1446
    The function stops reading the string str after "length" bytes
1447
    or at the first character that is not a part of correct number syntax:
1448
1449
    <signed numeric literal> ::=
1450
      [ <sign> ] <exact numeric literal> [ E [ <sign> ] <unsigned integer> ]
1451
1452
    <exact numeric literal> ::=
1453
                        <unsigned integer> [ <period> [ <unsigned integer> ] ]
1454
                      | <period> <unsigned integer>
1455
    <unsigned integer>   ::= <digit>...
1456
     
1457
  RETURN VALUES
152 by Brian Aker
longlong replacement
1458
    Value of string as a signed/unsigned int64_t integer
1 by brian
clean slate
1459
1460
    endptr cannot be NULL. The function will store the end pointer
1461
    to the stop character here.
1462
1463
    The error parameter contains information how things went:
1464
    0	     ok
1465
    ERANGE   If the value of the converted number is out of range
1466
    In this case the return value is:
163 by Brian Aker
Merge Monty's code.
1467
    - UINT64_MAX if unsigned_flag and the number was too big
1 by brian
clean slate
1468
    - 0 if unsigned_flag and the number was negative
163 by Brian Aker
Merge Monty's code.
1469
    - INT64_MAX if no unsigned_flag and the number is too big
1470
    - INT64_MIN if no unsigned_flag and the number is too big negative
1 by brian
clean slate
1471
    
1472
    EDOM If the string didn't contain any digits.
1473
    In this case the return value is 0.
1474
*/
1475
151 by Brian Aker
Ulonglong to uint64_t
1476
uint64_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1477
my_strntoull10rnd_8bit(const CHARSET_INFO * const cs __attribute__((unused)),
1 by brian
clean slate
1478
                       const char *str, size_t length, int unsigned_flag,
1479
                       char **endptr, int *error)
1480
{
1481
  const char *dot, *end9, *beg, *end= str + length;
151 by Brian Aker
Ulonglong to uint64_t
1482
  uint64_t ull;
1 by brian
clean slate
1483
  ulong ul;
1484
  uchar ch;
1485
  int shift= 0, digits= 0, negative, addon;
1486
1487
  /* Skip leading spaces and tabs */
1488
  for ( ; str < end && (*str == ' ' || *str == '\t') ; str++);
1489
1490
  if (str >= end)
1491
    goto ret_edom;
1492
1493
  if ((negative= (*str == '-')) || *str=='+') /* optional sign */
1494
  {
1495
    if (++str == end)
1496
      goto ret_edom;
1497
  }
1498
1499
  beg= str;
1500
  end9= (str + 9) > end ? end : (str + 9);
1501
  /* Accumulate small number into ulong, for performance purposes */
1502
  for (ul= 0 ; str < end9 && (ch= (uchar) (*str - '0')) < 10; str++)
1503
  {
1504
    ul= ul * 10 + ch;
1505
  }
1506
  
1507
  if (str >= end) /* Small number without dots and expanents */
1508
  {
1509
    *endptr= (char*) str;
1510
    if (negative)
1511
    {
1512
      if (unsigned_flag)
1513
      {
236.1.31 by Monty Taylor
Re-remove mysys header from mystrings.
1514
        *error= ul ? ERANGE : 0;
1 by brian
clean slate
1515
        return 0;
1516
      }
1517
      else
1518
      {
1519
        *error= 0;
152 by Brian Aker
longlong replacement
1520
        return (uint64_t) (int64_t) -(long) ul;
1 by brian
clean slate
1521
      }
1522
    }
1523
    else
1524
    {
1525
      *error=0;
151 by Brian Aker
Ulonglong to uint64_t
1526
      return (uint64_t) ul;
1 by brian
clean slate
1527
    }
1528
  }
1529
  
1530
  digits= str - beg;
1531
151 by Brian Aker
Ulonglong to uint64_t
1532
  /* Continue to accumulate into uint64_t */
1 by brian
clean slate
1533
  for (dot= NULL, ull= ul; str < end; str++)
1534
  {
1535
    if ((ch= (uchar) (*str - '0')) < 10)
1536
    {
1537
      if (ull < CUTOFF || (ull == CUTOFF && ch <= CUTLIM))
1538
      {
1539
        ull= ull * 10 + ch;
1540
        digits++;
1541
        continue;
1542
      }
1543
      /*
1544
        Adding the next digit would overflow.
1545
        Remember the next digit in "addon", for rounding.
1546
        Scan all digits with an optional single dot.
1547
      */
1548
      if (ull == CUTOFF)
1549
      {
163 by Brian Aker
Merge Monty's code.
1550
        ull= UINT64_MAX;
1 by brian
clean slate
1551
        addon= 1;
1552
        str++;
1553
      }
1554
      else
1555
        addon= (*str >= '5');
1556
      if (!dot)
1557
      {
1558
        for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; shift++, str++);
1559
        if (str < end && *str == '.')
1560
        {
1561
          str++;
1562
          for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
1563
        }
1564
      }
1565
      else
1566
      {
1567
        shift= dot - str;
1568
        for ( ; str < end && (ch= (uchar) (*str - '0')) < 10; str++);
1569
      }
1570
      goto exp;
1571
    }
1572
    
1573
    if (*str == '.')
1574
    {
1575
      if (dot)
1576
      {
1577
        /* The second dot character */
1578
        addon= 0;
1579
        goto exp;
1580
      }
1581
      else
1582
      {
1583
        dot= str + 1;
1584
      }
1585
      continue;
1586
    }
1587
    
1588
    /* Unknown character, exit the loop */
1589
    break; 
1590
  }
1591
  shift= dot ? dot - str : 0; /* Right shift */
1592
  addon= 0;
1593
1594
exp:    /* [ E [ <sign> ] <unsigned integer> ] */
1595
1596
  if (!digits)
1597
  {
1598
    str= beg;
1599
    goto ret_edom;
1600
  }
1601
  
1602
  if (str < end && (*str == 'e' || *str == 'E'))
1603
  {
1604
    str++;
1605
    if (str < end)
1606
    {
1607
      int negative_exp, exponent;
1608
      if ((negative_exp= (*str == '-')) || *str=='+')
1609
      {
1610
        if (++str == end)
1611
          goto ret_sign;
1612
      }
1613
      for (exponent= 0 ;
1614
           str < end && (ch= (uchar) (*str - '0')) < 10;
1615
           str++)
1616
      {
1617
        exponent= exponent * 10 + ch;
1618
      }
1619
      shift+= negative_exp ? -exponent : exponent;
1620
    }
1621
  }
1622
  
1623
  if (shift == 0) /* No shift, check addon digit */
1624
  {
1625
    if (addon)
1626
    {
163 by Brian Aker
Merge Monty's code.
1627
      if (ull == UINT64_MAX)
1 by brian
clean slate
1628
        goto ret_too_big;
1629
      ull++;
1630
    }
1631
    goto ret_sign;
1632
  }
1633
1634
  if (shift < 0) /* Right shift */
1635
  {
151 by Brian Aker
Ulonglong to uint64_t
1636
    uint64_t d, r;
1 by brian
clean slate
1637
    
1638
    if (-shift >= DIGITS_IN_ULONGLONG)
1639
      goto ret_zero; /* Exponent is a big negative number, return 0 */
1640
    
1641
    d= d10[-shift];
1642
    r= (ull % d) * 2;
1643
    ull /= d;
1644
    if (r >= d)
1645
      ull++;
1646
    goto ret_sign;
1647
  }
1648
1649
  if (shift > DIGITS_IN_ULONGLONG) /* Huge left shift */
1650
  {
1651
    if (!ull)
1652
      goto ret_sign;
1653
    goto ret_too_big;
1654
  }
1655
1656
  for ( ; shift > 0; shift--, ull*= 10) /* Left shift */
1657
  {
1658
    if (ull > CUTOFF)
1659
      goto ret_too_big; /* Overflow, number too big */
1660
  }
1661
1662
ret_sign:
1663
  *endptr= (char*) str;
1664
1665
  if (!unsigned_flag)
1666
  {
1667
    if (negative)
1668
    {
163 by Brian Aker
Merge Monty's code.
1669
      if (ull > (uint64_t) INT64_MIN)
1 by brian
clean slate
1670
      {
236.1.31 by Monty Taylor
Re-remove mysys header from mystrings.
1671
        *error= ERANGE;
163 by Brian Aker
Merge Monty's code.
1672
        return (uint64_t) INT64_MIN;
1 by brian
clean slate
1673
      }
1674
      *error= 0;
152 by Brian Aker
longlong replacement
1675
      return (uint64_t) -(int64_t) ull;
1 by brian
clean slate
1676
    }
1677
    else
1678
    {
163 by Brian Aker
Merge Monty's code.
1679
      if (ull > (uint64_t) INT64_MAX)
1 by brian
clean slate
1680
      {
236.1.31 by Monty Taylor
Re-remove mysys header from mystrings.
1681
        *error= ERANGE;
163 by Brian Aker
Merge Monty's code.
1682
        return (uint64_t) INT64_MAX;
1 by brian
clean slate
1683
      }
1684
      *error= 0;
1685
      return ull;
1686
    }
1687
  }
1688
1689
  /* Unsigned number */
1690
  if (negative && ull)
1691
  {
236.1.31 by Monty Taylor
Re-remove mysys header from mystrings.
1692
    *error= ERANGE;
1 by brian
clean slate
1693
    return 0;
1694
  }
1695
  *error= 0;
1696
  return ull;
1697
1698
ret_zero:
1699
  *endptr= (char*) str;
1700
  *error= 0;
1701
  return 0;
1702
1703
ret_edom:
1704
  *endptr= (char*) str;
236.1.31 by Monty Taylor
Re-remove mysys header from mystrings.
1705
  *error= EDOM;
1 by brian
clean slate
1706
  return 0;
1707
  
1708
ret_too_big:
1709
  *endptr= (char*) str;
236.1.31 by Monty Taylor
Re-remove mysys header from mystrings.
1710
  *error= ERANGE;
1 by brian
clean slate
1711
  return unsigned_flag ?
163 by Brian Aker
Merge Monty's code.
1712
         UINT64_MAX :
1713
         negative ? (uint64_t) INT64_MIN : (uint64_t) INT64_MAX;
1 by brian
clean slate
1714
}
1715
1716
1717
/*
1718
  Check if a constant can be propagated
1719
1720
  SYNOPSIS:
1721
    my_propagate_simple()
1722
    cs		Character set information
1723
    str		Constant string to check (currently unused)
1724
    length	Optional length for string.
1725
    
1726
  NOTES:
1727
   Takes the string in the given charset and check
1728
   if it can be safely propagated in the optimizer.
1729
   
1730
   create table t1 (
1731
     s char(5) character set latin1 collate latin1_german2_ci);
1732
   insert into t1 values (0xf6); -- o-umlaut
1733
   select * from t1 where length(s)=1 and s='oe';
1734
1735
   The above query should return one row.
1736
   We cannot convert this query into:
1737
   select * from t1 where length('oe')=1 and s='oe';
1738
   
1739
   Currently we don't check the constant itself,
1740
   and decide not to propagate a constant
1741
   just if the collation itself allows tricky things
1742
   like expansions and contractions. In the future
1743
   we can write a more sophisticated functions to
1744
   check the constants. For example, 'oa' can always
1745
   be safely propagated in German2 because unlike 
1746
   'oe' it does not have any special meaning.
1747
1748
  RETURN
1749
    1 if constant can be safely propagated
1750
    0 if it is not safe to propagate the constant
1751
*/
1752
1753
1754
276 by Brian Aker
Cleaned out my_bool from strings.
1755
bool my_propagate_simple(const CHARSET_INFO * const cs __attribute__((unused)),
1 by brian
clean slate
1756
                            const uchar *str __attribute__((unused)),
1757
                            size_t length __attribute__((unused)))
1758
{
1759
  return 1;
1760
}
1761
1762
276 by Brian Aker
Cleaned out my_bool from strings.
1763
bool my_propagate_complex(const CHARSET_INFO * const cs __attribute__((unused)),
1 by brian
clean slate
1764
                             const uchar *str __attribute__((unused)),
1765
                             size_t length __attribute__((unused)))
1766
{
1767
  return 0;
1768
}
1769
1770
1771
1772
/*
1773
  Normalize strxfrm flags
1774
1775
  SYNOPSIS:
1776
    my_strxfrm_flag_normalize()
1777
    flags    - non-normalized flags
1778
    nlevels  - number of levels
1779
    
1780
  NOTES:
1781
    If levels are omitted, then 1-maximum is assumed.
1782
    If any level number is greater than the maximum,
1783
    it is treated as the maximum.
1784
1785
  RETURN
1786
    normalized flags
1787
*/
1788
1789
/*
  Bring strxfrm flags into canonical form for a collation that has
  'maximum' levels (1 .. MY_STRXFRM_NLEVELS).

  - With no level bits set, levels 1..maximum are selected.
  - Otherwise every requested level above 'maximum' is folded onto the
    highest available level; DESC and REVERSE bits are carried over for
    the resulting level bits.
  - The MY_STRXFRM_PAD_WITH_SPACE bit is preserved in both cases.
*/
uint my_strxfrm_flag_normalize(uint flags, uint maximum)
{
  const uint pad= flags & MY_STRXFRM_PAD_WITH_SPACE;
  const uint levels= flags & MY_STRXFRM_LEVEL_ALL;
  uint desc, reverse, top, result, lvl;

  assert(maximum >= 1 && maximum <= MY_STRXFRM_NLEVELS);

  if (!levels)
  {
    /* If levels are omitted, then 1-maximum is assumed */
    static uint def_level_flags[]= {0, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F };
    return def_level_flags[maximum] | pad;
  }

  desc= (flags >> MY_STRXFRM_DESC_SHIFT) & MY_STRXFRM_LEVEL_ALL;
  reverse= (flags >> MY_STRXFRM_REVERSE_SHIFT) & MY_STRXFRM_LEVEL_ALL;
  top= maximum - 1;                     /* index of the highest level */
  result= 0;

  for (lvl= 0; lvl < MY_STRXFRM_NLEVELS; lvl++)
  {
    uint src_bit= 1 << lvl;
    uint dst_bit= 1 << (lvl < top ? lvl : top);

    if (!(levels & src_bit))
    {
      /* A skipped level must not carry DESC or REVERSE flags */
      assert(!(desc & src_bit) && !(reverse & src_bit));
      continue;
    }

    result|= dst_bit;
    result|= (desc & dst_bit) << MY_STRXFRM_DESC_SHIFT;
    result|= (reverse & dst_bit) << MY_STRXFRM_REVERSE_SHIFT;
  }

  return result | pad;
}
1833
1834
/*
1835
  Apply DESC and REVERSE collation rules.
1836
1837
  SYNOPSIS:
1838
    my_strxfrm_desc_and_reverse()
1839
    str      - pointer to string
1840
    strend   - end of string
1841
    flags    - flags
1842
    level    - which level, starting from 0.
1843
    
1844
  NOTES:
1845
    Apply DESC or REVERSE or both flags.
1846
    
1847
    If DESC flag is given, then the weights
1848
    come out NOTed or negated for that level.
1849
    
1850
    If REVERSE flags is given, then the weights come out in
1851
    reverse order for that level, that is, starting with
1852
    the last character and ending with the first character.
1853
    
1854
    If neither DESC nor REVERSE flags are given,
1855
    the string is not changed.
1856
    
1857
*/
1858
/*
  Apply the DESC and/or REVERSE flag of one level to the weights in
  [str, strend), in place.

  DESC     every byte is replaced by its bitwise complement
  REVERSE  the byte order is reversed
  both     the bytes are reversed and complemented
  neither  the buffer is left untouched
*/
void my_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
                                 uint flags, uint level)
{
  const int want_desc= (flags & (MY_STRXFRM_DESC_LEVEL1 << level)) != 0;
  const int want_reverse= (flags & (MY_STRXFRM_REVERSE_LEVEL1 << level)) != 0;
  uchar *lo, *hi;

  if (!want_desc && !want_reverse)
    return;

  /* Nothing to transform in an empty range */
  if (str >= strend)
    return;

  if (!want_reverse)
  {
    /* DESC only: complement each weight */
    for (lo= str; lo != strend; lo++)
      *lo= ~*lo;
    return;
  }

  /* Swap from both ends towards the middle, complementing if DESC */
  lo= str;
  hi= strend - 1;
  while (lo < hi)
  {
    uchar front= *lo;
    uchar back= *hi;

    *lo++= want_desc ? (uchar) ~back : back;
    *hi--= want_desc ? (uchar) ~front : front;
  }

  /* With an odd length the middle weight stays put but is complemented */
  if (want_desc && lo == hi)
    *lo= ~*lo;
}
1888
1889
1890
size_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1891
my_strxfrm_pad_desc_and_reverse(const CHARSET_INFO * const cs,
1 by brian
clean slate
1892
                                uchar *str, uchar *frmend, uchar *strend,
1893
                                uint nweights, uint flags, uint level)
1894
{
1895
  if (nweights && frmend < strend && (flags & MY_STRXFRM_PAD_WITH_SPACE))
1896
  {
1897
    uint fill_length= min((uint) (strend - frmend), nweights * cs->mbminlen);
1898
    cs->cset->fill(cs, (char*) frmend, fill_length, cs->pad_char);
1899
    frmend+= fill_length;
1900
  }
1901
  my_strxfrm_desc_and_reverse(str, frmend, flags, level);
1902
  return frmend - str;
1903
}
1904
1905
1906
MY_CHARSET_HANDLER my_charset_8bit_handler=
1907
{
1908
    my_cset_init_8bit,
1909
    NULL,			/* ismbchar      */
1910
    my_mbcharlen_8bit,		/* mbcharlen     */
1911
    my_numchars_8bit,
1912
    my_charpos_8bit,
1913
    my_well_formed_len_8bit,
1914
    my_lengthsp_8bit,
1915
    my_numcells_8bit,
1916
    my_mb_wc_8bit,
1917
    my_wc_mb_8bit,
1918
    my_mb_ctype_8bit,
1919
    my_caseup_str_8bit,
1920
    my_casedn_str_8bit,
1921
    my_caseup_8bit,
1922
    my_casedn_8bit,
1923
    my_snprintf_8bit,
1924
    my_long10_to_str_8bit,
152 by Brian Aker
longlong replacement
1925
    my_int64_t10_to_str_8bit,
1 by brian
clean slate
1926
    my_fill_8bit,
1927
    my_strntol_8bit,
1928
    my_strntoul_8bit,
1929
    my_strntoll_8bit,
1930
    my_strntoull_8bit,
1931
    my_strntod_8bit,
1932
    my_strtoll10_8bit,
1933
    my_strntoull10rnd_8bit,
1934
    my_scan_8bit
1935
};
1936
1937
MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
1938
{
1939
    my_coll_init_simple,	/* init */
1940
    my_strnncoll_simple,
1941
    my_strnncollsp_simple,
1942
    my_strnxfrm_simple,
1943
    my_strnxfrmlen_simple,
1944
    my_like_range_simple,
1945
    my_wildcmp_8bit,
1946
    my_strcasecmp_8bit,
1947
    my_instr_simple,
1948
    my_hash_sort_simple,
1949
    my_propagate_simple
1950
};