~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
/* This file is originally from the mysql distribution. Coded by monty */
17
212.5.39 by Monty Taylor
Phew. Moved my_base and my_global.
18
#include "global.h"
212.5.13 by Monty Taylor
Moved my_sys/my_pthread/my_nosys and mysys_err to mysys.
19
#include <mysys/my_sys.h>
212.5.45 by Monty Taylor
Removed excess AM_CPPFLAGS from the tree. Now the only thing that should be in the include path should be -I${top_srcdir} and -I${top_builddir}w
20
#include <mystrings/m_string.h>
1 by brian
clean slate
21
398.1.5 by Monty Taylor
Removed C++ includes and std namespace from global.h.
22
#include <algorithm>
23
1 by brian
clean slate
24
/*
25
  The following extern declarations are ok as these are interface functions
26
  required by the string function
27
*/
28
481 by Brian Aker
Remove all of uchar.
29
extern unsigned char* sql_alloc(unsigned size);
1 by brian
clean slate
30
extern void sql_element_free(void *ptr);
31
32
#include "sql_string.h"
33
34
/*****************************************************************************
35
** String functions
36
*****************************************************************************/
37
205 by Brian Aker
uint32 -> uin32_t
38
bool String::real_alloc(uint32_t arg_length)
1 by brian
clean slate
39
{
40
  arg_length=ALIGN_SIZE(arg_length+1);
41
  str_length=0;
42
  if (Alloced_length < arg_length)
43
  {
44
    free();
641.3.6 by Monty Taylor
Removed some my_malloc calls.
45
    if (!(Ptr=(char*) malloc(arg_length)))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
46
      return true;
1 by brian
clean slate
47
    Alloced_length=arg_length;
48
    alloced=1;
49
  }
50
  Ptr[0]=0;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
51
  return false;
1 by brian
clean slate
52
}
53
54
55
/*
56
** Check that string is big enough. Set string[alloc_length] to 0
57
** (for C functions)
58
*/
59
205 by Brian Aker
uint32 -> uin32_t
60
bool String::realloc(uint32_t alloc_length)
1 by brian
clean slate
61
{
205 by Brian Aker
uint32 -> uin32_t
62
  uint32_t len=ALIGN_SIZE(alloc_length+1);
1 by brian
clean slate
63
  if (Alloced_length < len)
64
  {
65
    char *new_ptr;
66
    if (alloced)
67
    {
656.1.26 by Monty Taylor
Finally removed all of the my_malloc stuff.
68
      if ((new_ptr= (char*) ::realloc(Ptr,len)))
1 by brian
clean slate
69
      {
70
	Ptr=new_ptr;
71
	Alloced_length=len;
72
      }
73
      else
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
74
	return true;				// Signal error
1 by brian
clean slate
75
    }
641.3.6 by Monty Taylor
Removed some my_malloc calls.
76
    else if ((new_ptr= (char*) malloc(len)))
1 by brian
clean slate
77
    {
78
      if (str_length)				// Avoid bugs in memcpy on AIX
79
	memcpy(new_ptr,Ptr,str_length);
80
      new_ptr[str_length]=0;
81
      Ptr=new_ptr;
82
      Alloced_length=len;
83
      alloced=1;
84
    }
85
    else
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
86
      return true;			// Signal error
1 by brian
clean slate
87
  }
88
  Ptr[alloc_length]=0;			// This make other funcs shorter
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
89
  return false;
1 by brian
clean slate
90
}
91
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
92
bool String::set_int(int64_t num, bool unsigned_flag, const CHARSET_INFO * const cs)
1 by brian
clean slate
93
{
482 by Brian Aker
Remove uint.
94
  uint32_t l=20*cs->mbmaxlen+1;
1 by brian
clean slate
95
  int base= unsigned_flag ? 10 : -10;
96
97
  if (alloc(l))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
98
    return true;
205 by Brian Aker
uint32 -> uin32_t
99
  str_length=(uint32_t) (cs->cset->int64_t10_to_str)(cs,Ptr,l,base,num);
1 by brian
clean slate
100
  str_charset=cs;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
101
  return false;
1 by brian
clean slate
102
}
103
482 by Brian Aker
Remove uint.
104
bool String::set_real(double num,uint32_t decimals, const CHARSET_INFO * const cs)
1 by brian
clean slate
105
{
106
  char buff[FLOATING_POINT_BUFFER];
482 by Brian Aker
Remove uint.
107
  uint32_t dummy_errors;
1 by brian
clean slate
108
  size_t len;
109
110
  str_charset=cs;
111
  if (decimals >= NOT_FIXED_DEC)
112
  {
113
    len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
383.1.12 by Brian Aker
Much closer toward UTF8 being around all the time...
114
    return copy(buff, len, &my_charset_utf8_general_ci, cs, &dummy_errors);
1 by brian
clean slate
115
  }
116
  len= my_fcvt(num, decimals, buff, NULL);
383.1.12 by Brian Aker
Much closer toward UTF8 being around all the time...
117
  return copy(buff, (uint32_t) len, &my_charset_utf8_general_ci, cs,
1 by brian
clean slate
118
              &dummy_errors);
119
}
120
121
122
bool String::copy()
123
{
124
  if (!alloced)
125
  {
126
    Alloced_length=0;				// Force realloc
127
    return realloc(str_length);
128
  }
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
129
  return false;
1 by brian
clean slate
130
}
131
132
bool String::copy(const String &str)
133
{
134
  if (alloc(str.str_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
135
    return true;
1 by brian
clean slate
136
  str_length=str.str_length;
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
137
  memmove(Ptr, str.Ptr, str_length);		// May be overlapping
1 by brian
clean slate
138
  Ptr[str_length]=0;
139
  str_charset=str.str_charset;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
140
  return false;
1 by brian
clean slate
141
}
142
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
143
bool String::copy(const char *str,uint32_t arg_length, const CHARSET_INFO * const cs)
1 by brian
clean slate
144
{
145
  if (alloc(arg_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
146
    return true;
1 by brian
clean slate
147
  if ((str_length=arg_length))
148
    memcpy(Ptr,str,arg_length);
149
  Ptr[arg_length]=0;
150
  str_charset=cs;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
151
  return false;
1 by brian
clean slate
152
}
153
154
155
/*
156
  Checks that the source string can be just copied to the destination string
157
  without conversion.
158
159
  SYNPOSIS
160
161
  needs_conversion()
162
  arg_length		Length of string to copy.
163
  from_cs		Character set to copy from
164
  to_cs			Character set to copy to
205 by Brian Aker
uint32 -> uin32_t
165
  uint32_t *offset	Returns number of unaligned characters.
1 by brian
clean slate
166
167
  RETURN
168
   0  No conversion needed
169
   1  Either character set conversion or adding leading  zeros
170
      (e.g. for UCS-2) must be done
171
172
  NOTE
173
  to_cs may be NULL for "no conversion" if the system variable
174
  character_set_results is NULL.
175
*/
176
205 by Brian Aker
uint32 -> uin32_t
177
bool String::needs_conversion(uint32_t arg_length,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
178
			      const CHARSET_INFO * const from_cs,
179
			      const CHARSET_INFO * const to_cs,
205 by Brian Aker
uint32 -> uin32_t
180
			      uint32_t *offset)
1 by brian
clean slate
181
{
182
  *offset= 0;
183
  if (!to_cs ||
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
184
      (to_cs == &my_charset_bin) ||
1 by brian
clean slate
185
      (to_cs == from_cs) ||
186
      my_charset_same(from_cs, to_cs) ||
187
      ((from_cs == &my_charset_bin) &&
188
       (!(*offset=(arg_length % to_cs->mbminlen)))))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
189
    return false;
190
  return true;
1 by brian
clean slate
191
}
192
193
194
195
205 by Brian Aker
uint32 -> uin32_t
196
bool String::set_or_copy_aligned(const char *str,uint32_t arg_length,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
197
                                 const CHARSET_INFO * const cs)
1 by brian
clean slate
198
{
199
  /* How many bytes are in incomplete character */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
200
  uint32_t offset= (arg_length % cs->mbminlen);
201
910.1.5 by Brian Aker
Remove some dead bits of string (and fix the semi_join test).
202
  assert(!offset); /* All characters are complete, just copy */
203
204
  set(str, arg_length, cs);
205
  return false;
1 by brian
clean slate
206
}
207
208
	/* Copy with charset conversion */
209
205 by Brian Aker
uint32 -> uin32_t
210
bool String::copy(const char *str, uint32_t arg_length,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
211
		          const CHARSET_INFO * const from_cs,
482 by Brian Aker
Remove uint.
212
				  const CHARSET_INFO * const to_cs, uint32_t *errors)
1 by brian
clean slate
213
{
205 by Brian Aker
uint32 -> uin32_t
214
  uint32_t offset;
1 by brian
clean slate
215
  if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
216
  {
217
    *errors= 0;
218
    return copy(str, arg_length, to_cs);
219
  }
220
  if ((from_cs == &my_charset_bin) && offset)
221
  {
222
    *errors= 0;
910.1.5 by Brian Aker
Remove some dead bits of string (and fix the semi_join test).
223
    assert((from_cs == &my_charset_bin) && offset);
224
    return false; //copy_aligned(str, arg_length, offset, to_cs);
1 by brian
clean slate
225
  }
205 by Brian Aker
uint32 -> uin32_t
226
  uint32_t new_length= to_cs->mbmaxlen*arg_length;
1 by brian
clean slate
227
  if (alloc(new_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
228
    return true;
1 by brian
clean slate
229
  str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
230
                              str, arg_length, from_cs, errors);
231
  str_charset=to_cs;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
232
  return false;
1 by brian
clean slate
233
}
234
235
236
/*
237
  Set a string to the value of a latin1-string, keeping the original charset
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
238
1 by brian
clean slate
239
  SYNOPSIS
240
    copy_or_set()
241
    str			String of a simple charset (latin1)
242
    arg_length		Length of string
243
244
  IMPLEMENTATION
245
    If string object is of a simple character set, set it to point to the
246
    given string.
247
    If not, make a copy and convert it to the new character set.
248
249
  RETURN
250
    0	ok
251
    1	Could not allocate result buffer
252
253
*/
254
205 by Brian Aker
uint32 -> uin32_t
255
bool String::set_ascii(const char *str, uint32_t arg_length)
1 by brian
clean slate
256
{
257
  if (str_charset->mbminlen == 1)
258
  {
259
    set(str, arg_length, str_charset);
260
    return 0;
261
  }
482 by Brian Aker
Remove uint.
262
  uint32_t dummy_errors;
383.1.12 by Brian Aker
Much closer toward UTF8 being around all the time...
263
  return copy(str, arg_length, &my_charset_utf8_general_ci, str_charset, &dummy_errors);
1 by brian
clean slate
264
}
265
266
bool String::append(const String &s)
267
{
268
  if (s.length())
269
  {
270
    if (realloc(str_length+s.length()))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
271
      return true;
1 by brian
clean slate
272
    memcpy(Ptr+str_length,s.ptr(),s.length());
273
    str_length+=s.length();
274
  }
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
275
  return false;
1 by brian
clean slate
276
}
277
278
279
/*
280
  Append an ASCII string to the a string of the current character set
281
*/
282
205 by Brian Aker
uint32 -> uin32_t
283
bool String::append(const char *s,uint32_t arg_length)
1 by brian
clean slate
284
{
285
  if (!arg_length)
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
286
    return false;
1 by brian
clean slate
287
288
  /*
289
    For an ASCII incompatible string, e.g. UCS-2, we need to convert
290
  */
291
  if (str_charset->mbminlen > 1)
292
  {
205 by Brian Aker
uint32 -> uin32_t
293
    uint32_t add_length=arg_length * str_charset->mbmaxlen;
482 by Brian Aker
Remove uint.
294
    uint32_t dummy_errors;
1 by brian
clean slate
295
    if (realloc(str_length+ add_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
296
      return true;
1 by brian
clean slate
297
    str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
383.1.12 by Brian Aker
Much closer toward UTF8 being around all the time...
298
				  s, arg_length, &my_charset_utf8_general_ci,
1 by brian
clean slate
299
                                  &dummy_errors);
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
300
    return false;
1 by brian
clean slate
301
  }
302
303
  /*
304
    For an ASCII compatinble string we can just append.
305
  */
306
  if (realloc(str_length+arg_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
307
    return true;
1 by brian
clean slate
308
  memcpy(Ptr+str_length,s,arg_length);
309
  str_length+=arg_length;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
310
  return false;
1 by brian
clean slate
311
}
312
313
314
/*
315
  Append a 0-terminated ASCII string
316
*/
317
318
bool String::append(const char *s)
319
{
320
  return append(s, strlen(s));
321
}
322
323
324
/*
325
  Append a string in the given charset to the string
326
  with character set recoding
327
*/
328
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
329
bool String::append(const char *s,uint32_t arg_length, const CHARSET_INFO * const cs)
1 by brian
clean slate
330
{
205 by Brian Aker
uint32 -> uin32_t
331
  uint32_t dummy_offset;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
332
1 by brian
clean slate
333
  if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
334
  {
205 by Brian Aker
uint32 -> uin32_t
335
    uint32_t add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
482 by Brian Aker
Remove uint.
336
    uint32_t dummy_errors;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
337
    if (realloc(str_length + add_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
338
      return true;
1 by brian
clean slate
339
    str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
340
				  s, arg_length, cs, &dummy_errors);
341
  }
342
  else
343
  {
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
344
    if (realloc(str_length + arg_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
345
      return true;
1 by brian
clean slate
346
    memcpy(Ptr + str_length, s, arg_length);
347
    str_length+= arg_length;
348
  }
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
349
  return false;
1 by brian
clean slate
350
}
351
352
205 by Brian Aker
uint32 -> uin32_t
353
bool String::append_with_prefill(const char *s,uint32_t arg_length,
354
		 uint32_t full_length, char fill_char)
1 by brian
clean slate
355
{
356
  int t_length= arg_length > full_length ? arg_length : full_length;
357
358
  if (realloc(str_length + t_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
359
    return true;
1 by brian
clean slate
360
  t_length= full_length - arg_length;
361
  if (t_length > 0)
362
  {
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
363
    memset(Ptr+str_length, fill_char, t_length);
1 by brian
clean slate
364
    str_length=str_length + t_length;
365
  }
366
  append(s, arg_length);
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
367
  return false;
1 by brian
clean slate
368
}
369
205 by Brian Aker
uint32 -> uin32_t
370
uint32_t String::numchars()
1 by brian
clean slate
371
{
372
  return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
373
}
374
205 by Brian Aker
uint32 -> uin32_t
375
int String::charpos(int i,uint32_t offset)
1 by brian
clean slate
376
{
377
  if (i <= 0)
378
    return i;
379
  return str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,i);
380
}
381
205 by Brian Aker
uint32 -> uin32_t
382
int String::strstr(const String &s,uint32_t offset)
1 by brian
clean slate
383
{
384
  if (s.length()+offset <= str_length)
385
  {
386
    if (!s.length())
387
      return ((int) offset);	// Empty string is always found
388
389
    register const char *str = Ptr+offset;
390
    register const char *search=s.ptr();
391
    const char *end=Ptr+str_length-s.length()+1;
392
    const char *search_end=s.ptr()+s.length();
393
skip:
394
    while (str != end)
395
    {
396
      if (*str++ == *search)
397
      {
398
	register char *i,*j;
399
	i=(char*) str; j=(char*) search+1;
400
	while (j != search_end)
401
	  if (*i++ != *j++) goto skip;
402
	return (int) (str-Ptr) -1;
403
      }
404
    }
405
  }
406
  return -1;
407
}
408
409
/*
410
** Search string from end. Offset is offset to the end of string
411
*/
412
205 by Brian Aker
uint32 -> uin32_t
413
int String::strrstr(const String &s,uint32_t offset)
1 by brian
clean slate
414
{
415
  if (s.length() <= offset && offset <= str_length)
416
  {
417
    if (!s.length())
418
      return offset;				// Empty string is always found
419
    register const char *str = Ptr+offset-1;
420
    register const char *search=s.ptr()+s.length()-1;
421
422
    const char *end=Ptr+s.length()-2;
423
    const char *search_end=s.ptr()-1;
424
skip:
425
    while (str != end)
426
    {
427
      if (*str-- == *search)
428
      {
429
	register char *i,*j;
430
	i=(char*) str; j=(char*) search-1;
431
	while (j != search_end)
432
	  if (*i-- != *j--) goto skip;
433
	return (int) (i-Ptr) +1;
434
      }
435
    }
436
  }
437
  return -1;
438
}
439
440
/*
441
  Replace substring with string
442
  If wrong parameter or not enough memory, do nothing
443
*/
444
205 by Brian Aker
uint32 -> uin32_t
445
bool String::replace(uint32_t offset,uint32_t arg_length,const String &to)
1 by brian
clean slate
446
{
447
  return replace(offset,arg_length,to.ptr(),to.length());
448
}
449
205 by Brian Aker
uint32 -> uin32_t
450
bool String::replace(uint32_t offset,uint32_t arg_length,
451
                     const char *to, uint32_t to_length)
1 by brian
clean slate
452
{
453
  long diff = (long) to_length-(long) arg_length;
454
  if (offset+arg_length <= str_length)
455
  {
456
    if (diff < 0)
457
    {
458
      if (to_length)
459
	memcpy(Ptr+offset,to,to_length);
629.3.4 by Kristian Nielsen
Take Mats'es changes from bmove()->memcpy(), and fix all of them to be
460
      memmove(Ptr+offset+to_length, Ptr+offset+arg_length,
461
              str_length-offset-arg_length);
1 by brian
clean slate
462
    }
463
    else
464
    {
465
      if (diff)
466
      {
205 by Brian Aker
uint32 -> uin32_t
467
	if (realloc(str_length+(uint32_t) diff))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
468
	  return true;
481 by Brian Aker
Remove all of uchar.
469
	bmove_upp((unsigned char*) Ptr+str_length+diff, (unsigned char*) Ptr+str_length,
1 by brian
clean slate
470
		  str_length-offset-arg_length);
471
      }
472
      if (to_length)
473
	memcpy(Ptr+offset,to,to_length);
474
    }
205 by Brian Aker
uint32 -> uin32_t
475
    str_length+=(uint32_t) diff;
1 by brian
clean slate
476
  }
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
477
  return false;
1 by brian
clean slate
478
}
479
480
481
482
/*
483
  Compare strings according to collation, without end space.
484
485
  SYNOPSIS
486
    sortcmp()
487
    s		First string
488
    t		Second string
489
    cs		Collation
490
491
  NOTE:
492
    Normally this is case sensitive comparison
493
494
  RETURN
495
  < 0	s < t
496
  0	s == t
497
  > 0	s > t
498
*/
499
500
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
501
int sortcmp(const String *s,const String *t, const CHARSET_INFO * const cs)
1 by brian
clean slate
502
{
503
 return cs->coll->strnncollsp(cs,
481 by Brian Aker
Remove all of uchar.
504
                              (unsigned char *) s->ptr(),s->length(),
505
                              (unsigned char *) t->ptr(),t->length(), 0);
1 by brian
clean slate
506
}
507
508
509
/*
510
  Compare strings byte by byte. End spaces are also compared.
511
512
  SYNOPSIS
513
    stringcmp()
514
    s		First string
515
    t		Second string
516
517
  NOTE:
481 by Brian Aker
Remove all of uchar.
518
    Strings are compared as a stream of unsigned chars
1 by brian
clean slate
519
520
  RETURN
521
  < 0	s < t
522
  0	s == t
523
  > 0	s > t
524
*/
525
526
527
int stringcmp(const String *s,const String *t)
528
{
398.1.4 by Monty Taylor
Renamed max/min.
529
  uint32_t s_len=s->length(),t_len=t->length(),len=cmin(s_len,t_len);
1 by brian
clean slate
530
  int cmp= memcmp(s->ptr(), t->ptr(), len);
531
  return (cmp) ? cmp : (int) (s_len - t_len);
532
}
533
534
205 by Brian Aker
uint32 -> uin32_t
535
/*
  Return a String whose buffer can hold from_length bytes and that carries
  the contents of 'from'.

  'from' itself is returned when its allocation is already large enough,
  or after growing it in place when its alloced flag is set, 'to' is NULL,
  or 'to' is the same object. Otherwise 'to' is grown and receives at most
  from_length bytes of 'from' together with its character set.

  NOTE
    If growing 'to' fails, 'from' is returned unchanged; a failure to grow
    'from' in place is not reported either.
*/

String *copy_if_not_alloced(String *to,String *from,uint32_t from_length)
{
  if (from->Alloced_length >= from_length)
    return from;

  const bool grow_in_place= from->alloced || !to || from == to;
  if (grow_in_place)
  {
    (void) from->realloc(from_length);
    return from;
  }

  if (to->realloc(from_length))
    return from;                        // Actually an error

  const uint32_t copy_length= cmin(from->str_length,from_length);
  to->str_length= copy_length;
  if (copy_length)
    memcpy(to->Ptr, from->Ptr, copy_length);
  to->str_charset= from->str_charset;
  return to;
}
551
552
553
/****************************************************************************
554
  Help functions
555
****************************************************************************/
556
557
/*
558
  copy a string from one character set to another
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
559
1 by brian
clean slate
560
  SYNOPSIS
561
    copy_and_convert()
562
    to			Store result here
563
    to_cs		Character set of result string
564
    from		Copy from here
565
    from_length		Length of from string
566
    from_cs		From character set
567
568
  NOTES
569
    'to' must be at least from_length * to_cs->mbmaxlen bytes long
570
571
  RETURN
572
    length of bytes copied to 'to'
573
*/
574
575
205 by Brian Aker
uint32 -> uin32_t
576
static uint32_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
577
copy_and_convert_extended(char *to, uint32_t to_length,
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
578
                          const CHARSET_INFO * const to_cs,
205 by Brian Aker
uint32 -> uin32_t
579
                          const char *from, uint32_t from_length,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
580
                          const CHARSET_INFO * const from_cs,
482 by Brian Aker
Remove uint.
581
                          uint32_t *errors)
1 by brian
clean slate
582
{
583
  int         cnvres;
584
  my_wc_t     wc;
481 by Brian Aker
Remove all of uchar.
585
  const unsigned char *from_end= (const unsigned char*) from+from_length;
1 by brian
clean slate
586
  char *to_start= to;
481 by Brian Aker
Remove all of uchar.
587
  unsigned char *to_end= (unsigned char*) to+to_length;
1 by brian
clean slate
588
  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
589
  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
482 by Brian Aker
Remove uint.
590
  uint32_t error_count= 0;
1 by brian
clean slate
591
592
  while (1)
593
  {
481 by Brian Aker
Remove all of uchar.
594
    if ((cnvres= (*mb_wc)(from_cs, &wc, (unsigned char*) from,
1 by brian
clean slate
595
				      from_end)) > 0)
596
      from+= cnvres;
597
    else if (cnvres == MY_CS_ILSEQ)
598
    {
599
      error_count++;
600
      from++;
601
      wc= '?';
602
    }
603
    else if (cnvres > MY_CS_TOOSMALL)
604
    {
605
      /*
606
        A correct multibyte sequence detected
607
        But it doesn't have Unicode mapping.
608
      */
609
      error_count++;
610
      from+= (-cnvres);
611
      wc= '?';
612
    }
613
    else
614
      break;  // Not enough characters
615
616
outp:
481 by Brian Aker
Remove all of uchar.
617
    if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
1 by brian
clean slate
618
      to+= cnvres;
619
    else if (cnvres == MY_CS_ILUNI && wc != '?')
620
    {
621
      error_count++;
622
      wc= '?';
623
      goto outp;
624
    }
625
    else
626
      break;
627
  }
628
  *errors= error_count;
205 by Brian Aker
uint32 -> uin32_t
629
  return (uint32_t) (to - to_start);
1 by brian
clean slate
630
}
631
632
633
/*
634
  Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
635
*/
205 by Brian Aker
uint32 -> uin32_t
636
uint32_t
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
637
copy_and_convert(char *to, uint32_t to_length, const CHARSET_INFO * const to_cs,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
638
                 const char *from, uint32_t from_length,
482 by Brian Aker
Remove uint.
639
				 const CHARSET_INFO * const from_cs, uint32_t *errors)
1 by brian
clean slate
640
{
641
  /*
642
    If any of the character sets is not ASCII compatible,
643
    immediately switch to slow mb_wc->wc_mb method.
644
  */
645
  if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
646
    return copy_and_convert_extended(to, to_length, to_cs,
647
                                     from, from_length, from_cs, errors);
648
398.1.4 by Monty Taylor
Renamed max/min.
649
  uint32_t length= cmin(to_length, from_length), length2= length;
1 by brian
clean slate
650
651
#if defined(__i386__)
652
  /*
653
    Special loop for i386, it allows to refer to a
654
    non-aligned memory block as UINT32, which makes
655
    it possible to copy four bytes at once. This
656
    gives about 10% performance improvement comparing
657
    to byte-by-byte loop.
658
  */
659
  for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
660
  {
205 by Brian Aker
uint32 -> uin32_t
661
    if ((*(uint32_t*)from) & 0x80808080)
1 by brian
clean slate
662
      break;
205 by Brian Aker
uint32 -> uin32_t
663
    *((uint32_t*) to)= *((const uint32_t*) from);
1 by brian
clean slate
664
  }
665
#endif
666
667
  for (; ; *to++= *from++, length--)
668
  {
669
    if (!length)
670
    {
671
      *errors= 0;
672
      return length2;
673
    }
674
    if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
675
    {
205 by Brian Aker
uint32 -> uin32_t
676
      uint32_t copied_length= length2 - length;
1 by brian
clean slate
677
      to_length-= copied_length;
678
      from_length-= copied_length;
679
      return copied_length + copy_and_convert_extended(to, to_length,
680
                                                       to_cs,
681
                                                       from, from_length,
682
                                                       from_cs,
683
                                                       errors);
684
    }
685
  }
686
892.2.4 by Monty Taylor
Fixed more warnings.
687
#ifndef __sun
1 by brian
clean slate
688
  return 0;           // Make compiler happy
892.2.4 by Monty Taylor
Fixed more warnings.
689
#endif
1 by brian
clean slate
690
}
691
692
693
/**
694
  Copy string with HEX-encoding of "bad" characters.
695
696
  @details This functions copies the string pointed by "src"
697
  to the string pointed by "dst". Not more than "srclen" bytes
698
  are read from "src". Any sequences of bytes representing
699
  a not-well-formed substring (according to cs) are hex-encoded,
700
  and all well-formed substrings (according to cs) are copied as is.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
701
  Not more than "dstlen" bytes are written to "dst". The number
1 by brian
clean slate
702
  of bytes written to "dst" is returned.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
703
1 by brian
clean slate
704
   @param      cs       character set pointer of the destination string
705
   @param[out] dst      destination string
706
   @param      dstlen   size of dst
707
   @param      src      source string
708
   @param      srclen   length of src
709
710
   @retval     result length
711
*/
712
713
size_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
714
my_copy_with_hex_escaping(const CHARSET_INFO * const cs,
1 by brian
clean slate
715
                          char *dst, size_t dstlen,
716
                          const char *src, size_t srclen)
717
{
718
  const char *srcend= src + srclen;
719
  char *dst0= dst;
720
721
  for ( ; src < srcend ; )
722
  {
723
    size_t chlen;
724
    if ((chlen= my_ismbchar(cs, src, srcend)))
725
    {
726
      if (dstlen < chlen)
727
        break; /* purecov: inspected */
728
      memcpy(dst, src, chlen);
729
      src+= chlen;
730
      dst+= chlen;
731
      dstlen-= chlen;
732
    }
733
    else if (*src & 0x80)
734
    {
735
      if (dstlen < 4)
736
        break; /* purecov: inspected */
737
      *dst++= '\\';
738
      *dst++= 'x';
739
      *dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
740
      *dst++= _dig_vec_upper[((unsigned char) *src) & 15];
741
      src++;
742
      dstlen-= 4;
743
    }
744
    else
745
    {
746
      if (dstlen < 1)
747
        break; /* purecov: inspected */
748
      *dst++= *src++;
749
      dstlen--;
750
    }
751
  }
752
  return dst - dst0;
753
}
754
755
/*
756
  copy a string,
757
  with optional character set conversion,
758
  with optional left padding (for binary -> UCS2 conversion)
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
759
1 by brian
clean slate
760
  SYNOPSIS
761
    well_formed_copy_nchars()
762
    to			     Store result here
763
    to_length                Maxinum length of "to" string
764
    to_cs		     Character set of "to" string
765
    from		     Copy from here
766
    from_length		     Length of from string
767
    from_cs		     From character set
768
    nchars                   Copy not more that nchars characters
769
    well_formed_error_pos    Return position when "from" is not well formed
770
                             or NULL otherwise.
771
    cannot_convert_error_pos Return position where a not convertable
772
                             character met, or NULL otherwise.
773
    from_end_pos             Return position where scanning of "from"
774
                             string stopped.
775
  NOTES
776
777
  RETURN
778
    length of bytes copied to 'to'
779
*/
780
781
205 by Brian Aker
uint32 -> uin32_t
782
uint32_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
783
well_formed_copy_nchars(const CHARSET_INFO * const to_cs,
482 by Brian Aker
Remove uint.
784
                        char *to, uint32_t to_length,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
785
                        const CHARSET_INFO * const from_cs,
482 by Brian Aker
Remove uint.
786
                        const char *from, uint32_t from_length,
787
                        uint32_t nchars,
1 by brian
clean slate
788
                        const char **well_formed_error_pos,
789
                        const char **cannot_convert_error_pos,
790
                        const char **from_end_pos)
791
{
482 by Brian Aker
Remove uint.
792
  uint32_t res;
1 by brian
clean slate
793
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
794
  if ((to_cs == &my_charset_bin) ||
1 by brian
clean slate
795
      (from_cs == &my_charset_bin) ||
796
      (to_cs == from_cs) ||
797
      my_charset_same(from_cs, to_cs))
798
  {
799
    if (to_length < to_cs->mbminlen || !nchars)
800
    {
801
      *from_end_pos= from;
802
      *cannot_convert_error_pos= NULL;
803
      *well_formed_error_pos= NULL;
804
      return 0;
805
    }
806
807
    if (to_cs == &my_charset_bin)
808
    {
398.1.4 by Monty Taylor
Renamed max/min.
809
      res= cmin(cmin(nchars, to_length), from_length);
1 by brian
clean slate
810
      memmove(to, from, res);
811
      *from_end_pos= from + res;
812
      *well_formed_error_pos= NULL;
813
      *cannot_convert_error_pos= NULL;
814
    }
815
    else
816
    {
817
      int well_formed_error;
482 by Brian Aker
Remove uint.
818
      uint32_t from_offset;
1 by brian
clean slate
819
820
      if ((from_offset= (from_length % to_cs->mbminlen)) &&
821
          (from_cs == &my_charset_bin))
822
      {
823
        /*
824
          Copying from BINARY to UCS2 needs to prepend zeros sometimes:
825
          INSERT INTO t1 (ucs2_column) VALUES (0x01);
826
          0x01 -> 0x0001
827
        */
482 by Brian Aker
Remove uint.
828
        uint32_t pad_length= to_cs->mbminlen - from_offset;
212.6.1 by Mats Kindahl
Replacing all bzero() calls with memset() calls and removing the bzero.c file.
829
        memset(to, 0, pad_length);
1 by brian
clean slate
830
        memmove(to + pad_length, from, from_offset);
831
        nchars--;
832
        from+= from_offset;
833
        from_length-= from_offset;
834
        to+= to_cs->mbminlen;
835
        to_length-= to_cs->mbminlen;
836
      }
837
838
      set_if_smaller(from_length, to_length);
839
      res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
840
                                        nchars, &well_formed_error);
841
      memmove(to, from, res);
842
      *from_end_pos= from + res;
843
      *well_formed_error_pos= well_formed_error ? from + res : NULL;
844
      *cannot_convert_error_pos= NULL;
845
      if (from_offset)
846
        res+= to_cs->mbminlen;
847
    }
848
  }
849
  else
850
  {
851
    int cnvres;
852
    my_wc_t wc;
853
    my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
854
    my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
481 by Brian Aker
Remove all of uchar.
855
    const unsigned char *from_end= (const unsigned char*) from + from_length;
856
    unsigned char *to_end= (unsigned char*) to + to_length;
1 by brian
clean slate
857
    char *to_start= to;
858
    *well_formed_error_pos= NULL;
859
    *cannot_convert_error_pos= NULL;
860
861
    for ( ; nchars; nchars--)
862
    {
863
      const char *from_prev= from;
481 by Brian Aker
Remove all of uchar.
864
      if ((cnvres= (*mb_wc)(from_cs, &wc, (unsigned char*) from, from_end)) > 0)
1 by brian
clean slate
865
        from+= cnvres;
866
      else if (cnvres == MY_CS_ILSEQ)
867
      {
868
        if (!*well_formed_error_pos)
869
          *well_formed_error_pos= from;
870
        from++;
871
        wc= '?';
872
      }
873
      else if (cnvres > MY_CS_TOOSMALL)
874
      {
875
        /*
876
          A correct multibyte sequence detected
877
          But it doesn't have Unicode mapping.
878
        */
879
        if (!*cannot_convert_error_pos)
880
          *cannot_convert_error_pos= from;
881
        from+= (-cnvres);
882
        wc= '?';
883
      }
884
      else
885
        break;  // Not enough characters
886
887
outp:
481 by Brian Aker
Remove all of uchar.
888
      if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
1 by brian
clean slate
889
        to+= cnvres;
890
      else if (cnvres == MY_CS_ILUNI && wc != '?')
891
      {
892
        if (!*cannot_convert_error_pos)
893
          *cannot_convert_error_pos= from_prev;
894
        wc= '?';
895
        goto outp;
896
      }
897
      else
898
      {
899
        from= from_prev;
900
        break;
901
      }
902
    }
903
    *from_end_pos= from;
904
    res= to - to_start;
905
  }
205 by Brian Aker
uint32 -> uin32_t
906
  return (uint32_t) res;
1 by brian
clean slate
907
}
908
909
910
911
912
/*
  Append the contents of this string to "str", escaping special bytes.

  Backslash, NUL, single quote, newline, carriage return and Ctrl-Z are
  appended as the two-character sequences \\, \0, \', \n, \r and \Z.
  Every other byte is appended unchanged.
*/
void String::print(String *str)
{
  /* Both ends of the range are captured once, before anything is appended. */
  char *cursor= (char*)Ptr;
  char * const limit= cursor + str_length;
  const uint32_t escape_length= 2;   /* every escape is backslash + one char */

  while (cursor < limit)
  {
    unsigned char c= *cursor++;
    const char *escaped= NULL;

    switch (c)
    {
    case '\\':   escaped= "\\\\"; break;
    case '\0':   escaped= "\\0";  break;
    case '\'':   escaped= "\\'";  break;
    case '\n':   escaped= "\\n";  break;
    case '\r':   escaped= "\\r";  break;
    case '\032': escaped= "\\Z";  break;   // Ctrl-Z
    default:     break;
    }

    if (escaped)
      str->append(escaped, escape_length);
    else
      str->append(c);
  }
}
943
794 by Brian Aker
Refactor append_identifier and remove dead OPTION_QUOTE_SHOW_CREATE option
944
/*
945
  Quote the given identifier.
946
  If the given identifier is empty, it will be quoted.
947
948
  SYNOPSIS
949
  append_identifier()
950
  name                  the identifier to be appended
951
  name_length           length of the appending identifier
952
*/
953
954
/* Factor the extern out */
955
extern const CHARSET_INFO *system_charset_info, *files_charset_info;
956
779.3.10 by Monty Taylor
Turned on -Wshadow.
957
void String::append_identifier(const char *name, uint32_t in_length)
794 by Brian Aker
Refactor append_identifier and remove dead OPTION_QUOTE_SHOW_CREATE option
958
{
959
  const char *name_end;
960
  char quote_char;
961
  int q= '`';
962
963
  /*
964
    The identifier must be quoted as it includes a quote character or
965
   it's a keyword
966
  */
967
779.3.10 by Monty Taylor
Turned on -Wshadow.
968
  reserve(in_length*2 + 2);
794 by Brian Aker
Refactor append_identifier and remove dead OPTION_QUOTE_SHOW_CREATE option
969
  quote_char= (char) q;
970
  append(&quote_char, 1, system_charset_info);
971
779.3.10 by Monty Taylor
Turned on -Wshadow.
972
  for (name_end= name+in_length ; name < name_end ; name+= in_length)
794 by Brian Aker
Refactor append_identifier and remove dead OPTION_QUOTE_SHOW_CREATE option
973
  {
974
    unsigned char chr= (unsigned char) *name;
779.3.10 by Monty Taylor
Turned on -Wshadow.
975
    in_length= my_mbcharlen(system_charset_info, chr);
794 by Brian Aker
Refactor append_identifier and remove dead OPTION_QUOTE_SHOW_CREATE option
976
    /*
977
      my_mbcharlen can return 0 on a wrong multibyte
978
      sequence. It is possible when upgrading from 4.0,
979
      and identifier contains some accented characters.
980
      The manual says it does not work. So we'll just
981
      change length to 1 not to hang in the endless loop.
982
    */
779.3.10 by Monty Taylor
Turned on -Wshadow.
983
    if (!in_length)
984
      in_length= 1;
985
    if (in_length == 1 && chr == (unsigned char) quote_char)
794 by Brian Aker
Refactor append_identifier and remove dead OPTION_QUOTE_SHOW_CREATE option
986
      append(&quote_char, 1, system_charset_info);
779.3.10 by Monty Taylor
Turned on -Wshadow.
987
    append(name, in_length, system_charset_info);
794 by Brian Aker
Refactor append_identifier and remove dead OPTION_QUOTE_SHOW_CREATE option
988
  }
989
  append(&quote_char, 1, system_charset_info);
990
}
991
1 by brian
clean slate
992
993
/*
994
  Exchange state of this object and argument.
995
996
  SYNOPSIS
997
    String::swap()
998
999
  RETURN
1000
    Target string will contain state of this object and vice versa.
1001
*/
1002
1003
void String::swap(String &s)
1004
{
322.2.2 by Mats Kindahl
Hiding THD::proc_info field and providing a setter and getter.
1005
  std::swap(Ptr, s.Ptr);
1006
  std::swap(str_length, s.str_length);
1007
  std::swap(Alloced_length, s.Alloced_length);
1008
  std::swap(alloced, s.alloced);
1009
  std::swap(str_charset, s.str_charset);
1 by brian
clean slate
1010
}
598.1.1 by Super-User
Fixed solaris build crap.
1011
1012
1013
bool operator==(const String &s1, const String &s2)
1014
{
1015
  return stringcmp(&s1,&s2) == 0;
1016
}
1017
1018
bool operator!=(const String &s1, const String &s2)
1019
{
1020
  return !(s1 == s2);
1021
}
1022