~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
/* This file is originally from the mysql distribution. Coded by monty */
17
212.5.39 by Monty Taylor
Phew. Moved my_base and my_global.
18
#include "global.h"
212.5.13 by Monty Taylor
Moved my_sys/my_pthread/my_nosys and mysys_err to mysys.
19
#include <mysys/my_sys.h>
212.5.45 by Monty Taylor
Removed excess AM_CPPFLAGS from the tree. Now the only thing that should be in the include path should be -I${top_srcdir} and -I${top_builddir}w
20
#include <mystrings/m_string.h>
1 by brian
clean slate
21
398.1.5 by Monty Taylor
Removed C++ includes and std namespace from global.h.
22
#include <algorithm>
23
1 by brian
clean slate
24
/*
25
  The following extern declarations are ok as these are interface functions
26
  required by the string function
27
*/
28
481 by Brian Aker
Remove all of uchar.
29
extern unsigned char* sql_alloc(unsigned size);
1 by brian
clean slate
30
extern void sql_element_free(void *ptr);
31
32
#include "sql_string.h"
33
34
/*****************************************************************************
35
** String functions
36
*****************************************************************************/
37
205 by Brian Aker
uint32 -> uin32_t
38
bool String::real_alloc(uint32_t arg_length)
1 by brian
clean slate
39
{
40
  arg_length=ALIGN_SIZE(arg_length+1);
41
  str_length=0;
42
  if (Alloced_length < arg_length)
43
  {
44
    free();
641.3.6 by Monty Taylor
Removed some my_malloc calls.
45
    if (!(Ptr=(char*) malloc(arg_length)))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
46
      return true;
1 by brian
clean slate
47
    Alloced_length=arg_length;
48
    alloced=1;
49
  }
50
  Ptr[0]=0;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
51
  return false;
1 by brian
clean slate
52
}
53
54
55
/*
56
** Check that string is big enough. Set string[alloc_length] to 0
57
** (for C functions)
58
*/
59
205 by Brian Aker
uint32 -> uin32_t
60
bool String::realloc(uint32_t alloc_length)
1 by brian
clean slate
61
{
205 by Brian Aker
uint32 -> uin32_t
62
  uint32_t len=ALIGN_SIZE(alloc_length+1);
1 by brian
clean slate
63
  if (Alloced_length < len)
64
  {
65
    char *new_ptr;
66
    if (alloced)
67
    {
656.1.26 by Monty Taylor
Finally removed all of the my_malloc stuff.
68
      if ((new_ptr= (char*) ::realloc(Ptr,len)))
1 by brian
clean slate
69
      {
70
	Ptr=new_ptr;
71
	Alloced_length=len;
72
      }
73
      else
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
74
	return true;				// Signal error
1 by brian
clean slate
75
    }
641.3.6 by Monty Taylor
Removed some my_malloc calls.
76
    else if ((new_ptr= (char*) malloc(len)))
1 by brian
clean slate
77
    {
78
      if (str_length)				// Avoid bugs in memcpy on AIX
79
	memcpy(new_ptr,Ptr,str_length);
80
      new_ptr[str_length]=0;
81
      Ptr=new_ptr;
82
      Alloced_length=len;
83
      alloced=1;
84
    }
85
    else
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
86
      return true;			// Signal error
1 by brian
clean slate
87
  }
88
  Ptr[alloc_length]=0;			// This make other funcs shorter
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
89
  return false;
1 by brian
clean slate
90
}
91
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
92
bool String::set_int(int64_t num, bool unsigned_flag, const CHARSET_INFO * const cs)
1 by brian
clean slate
93
{
482 by Brian Aker
Remove uint.
94
  uint32_t l=20*cs->mbmaxlen+1;
1 by brian
clean slate
95
  int base= unsigned_flag ? 10 : -10;
96
97
  if (alloc(l))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
98
    return true;
205 by Brian Aker
uint32 -> uin32_t
99
  str_length=(uint32_t) (cs->cset->int64_t10_to_str)(cs,Ptr,l,base,num);
1 by brian
clean slate
100
  str_charset=cs;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
101
  return false;
1 by brian
clean slate
102
}
103
482 by Brian Aker
Remove uint.
104
bool String::set_real(double num,uint32_t decimals, const CHARSET_INFO * const cs)
1 by brian
clean slate
105
{
106
  char buff[FLOATING_POINT_BUFFER];
482 by Brian Aker
Remove uint.
107
  uint32_t dummy_errors;
1 by brian
clean slate
108
  size_t len;
109
110
  str_charset=cs;
111
  if (decimals >= NOT_FIXED_DEC)
112
  {
113
    len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
383.1.12 by Brian Aker
Much closer toward UTF8 being around all the time...
114
    return copy(buff, len, &my_charset_utf8_general_ci, cs, &dummy_errors);
1 by brian
clean slate
115
  }
116
  len= my_fcvt(num, decimals, buff, NULL);
383.1.12 by Brian Aker
Much closer toward UTF8 being around all the time...
117
  return copy(buff, (uint32_t) len, &my_charset_utf8_general_ci, cs,
1 by brian
clean slate
118
              &dummy_errors);
119
}
120
121
122
bool String::copy()
123
{
124
  if (!alloced)
125
  {
126
    Alloced_length=0;				// Force realloc
127
    return realloc(str_length);
128
  }
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
129
  return false;
1 by brian
clean slate
130
}
131
132
bool String::copy(const String &str)
133
{
134
  if (alloc(str.str_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
135
    return true;
1 by brian
clean slate
136
  str_length=str.str_length;
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
137
  memmove(Ptr, str.Ptr, str_length);		// May be overlapping
1 by brian
clean slate
138
  Ptr[str_length]=0;
139
  str_charset=str.str_charset;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
140
  return false;
1 by brian
clean slate
141
}
142
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
143
bool String::copy(const char *str,uint32_t arg_length, const CHARSET_INFO * const cs)
1 by brian
clean slate
144
{
145
  if (alloc(arg_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
146
    return true;
1 by brian
clean slate
147
  if ((str_length=arg_length))
148
    memcpy(Ptr,str,arg_length);
149
  Ptr[arg_length]=0;
150
  str_charset=cs;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
151
  return false;
1 by brian
clean slate
152
}
153
154
155
/*
156
  Checks that the source string can be just copied to the destination string
157
  without conversion.
158
159
  SYNOPSIS
160
161
  needs_conversion()
162
  arg_length		Length of string to copy.
163
  from_cs		Character set to copy from
164
  to_cs			Character set to copy to
205 by Brian Aker
uint32 -> uin32_t
165
  uint32_t *offset	Returns number of unaligned characters.
1 by brian
clean slate
166
167
  RETURN
168
   0  No conversion needed
169
   1  Either character set conversion or adding leading  zeros
170
      (e.g. for UCS-2) must be done
171
172
  NOTE
173
  to_cs may be NULL for "no conversion" if the system variable
174
  character_set_results is NULL.
175
*/
176
205 by Brian Aker
uint32 -> uin32_t
177
bool String::needs_conversion(uint32_t arg_length,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
178
			      const CHARSET_INFO * const from_cs,
179
			      const CHARSET_INFO * const to_cs,
205 by Brian Aker
uint32 -> uin32_t
180
			      uint32_t *offset)
1 by brian
clean slate
181
{
182
  *offset= 0;
183
  if (!to_cs ||
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
184
      (to_cs == &my_charset_bin) ||
1 by brian
clean slate
185
      (to_cs == from_cs) ||
186
      my_charset_same(from_cs, to_cs) ||
187
      ((from_cs == &my_charset_bin) &&
188
       (!(*offset=(arg_length % to_cs->mbminlen)))))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
189
    return false;
190
  return true;
1 by brian
clean slate
191
}
192
193
194
195
205 by Brian Aker
uint32 -> uin32_t
196
bool String::set_or_copy_aligned(const char *str,uint32_t arg_length,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
197
                                 const CHARSET_INFO * const cs)
1 by brian
clean slate
198
{
199
  /* How many bytes are in incomplete character */
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
200
  uint32_t offset= (arg_length % cs->mbminlen);
201
910.1.5 by Brian Aker
Remove some dead bits of string (and fix the semi_join test).
202
  assert(!offset); /* All characters are complete, just copy */
203
204
  set(str, arg_length, cs);
205
  return false;
1 by brian
clean slate
206
}
207
208
	/* Copy with charset conversion */
209
205 by Brian Aker
uint32 -> uin32_t
210
bool String::copy(const char *str, uint32_t arg_length,
975.1.2 by Brian Aker
LCOV cleanup (more of...).
211
		          const CHARSET_INFO * const,
482 by Brian Aker
Remove uint.
212
				  const CHARSET_INFO * const to_cs, uint32_t *errors)
1 by brian
clean slate
213
{
975.1.2 by Brian Aker
LCOV cleanup (more of...).
214
  *errors= 0;
215
  return copy(str, arg_length, to_cs);
1 by brian
clean slate
216
}
217
218
219
/*
220
  Set a string to the value of a latin1-string, keeping the original charset
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
221
1 by brian
clean slate
222
  SYNOPSIS
223
    copy_or_set()
224
    str			String of a simple charset (latin1)
225
    arg_length		Length of string
226
227
  IMPLEMENTATION
228
    If string object is of a simple character set, set it to point to the
229
    given string.
230
    If not, make a copy and convert it to the new character set.
231
232
  RETURN
233
    0	ok
234
    1	Could not allocate result buffer
235
236
*/
237
205 by Brian Aker
uint32 -> uin32_t
238
bool String::set_ascii(const char *str, uint32_t arg_length)
1 by brian
clean slate
239
{
240
  if (str_charset->mbminlen == 1)
241
  {
242
    set(str, arg_length, str_charset);
243
    return 0;
244
  }
482 by Brian Aker
Remove uint.
245
  uint32_t dummy_errors;
383.1.12 by Brian Aker
Much closer toward UTF8 being around all the time...
246
  return copy(str, arg_length, &my_charset_utf8_general_ci, str_charset, &dummy_errors);
1 by brian
clean slate
247
}
248
249
bool String::append(const String &s)
250
{
251
  if (s.length())
252
  {
253
    if (realloc(str_length+s.length()))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
254
      return true;
1 by brian
clean slate
255
    memcpy(Ptr+str_length,s.ptr(),s.length());
256
    str_length+=s.length();
257
  }
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
258
  return false;
1 by brian
clean slate
259
}
260
261
262
/*
263
  Append an ASCII string to a string of the current character set
264
*/
265
205 by Brian Aker
uint32 -> uin32_t
266
bool String::append(const char *s,uint32_t arg_length)
1 by brian
clean slate
267
{
268
  if (!arg_length)
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
269
    return false;
1 by brian
clean slate
270
271
  /*
272
    For an ASCII incompatible string, e.g. UCS-2, we need to convert
273
  */
274
  if (str_charset->mbminlen > 1)
275
  {
205 by Brian Aker
uint32 -> uin32_t
276
    uint32_t add_length=arg_length * str_charset->mbmaxlen;
482 by Brian Aker
Remove uint.
277
    uint32_t dummy_errors;
1 by brian
clean slate
278
    if (realloc(str_length+ add_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
279
      return true;
1 by brian
clean slate
280
    str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
383.1.12 by Brian Aker
Much closer toward UTF8 being around all the time...
281
				  s, arg_length, &my_charset_utf8_general_ci,
1 by brian
clean slate
282
                                  &dummy_errors);
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
283
    return false;
1 by brian
clean slate
284
  }
285
286
  /*
287
    For an ASCII compatinble string we can just append.
288
  */
289
  if (realloc(str_length+arg_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
290
    return true;
1 by brian
clean slate
291
  memcpy(Ptr+str_length,s,arg_length);
292
  str_length+=arg_length;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
293
  return false;
1 by brian
clean slate
294
}
295
296
297
/*
298
  Append a 0-terminated ASCII string
299
*/
300
301
bool String::append(const char *s)
302
{
303
  return append(s, strlen(s));
304
}
305
306
307
/*
308
  Append a string in the given charset to the string
309
  with character set recoding
310
*/
311
975.1.2 by Brian Aker
LCOV cleanup (more of...).
312
bool String::append(const char *s,uint32_t arg_length, const CHARSET_INFO * const)
1 by brian
clean slate
313
{
975.1.2 by Brian Aker
LCOV cleanup (more of...).
314
  if (realloc(str_length + arg_length))
315
    return true;
316
  memcpy(Ptr + str_length, s, arg_length);
317
  str_length+= arg_length;
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
318
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
319
  return false;
1 by brian
clean slate
320
}
321
322
205 by Brian Aker
uint32 -> uin32_t
323
bool String::append_with_prefill(const char *s,uint32_t arg_length,
324
		 uint32_t full_length, char fill_char)
1 by brian
clean slate
325
{
326
  int t_length= arg_length > full_length ? arg_length : full_length;
327
328
  if (realloc(str_length + t_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
329
    return true;
1 by brian
clean slate
330
  t_length= full_length - arg_length;
331
  if (t_length > 0)
332
  {
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
333
    memset(Ptr+str_length, fill_char, t_length);
1 by brian
clean slate
334
    str_length=str_length + t_length;
335
  }
336
  append(s, arg_length);
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
337
  return false;
1 by brian
clean slate
338
}
339
205 by Brian Aker
uint32 -> uin32_t
340
uint32_t String::numchars()
1 by brian
clean slate
341
{
342
  return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
343
}
344
205 by Brian Aker
uint32 -> uin32_t
345
int String::charpos(int i,uint32_t offset)
1 by brian
clean slate
346
{
347
  if (i <= 0)
348
    return i;
349
  return str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,i);
350
}
351
205 by Brian Aker
uint32 -> uin32_t
352
int String::strstr(const String &s,uint32_t offset)
1 by brian
clean slate
353
{
354
  if (s.length()+offset <= str_length)
355
  {
356
    if (!s.length())
357
      return ((int) offset);	// Empty string is always found
358
359
    register const char *str = Ptr+offset;
360
    register const char *search=s.ptr();
361
    const char *end=Ptr+str_length-s.length()+1;
362
    const char *search_end=s.ptr()+s.length();
363
skip:
364
    while (str != end)
365
    {
366
      if (*str++ == *search)
367
      {
368
	register char *i,*j;
369
	i=(char*) str; j=(char*) search+1;
370
	while (j != search_end)
371
	  if (*i++ != *j++) goto skip;
372
	return (int) (str-Ptr) -1;
373
      }
374
    }
375
  }
376
  return -1;
377
}
378
379
/*
380
** Search string from end. Offset is offset to the end of string
381
*/
382
205 by Brian Aker
uint32 -> uin32_t
383
int String::strrstr(const String &s,uint32_t offset)
1 by brian
clean slate
384
{
385
  if (s.length() <= offset && offset <= str_length)
386
  {
387
    if (!s.length())
388
      return offset;				// Empty string is always found
389
    register const char *str = Ptr+offset-1;
390
    register const char *search=s.ptr()+s.length()-1;
391
392
    const char *end=Ptr+s.length()-2;
393
    const char *search_end=s.ptr()-1;
394
skip:
395
    while (str != end)
396
    {
397
      if (*str-- == *search)
398
      {
399
	register char *i,*j;
400
	i=(char*) str; j=(char*) search-1;
401
	while (j != search_end)
402
	  if (*i-- != *j--) goto skip;
403
	return (int) (i-Ptr) +1;
404
      }
405
    }
406
  }
407
  return -1;
408
}
409
410
/*
411
  Replace substring with string
412
  If wrong parameter or not enough memory, do nothing
413
*/
414
205 by Brian Aker
uint32 -> uin32_t
415
bool String::replace(uint32_t offset,uint32_t arg_length,const String &to)
1 by brian
clean slate
416
{
417
  return replace(offset,arg_length,to.ptr(),to.length());
418
}
419
205 by Brian Aker
uint32 -> uin32_t
420
bool String::replace(uint32_t offset,uint32_t arg_length,
421
                     const char *to, uint32_t to_length)
1 by brian
clean slate
422
{
423
  long diff = (long) to_length-(long) arg_length;
424
  if (offset+arg_length <= str_length)
425
  {
426
    if (diff < 0)
427
    {
428
      if (to_length)
429
	memcpy(Ptr+offset,to,to_length);
629.3.4 by Kristian Nielsen
Take Mats'es changes from bmove()->memcpy(), and fix all of them to be
430
      memmove(Ptr+offset+to_length, Ptr+offset+arg_length,
431
              str_length-offset-arg_length);
1 by brian
clean slate
432
    }
433
    else
434
    {
435
      if (diff)
436
      {
205 by Brian Aker
uint32 -> uin32_t
437
	if (realloc(str_length+(uint32_t) diff))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
438
	  return true;
481 by Brian Aker
Remove all of uchar.
439
	bmove_upp((unsigned char*) Ptr+str_length+diff, (unsigned char*) Ptr+str_length,
1 by brian
clean slate
440
		  str_length-offset-arg_length);
441
      }
442
      if (to_length)
443
	memcpy(Ptr+offset,to,to_length);
444
    }
205 by Brian Aker
uint32 -> uin32_t
445
    str_length+=(uint32_t) diff;
1 by brian
clean slate
446
  }
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
447
  return false;
1 by brian
clean slate
448
}
449
450
451
452
/*
453
  Compare strings according to collation, without end space.
454
455
  SYNOPSIS
456
    sortcmp()
457
    s		First string
458
    t		Second string
459
    cs		Collation
460
461
  NOTE:
462
    Normally this is case sensitive comparison
463
464
  RETURN
465
  < 0	s < t
466
  0	s == t
467
  > 0	s > t
468
*/
469
470
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
471
int sortcmp(const String *s,const String *t, const CHARSET_INFO * const cs)
1 by brian
clean slate
472
{
473
 return cs->coll->strnncollsp(cs,
481 by Brian Aker
Remove all of uchar.
474
                              (unsigned char *) s->ptr(),s->length(),
475
                              (unsigned char *) t->ptr(),t->length(), 0);
1 by brian
clean slate
476
}
477
478
479
/*
480
  Compare strings byte by byte. End spaces are also compared.
481
482
  SYNOPSIS
483
    stringcmp()
484
    s		First string
485
    t		Second string
486
487
  NOTE:
481 by Brian Aker
Remove all of uchar.
488
    Strings are compared as a stream of unsigned chars
1 by brian
clean slate
489
490
  RETURN
491
  < 0	s < t
492
  0	s == t
493
  > 0	s > t
494
*/
495
496
497
int stringcmp(const String *s,const String *t)
498
{
398.1.4 by Monty Taylor
Renamed max/min.
499
  uint32_t s_len=s->length(),t_len=t->length(),len=cmin(s_len,t_len);
1 by brian
clean slate
500
  int cmp= memcmp(s->ptr(), t->ptr(), len);
501
  return (cmp) ? cmp : (int) (s_len - t_len);
502
}
503
504
205 by Brian Aker
uint32 -> uin32_t
505
/*
  Return a String whose buffer can hold from_length bytes and that carries
  the contents of 'from'.

  'from' itself is returned when its buffer is already large enough, or
  after growing it in place when it owns its buffer, when no 'to' is given
  or when 'to' is the same object. Otherwise 'to' is grown, at most
  from_length bytes of 'from' are copied into it, and 'to' is returned.

  NOTE
    Allocation failures are not reported: 'from' is returned in that case.
*/
String *copy_if_not_alloced(String *to,String *from,uint32_t from_length)
{
  if (from->Alloced_length >= from_length)
    return from;

  const bool grow_in_place= from->alloced || !to || from == to;
  if (grow_in_place)
  {
    (void) from->realloc(from_length);
    return from;
  }

  if (to->realloc(from_length))
    return from;                                // Actually an error

  to->str_length= cmin(from->str_length, from_length);
  if (to->str_length)
    memcpy(to->Ptr, from->Ptr, to->str_length);
  to->str_charset= from->str_charset;
  return to;
}
521
522
523
/****************************************************************************
524
  Help functions
525
****************************************************************************/
526
527
/*
528
  copy a string from one character set to another
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
529
1 by brian
clean slate
530
  SYNOPSIS
531
    copy_and_convert()
532
    to			Store result here
533
    to_cs		Character set of result string
534
    from		Copy from here
535
    from_length		Length of from string
536
    from_cs		From character set
537
538
  NOTES
539
    'to' must be at least from_length * to_cs->mbmaxlen bytes long
540
541
  RETURN
542
    length of bytes copied to 'to'
543
*/
544
545
205 by Brian Aker
uint32 -> uin32_t
546
static uint32_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
547
copy_and_convert_extended(char *to, uint32_t to_length,
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
548
                          const CHARSET_INFO * const to_cs,
205 by Brian Aker
uint32 -> uin32_t
549
                          const char *from, uint32_t from_length,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
550
                          const CHARSET_INFO * const from_cs,
482 by Brian Aker
Remove uint.
551
                          uint32_t *errors)
1 by brian
clean slate
552
{
553
  int         cnvres;
554
  my_wc_t     wc;
481 by Brian Aker
Remove all of uchar.
555
  const unsigned char *from_end= (const unsigned char*) from+from_length;
1 by brian
clean slate
556
  char *to_start= to;
481 by Brian Aker
Remove all of uchar.
557
  unsigned char *to_end= (unsigned char*) to+to_length;
1 by brian
clean slate
558
  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
559
  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
482 by Brian Aker
Remove uint.
560
  uint32_t error_count= 0;
1 by brian
clean slate
561
562
  while (1)
563
  {
481 by Brian Aker
Remove all of uchar.
564
    if ((cnvres= (*mb_wc)(from_cs, &wc, (unsigned char*) from,
1 by brian
clean slate
565
				      from_end)) > 0)
566
      from+= cnvres;
567
    else if (cnvres == MY_CS_ILSEQ)
568
    {
569
      error_count++;
570
      from++;
571
      wc= '?';
572
    }
573
    else if (cnvres > MY_CS_TOOSMALL)
574
    {
575
      /*
576
        A correct multibyte sequence detected
577
        But it doesn't have Unicode mapping.
578
      */
579
      error_count++;
580
      from+= (-cnvres);
581
      wc= '?';
582
    }
583
    else
584
      break;  // Not enough characters
585
586
outp:
481 by Brian Aker
Remove all of uchar.
587
    if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
1 by brian
clean slate
588
      to+= cnvres;
589
    else if (cnvres == MY_CS_ILUNI && wc != '?')
590
    {
591
      error_count++;
592
      wc= '?';
593
      goto outp;
594
    }
595
    else
596
      break;
597
  }
598
  *errors= error_count;
205 by Brian Aker
uint32 -> uin32_t
599
  return (uint32_t) (to - to_start);
1 by brian
clean slate
600
}
601
602
603
/*
604
  Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
605
*/
205 by Brian Aker
uint32 -> uin32_t
606
uint32_t
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
607
copy_and_convert(char *to, uint32_t to_length, const CHARSET_INFO * const to_cs,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
608
                 const char *from, uint32_t from_length,
482 by Brian Aker
Remove uint.
609
				 const CHARSET_INFO * const from_cs, uint32_t *errors)
1 by brian
clean slate
610
{
611
  /*
612
    If any of the character sets is not ASCII compatible,
613
    immediately switch to slow mb_wc->wc_mb method.
614
  */
615
  if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
616
    return copy_and_convert_extended(to, to_length, to_cs,
617
                                     from, from_length, from_cs, errors);
618
398.1.4 by Monty Taylor
Renamed max/min.
619
  uint32_t length= cmin(to_length, from_length), length2= length;
1 by brian
clean slate
620
621
#if defined(__i386__)
622
  /*
623
    Special loop for i386, it allows to refer to a
624
    non-aligned memory block as UINT32, which makes
625
    it possible to copy four bytes at once. This
626
    gives about 10% performance improvement comparing
627
    to byte-by-byte loop.
628
  */
629
  for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
630
  {
205 by Brian Aker
uint32 -> uin32_t
631
    if ((*(uint32_t*)from) & 0x80808080)
1 by brian
clean slate
632
      break;
205 by Brian Aker
uint32 -> uin32_t
633
    *((uint32_t*) to)= *((const uint32_t*) from);
1 by brian
clean slate
634
  }
635
#endif
636
637
  for (; ; *to++= *from++, length--)
638
  {
639
    if (!length)
640
    {
641
      *errors= 0;
642
      return length2;
643
    }
644
    if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
645
    {
205 by Brian Aker
uint32 -> uin32_t
646
      uint32_t copied_length= length2 - length;
1 by brian
clean slate
647
      to_length-= copied_length;
648
      from_length-= copied_length;
649
      return copied_length + copy_and_convert_extended(to, to_length,
650
                                                       to_cs,
651
                                                       from, from_length,
652
                                                       from_cs,
653
                                                       errors);
654
    }
655
  }
656
892.2.4 by Monty Taylor
Fixed more warnings.
657
#ifndef __sun
1 by brian
clean slate
658
  return 0;           // Make compiler happy
892.2.4 by Monty Taylor
Fixed more warnings.
659
#endif
1 by brian
clean slate
660
}
661
662
663
/**
664
  Copy string with HEX-encoding of "bad" characters.
665
666
  @details This function copies the string pointed to by "src"
667
  to the string pointed by "dst". Not more than "srclen" bytes
668
  are read from "src". Any sequences of bytes representing
669
  a not-well-formed substring (according to cs) are hex-encoded,
670
  and all well-formed substrings (according to cs) are copied as is.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
671
  Not more than "dstlen" bytes are written to "dst". The number
1 by brian
clean slate
672
  of bytes written to "dst" is returned.
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
673
1 by brian
clean slate
674
   @param      cs       character set pointer of the destination string
675
   @param[out] dst      destination string
676
   @param      dstlen   size of dst
677
   @param      src      source string
678
   @param      srclen   length of src
679
680
   @retval     result length
681
*/
682
683
size_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
684
my_copy_with_hex_escaping(const CHARSET_INFO * const cs,
1 by brian
clean slate
685
                          char *dst, size_t dstlen,
686
                          const char *src, size_t srclen)
687
{
688
  const char *srcend= src + srclen;
689
  char *dst0= dst;
690
691
  for ( ; src < srcend ; )
692
  {
693
    size_t chlen;
694
    if ((chlen= my_ismbchar(cs, src, srcend)))
695
    {
696
      if (dstlen < chlen)
697
        break; /* purecov: inspected */
698
      memcpy(dst, src, chlen);
699
      src+= chlen;
700
      dst+= chlen;
701
      dstlen-= chlen;
702
    }
703
    else if (*src & 0x80)
704
    {
705
      if (dstlen < 4)
706
        break; /* purecov: inspected */
707
      *dst++= '\\';
708
      *dst++= 'x';
709
      *dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
710
      *dst++= _dig_vec_upper[((unsigned char) *src) & 15];
711
      src++;
712
      dstlen-= 4;
713
    }
714
    else
715
    {
716
      if (dstlen < 1)
717
        break; /* purecov: inspected */
718
      *dst++= *src++;
719
      dstlen--;
720
    }
721
  }
722
  return dst - dst0;
723
}
724
725
/*
726
  copy a string,
727
  with optional character set conversion,
728
  with optional left padding (for binary -> UCS2 conversion)
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
729
1 by brian
clean slate
730
  SYNOPSIS
731
    well_formed_copy_nchars()
732
    to			     Store result here
733
    to_length                Maximum length of "to" string
734
    to_cs		     Character set of "to" string
735
    from		     Copy from here
736
    from_length		     Length of from string
737
    from_cs		     From character set
738
    nchars                   Copy not more than nchars characters
739
    well_formed_error_pos    Return position when "from" is not well formed
740
                             or NULL otherwise.
741
    cannot_convert_error_pos Return position where a not convertable
742
                             character met, or NULL otherwise.
743
    from_end_pos             Return position where scanning of "from"
744
                             string stopped.
745
  NOTES
746
747
  RETURN
748
    length of bytes copied to 'to'
749
*/
750
751
205 by Brian Aker
uint32 -> uin32_t
752
uint32_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
753
well_formed_copy_nchars(const CHARSET_INFO * const to_cs,
482 by Brian Aker
Remove uint.
754
                        char *to, uint32_t to_length,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
755
                        const CHARSET_INFO * const from_cs,
482 by Brian Aker
Remove uint.
756
                        const char *from, uint32_t from_length,
757
                        uint32_t nchars,
1 by brian
clean slate
758
                        const char **well_formed_error_pos,
759
                        const char **cannot_convert_error_pos,
760
                        const char **from_end_pos)
761
{
482 by Brian Aker
Remove uint.
762
  uint32_t res;
1 by brian
clean slate
763
660.1.3 by Eric Herman
removed trailing whitespace with simple script:
764
  if ((to_cs == &my_charset_bin) ||
1 by brian
clean slate
765
      (from_cs == &my_charset_bin) ||
766
      (to_cs == from_cs) ||
767
      my_charset_same(from_cs, to_cs))
768
  {
769
    if (to_length < to_cs->mbminlen || !nchars)
770
    {
771
      *from_end_pos= from;
772
      *cannot_convert_error_pos= NULL;
773
      *well_formed_error_pos= NULL;
774
      return 0;
775
    }
776
777
    if (to_cs == &my_charset_bin)
778
    {
398.1.4 by Monty Taylor
Renamed max/min.
779
      res= cmin(cmin(nchars, to_length), from_length);
1 by brian
clean slate
780
      memmove(to, from, res);
781
      *from_end_pos= from + res;
782
      *well_formed_error_pos= NULL;
783
      *cannot_convert_error_pos= NULL;
784
    }
785
    else
786
    {
787
      int well_formed_error;
482 by Brian Aker
Remove uint.
788
      uint32_t from_offset;
1 by brian
clean slate
789
790
      if ((from_offset= (from_length % to_cs->mbminlen)) &&
791
          (from_cs == &my_charset_bin))
792
      {
793
        /*
794
          Copying from BINARY to UCS2 needs to prepend zeros sometimes:
795
          INSERT INTO t1 (ucs2_column) VALUES (0x01);
796
          0x01 -> 0x0001
797
        */
482 by Brian Aker
Remove uint.
798
        uint32_t pad_length= to_cs->mbminlen - from_offset;
212.6.1 by Mats Kindahl
Replacing all bzero() calls with memset() calls and removing the bzero.c file.
799
        memset(to, 0, pad_length);
1 by brian
clean slate
800
        memmove(to + pad_length, from, from_offset);
801
        nchars--;
802
        from+= from_offset;
803
        from_length-= from_offset;
804
        to+= to_cs->mbminlen;
805
        to_length-= to_cs->mbminlen;
806
      }
807
808
      set_if_smaller(from_length, to_length);
809
      res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
810
                                        nchars, &well_formed_error);
811
      memmove(to, from, res);
812
      *from_end_pos= from + res;
813
      *well_formed_error_pos= well_formed_error ? from + res : NULL;
814
      *cannot_convert_error_pos= NULL;
815
      if (from_offset)
816
        res+= to_cs->mbminlen;
817
    }
818
  }
819
  else
820
  {
821
    int cnvres;
822
    my_wc_t wc;
823
    my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
824
    my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
481 by Brian Aker
Remove all of uchar.
825
    const unsigned char *from_end= (const unsigned char*) from + from_length;
826
    unsigned char *to_end= (unsigned char*) to + to_length;
1 by brian
clean slate
827
    char *to_start= to;
828
    *well_formed_error_pos= NULL;
829
    *cannot_convert_error_pos= NULL;
830
831
    for ( ; nchars; nchars--)
832
    {
833
      const char *from_prev= from;
481 by Brian Aker
Remove all of uchar.
834
      if ((cnvres= (*mb_wc)(from_cs, &wc, (unsigned char*) from, from_end)) > 0)
1 by brian
clean slate
835
        from+= cnvres;
836
      else if (cnvres == MY_CS_ILSEQ)
837
      {
838
        if (!*well_formed_error_pos)
839
          *well_formed_error_pos= from;
840
        from++;
841
        wc= '?';
842
      }
843
      else if (cnvres > MY_CS_TOOSMALL)
844
      {
845
        /*
846
          A correct multibyte sequence detected
847
          But it doesn't have Unicode mapping.
848
        */
849
        if (!*cannot_convert_error_pos)
850
          *cannot_convert_error_pos= from;
851
        from+= (-cnvres);
852
        wc= '?';
853
      }
854
      else
855
        break;  // Not enough characters
856
857
outp:
481 by Brian Aker
Remove all of uchar.
858
      if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
1 by brian
clean slate
859
        to+= cnvres;
860
      else if (cnvres == MY_CS_ILUNI && wc != '?')
861
      {
862
        if (!*cannot_convert_error_pos)
863
          *cannot_convert_error_pos= from_prev;
864
        wc= '?';
865
        goto outp;
866
      }
867
      else
868
      {
869
        from= from_prev;
870
        break;
871
      }
872
    }
873
    *from_end_pos= from;
874
    res= to - to_start;
875
  }
205 by Brian Aker
uint32 -> uin32_t
876
  return (uint32_t) res;
1 by brian
clean slate
877
}
878
879
880
881
882
/*
  Append an escaped copy of this string's bytes to another string.

  SYNOPSIS
    String::print()
    str                   string that receives the escaped output

  NOTES
    Backslash, NUL, single quote, newline, carriage return and Ctrl-Z
    (octal 032) are each written as a backslash followed by one marker
    byte: \\  \0  \'  \n  \r  \Z.  Every other byte is appended as is.
*/

void String::print(String *str)
{
  const char *const first= (const char*) Ptr;
  const char *const stop= first + str_length;

  for (const char *cursor= first; cursor < stop; ++cursor)
  {
    const unsigned char byte= *cursor;
    char marker;

    switch (byte)
    {
    case '\\':
      marker= '\\';
      break;
    case '\0':
      marker= '0';
      break;
    case '\'':
      marker= '\'';
      break;
    case '\n':
      marker= 'n';
      break;
    case '\r':
      marker= 'r';
      break;
    case '\032': // Ctrl-Z
      marker= 'Z';
      break;
    default:
      /* Nothing to escape: pass the byte through and go to the next one. */
      str->append(byte);
      continue;
    }

    /* All escapes share one shape: a backslash followed by the marker. */
    const char escaped[2]= {'\\', marker};
    str->append(escaped, sizeof(escaped));
  }
}
913
794 by Brian Aker
Refactor append_identifier and remove dead OPTION_QUOTE_SHOW_CREATE option
914
/*
915
  Quote the given identifier.
916
  If the given identifier is empty, it will be quoted.
917
918
  SYNOPSIS
919
  append_identifier()
920
  name                  the identifier to be appended
921
  name_length           length of the appending identifier
922
*/
923
924
/* Factor the extern out */
925
extern const CHARSET_INFO *system_charset_info, *files_charset_info;
926
779.3.10 by Monty Taylor
Turned on -Wshadow.
927
void String::append_identifier(const char *name, uint32_t in_length)
794 by Brian Aker
Refactor append_identifier and remove dead OPTION_QUOTE_SHOW_CREATE option
928
{
929
  const char *name_end;
930
  char quote_char;
931
  int q= '`';
932
933
  /*
934
    The identifier must be quoted as it includes a quote character or
935
   it's a keyword
936
  */
937
779.3.10 by Monty Taylor
Turned on -Wshadow.
938
  reserve(in_length*2 + 2);
794 by Brian Aker
Refactor append_identifier and remove dead OPTION_QUOTE_SHOW_CREATE option
939
  quote_char= (char) q;
940
  append(&quote_char, 1, system_charset_info);
941
779.3.10 by Monty Taylor
Turned on -Wshadow.
942
  for (name_end= name+in_length ; name < name_end ; name+= in_length)
794 by Brian Aker
Refactor append_identifier and remove dead OPTION_QUOTE_SHOW_CREATE option
943
  {
944
    unsigned char chr= (unsigned char) *name;
779.3.10 by Monty Taylor
Turned on -Wshadow.
945
    in_length= my_mbcharlen(system_charset_info, chr);
794 by Brian Aker
Refactor append_identifier and remove dead OPTION_QUOTE_SHOW_CREATE option
946
    /*
947
      my_mbcharlen can return 0 on a wrong multibyte
948
      sequence. It is possible when upgrading from 4.0,
949
      and identifier contains some accented characters.
950
      The manual says it does not work. So we'll just
951
      change length to 1 not to hang in the endless loop.
952
    */
779.3.10 by Monty Taylor
Turned on -Wshadow.
953
    if (!in_length)
954
      in_length= 1;
955
    if (in_length == 1 && chr == (unsigned char) quote_char)
794 by Brian Aker
Refactor append_identifier and remove dead OPTION_QUOTE_SHOW_CREATE option
956
      append(&quote_char, 1, system_charset_info);
779.3.10 by Monty Taylor
Turned on -Wshadow.
957
    append(name, in_length, system_charset_info);
794 by Brian Aker
Refactor append_identifier and remove dead OPTION_QUOTE_SHOW_CREATE option
958
  }
959
  append(&quote_char, 1, system_charset_info);
960
}
961
1 by brian
clean slate
962
963
/*
964
  Exchange state of this object and argument.
965
966
  SYNOPSIS
967
    String::swap()
968
969
  RETURN
970
    Target string will contain state of this object and vice versa.
971
*/
972
973
void String::swap(String &s)
974
{
322.2.2 by Mats Kindahl
Hiding THD::proc_info field and providing a setter and getter.
975
  std::swap(Ptr, s.Ptr);
976
  std::swap(str_length, s.str_length);
977
  std::swap(Alloced_length, s.Alloced_length);
978
  std::swap(alloced, s.alloced);
979
  std::swap(str_charset, s.str_charset);
1 by brian
clean slate
980
}
598.1.1 by Super-User
Fixed solaris build crap.
981
982
983
bool operator==(const String &s1, const String &s2)
984
{
985
  return stringcmp(&s1,&s2) == 0;
986
}
987
988
bool operator!=(const String &s1, const String &s2)
989
{
990
  return !(s1 == s2);
991
}
992