~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
/* This file is originally from the mysql distribution. Coded by monty */
17
212.5.39 by Monty Taylor
Phew. Moved my_base and my_global.
18
#include "global.h"
212.5.13 by Monty Taylor
Moved my_sys/my_pthread/my_nosys and mysys_err to mysys.
19
#include <mysys/my_sys.h>
212.5.45 by Monty Taylor
Removed excess AM_CPPFLAGS from the tree. Now the only thing that should be in the include path should be -I${top_srcdir} and -I${top_builddir}w
20
#include <mystrings/m_string.h>
1 by brian
clean slate
21
22
/*
23
  The following extern declarations are ok as these are interface functions
24
  required by the string function
25
*/
26
27
extern uchar* sql_alloc(unsigned size);
28
extern void sql_element_free(void *ptr);
29
30
#include "sql_string.h"
31
32
/*****************************************************************************
33
** String functions
34
*****************************************************************************/
35
205 by Brian Aker
uint32 -> uin32_t
36
bool String::real_alloc(uint32_t arg_length)
1 by brian
clean slate
37
{
38
  arg_length=ALIGN_SIZE(arg_length+1);
39
  str_length=0;
40
  if (Alloced_length < arg_length)
41
  {
42
    free();
43
    if (!(Ptr=(char*) my_malloc(arg_length,MYF(MY_WME))))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
44
      return true;
1 by brian
clean slate
45
    Alloced_length=arg_length;
46
    alloced=1;
47
  }
48
  Ptr[0]=0;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
49
  return false;
1 by brian
clean slate
50
}
51
52
53
/*
54
** Check that string is big enough. Set string[alloc_length] to 0
55
** (for C functions)
56
*/
57
205 by Brian Aker
uint32 -> uin32_t
58
bool String::realloc(uint32_t alloc_length)
1 by brian
clean slate
59
{
205 by Brian Aker
uint32 -> uin32_t
60
  uint32_t len=ALIGN_SIZE(alloc_length+1);
1 by brian
clean slate
61
  if (Alloced_length < len)
62
  {
63
    char *new_ptr;
64
    if (alloced)
65
    {
66
      if ((new_ptr= (char*) my_realloc(Ptr,len,MYF(MY_WME))))
67
      {
68
	Ptr=new_ptr;
69
	Alloced_length=len;
70
      }
71
      else
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
72
	return true;				// Signal error
1 by brian
clean slate
73
    }
74
    else if ((new_ptr= (char*) my_malloc(len,MYF(MY_WME))))
75
    {
76
      if (str_length)				// Avoid bugs in memcpy on AIX
77
	memcpy(new_ptr,Ptr,str_length);
78
      new_ptr[str_length]=0;
79
      Ptr=new_ptr;
80
      Alloced_length=len;
81
      alloced=1;
82
    }
83
    else
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
84
      return true;			// Signal error
1 by brian
clean slate
85
  }
86
  Ptr[alloc_length]=0;			// This make other funcs shorter
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
87
  return false;
1 by brian
clean slate
88
}
89
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
90
bool String::set_int(int64_t num, bool unsigned_flag, const CHARSET_INFO * const cs)
1 by brian
clean slate
91
{
92
  uint l=20*cs->mbmaxlen+1;
93
  int base= unsigned_flag ? 10 : -10;
94
95
  if (alloc(l))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
96
    return true;
205 by Brian Aker
uint32 -> uin32_t
97
  str_length=(uint32_t) (cs->cset->int64_t10_to_str)(cs,Ptr,l,base,num);
1 by brian
clean slate
98
  str_charset=cs;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
99
  return false;
1 by brian
clean slate
100
}
101
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
102
bool String::set_real(double num,uint decimals, const CHARSET_INFO * const cs)
1 by brian
clean slate
103
{
104
  char buff[FLOATING_POINT_BUFFER];
105
  uint dummy_errors;
106
  size_t len;
107
108
  str_charset=cs;
109
  if (decimals >= NOT_FIXED_DEC)
110
  {
111
    len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
112
    return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
113
  }
114
  len= my_fcvt(num, decimals, buff, NULL);
205 by Brian Aker
uint32 -> uin32_t
115
  return copy(buff, (uint32_t) len, &my_charset_latin1, cs,
1 by brian
clean slate
116
              &dummy_errors);
117
}
118
119
120
bool String::copy()
121
{
122
  if (!alloced)
123
  {
124
    Alloced_length=0;				// Force realloc
125
    return realloc(str_length);
126
  }
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
127
  return false;
1 by brian
clean slate
128
}
129
130
bool String::copy(const String &str)
131
{
132
  if (alloc(str.str_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
133
    return true;
1 by brian
clean slate
134
  str_length=str.str_length;
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
135
  memmove(Ptr, str.Ptr, str_length);		// May be overlapping
1 by brian
clean slate
136
  Ptr[str_length]=0;
137
  str_charset=str.str_charset;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
138
  return false;
1 by brian
clean slate
139
}
140
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
141
bool String::copy(const char *str,uint32_t arg_length, const CHARSET_INFO * const cs)
1 by brian
clean slate
142
{
143
  if (alloc(arg_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
144
    return true;
1 by brian
clean slate
145
  if ((str_length=arg_length))
146
    memcpy(Ptr,str,arg_length);
147
  Ptr[arg_length]=0;
148
  str_charset=cs;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
149
  return false;
1 by brian
clean slate
150
}
151
152
153
/*
154
  Checks that the source string can be just copied to the destination string
155
  without conversion.
156
157
  SYNOPSIS
158
159
  needs_conversion()
160
  arg_length		Length of string to copy.
161
  from_cs		Character set to copy from
162
  to_cs			Character set to copy to
205 by Brian Aker
uint32 -> uin32_t
163
  uint32_t *offset	Returns number of unaligned characters.
1 by brian
clean slate
164
165
  RETURN
166
   0  No conversion needed
167
   1  Either character set conversion or adding leading  zeros
168
      (e.g. for UCS-2) must be done
169
170
  NOTE
171
  to_cs may be NULL for "no conversion" if the system variable
172
  character_set_results is NULL.
173
*/
174
205 by Brian Aker
uint32 -> uin32_t
175
bool String::needs_conversion(uint32_t arg_length,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
176
			      const CHARSET_INFO * const from_cs,
177
			      const CHARSET_INFO * const to_cs,
205 by Brian Aker
uint32 -> uin32_t
178
			      uint32_t *offset)
1 by brian
clean slate
179
{
180
  *offset= 0;
181
  if (!to_cs ||
182
      (to_cs == &my_charset_bin) || 
183
      (to_cs == from_cs) ||
184
      my_charset_same(from_cs, to_cs) ||
185
      ((from_cs == &my_charset_bin) &&
186
       (!(*offset=(arg_length % to_cs->mbminlen)))))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
187
    return false;
188
  return true;
1 by brian
clean slate
189
}
190
191
192
/*
193
  Copy a string in a multi-byte character set, adding leading zeros.
194
195
  SYNOPSIS
196
197
  copy_aligned()
198
  str			String to copy
199
  arg_length		Length of string. This should NOT be divisible by
200
			cs->mbminlen.
201
  offset		arg_length % cs->mb_minlength
202
  cs			Character set for 'str'
203
204
  NOTES
205
    For real multi-byte, ascii incompatible character sets,
206
    like UCS-2, add leading zeros if we have an incomplete character.
207
    Thus, 
208
      SELECT _ucs2 0xAA 
209
    will automatically be converted into
210
      SELECT _ucs2 0x00AA
211
212
  RETURN
213
    0  ok
214
    1  error
215
*/
216
205 by Brian Aker
uint32 -> uin32_t
217
bool String::copy_aligned(const char *str,uint32_t arg_length, uint32_t offset,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
218
                          const CHARSET_INFO * const cs)
1 by brian
clean slate
219
{
220
  /* How many bytes are in incomplete character */
221
  offset= cs->mbmaxlen - offset; /* How many zeros we should prepend */
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
222
  assert(offset && offset != cs->mbmaxlen);
1 by brian
clean slate
223
205 by Brian Aker
uint32 -> uin32_t
224
  uint32_t aligned_length= arg_length + offset;
1 by brian
clean slate
225
  if (alloc(aligned_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
226
    return true;
1 by brian
clean slate
227
  
228
  /*
229
    Note, this is only safe for big-endian UCS-2.
230
    If we add little-endian UCS-2 sometimes, this code
231
    will be more complicated. But it's OK for now.
232
  */
212.6.6 by Mats Kindahl
Removing redundant use of casts in drizzled/ for memcmp(), memcpy(), memset(), and memmove().
233
  memset(Ptr, 0, offset);
1 by brian
clean slate
234
  memcpy(Ptr + offset, str, arg_length);
235
  Ptr[aligned_length]=0;
236
  /* str_length is always >= 0 as arg_length is != 0 */
237
  str_length= aligned_length;
238
  str_charset= cs;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
239
  return false;
1 by brian
clean slate
240
}
241
242
205 by Brian Aker
uint32 -> uin32_t
243
bool String::set_or_copy_aligned(const char *str,uint32_t arg_length,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
244
                                 const CHARSET_INFO * const cs)
1 by brian
clean slate
245
{
246
  /* How many bytes are in incomplete character */
205 by Brian Aker
uint32 -> uin32_t
247
  uint32_t offset= (arg_length % cs->mbminlen); 
1 by brian
clean slate
248
  
249
  if (!offset) /* All characters are complete, just copy */
250
  {
251
    set(str, arg_length, cs);
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
252
    return false;
1 by brian
clean slate
253
  }
254
  return copy_aligned(str, arg_length, offset, cs);
255
}
256
257
	/* Copy with charset conversion */
258
205 by Brian Aker
uint32 -> uin32_t
259
bool String::copy(const char *str, uint32_t arg_length,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
260
		          const CHARSET_INFO * const from_cs,
261
				  const CHARSET_INFO * const to_cs, uint *errors)
1 by brian
clean slate
262
{
205 by Brian Aker
uint32 -> uin32_t
263
  uint32_t offset;
1 by brian
clean slate
264
  if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
265
  {
266
    *errors= 0;
267
    return copy(str, arg_length, to_cs);
268
  }
269
  if ((from_cs == &my_charset_bin) && offset)
270
  {
271
    *errors= 0;
272
    return copy_aligned(str, arg_length, offset, to_cs);
273
  }
205 by Brian Aker
uint32 -> uin32_t
274
  uint32_t new_length= to_cs->mbmaxlen*arg_length;
1 by brian
clean slate
275
  if (alloc(new_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
276
    return true;
1 by brian
clean slate
277
  str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
278
                              str, arg_length, from_cs, errors);
279
  str_charset=to_cs;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
280
  return false;
1 by brian
clean slate
281
}
282
283
284
/*
285
  Set a string to the value of a latin1-string, keeping the original charset
286
  
287
  SYNOPSIS
288
    set_ascii()
289
    str			String of a simple charset (latin1)
290
    arg_length		Length of string
291
292
  IMPLEMENTATION
293
    If string object is of a simple character set, set it to point to the
294
    given string.
295
    If not, make a copy and convert it to the new character set.
296
297
  RETURN
298
    0	ok
299
    1	Could not allocate result buffer
300
301
*/
302
205 by Brian Aker
uint32 -> uin32_t
303
bool String::set_ascii(const char *str, uint32_t arg_length)
1 by brian
clean slate
304
{
305
  if (str_charset->mbminlen == 1)
306
  {
307
    set(str, arg_length, str_charset);
308
    return 0;
309
  }
310
  uint dummy_errors;
311
  return copy(str, arg_length, &my_charset_latin1, str_charset, &dummy_errors);
312
}
313
314
315
/* This is used by mysql.cc */
316
205 by Brian Aker
uint32 -> uin32_t
317
bool String::fill(uint32_t max_length,char fill_char)
1 by brian
clean slate
318
{
319
  if (str_length > max_length)
320
    Ptr[str_length=max_length]=0;
321
  else
322
  {
323
    if (realloc(max_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
324
      return true;
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
325
    memset(Ptr+str_length, fill_char, max_length-str_length);
1 by brian
clean slate
326
    str_length=max_length;
327
  }
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
328
  return false;
1 by brian
clean slate
329
}
330
331
void String::strip_sp()
332
{
333
   while (str_length && my_isspace(str_charset,Ptr[str_length-1]))
334
    str_length--;
335
}
336
337
bool String::append(const String &s)
338
{
339
  if (s.length())
340
  {
341
    if (realloc(str_length+s.length()))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
342
      return true;
1 by brian
clean slate
343
    memcpy(Ptr+str_length,s.ptr(),s.length());
344
    str_length+=s.length();
345
  }
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
346
  return false;
1 by brian
clean slate
347
}
348
349
350
/*
351
  Append an ASCII string to a string of the current character set
352
*/
353
205 by Brian Aker
uint32 -> uin32_t
354
bool String::append(const char *s,uint32_t arg_length)
1 by brian
clean slate
355
{
356
  if (!arg_length)
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
357
    return false;
1 by brian
clean slate
358
359
  /*
360
    For an ASCII incompatible string, e.g. UCS-2, we need to convert
361
  */
362
  if (str_charset->mbminlen > 1)
363
  {
205 by Brian Aker
uint32 -> uin32_t
364
    uint32_t add_length=arg_length * str_charset->mbmaxlen;
1 by brian
clean slate
365
    uint dummy_errors;
366
    if (realloc(str_length+ add_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
367
      return true;
1 by brian
clean slate
368
    str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
369
				  s, arg_length, &my_charset_latin1,
370
                                  &dummy_errors);
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
371
    return false;
1 by brian
clean slate
372
  }
373
374
  /*
375
    For an ASCII compatinble string we can just append.
376
  */
377
  if (realloc(str_length+arg_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
378
    return true;
1 by brian
clean slate
379
  memcpy(Ptr+str_length,s,arg_length);
380
  str_length+=arg_length;
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
381
  return false;
1 by brian
clean slate
382
}
383
384
385
/*
386
  Append a 0-terminated ASCII string
387
*/
388
389
bool String::append(const char *s)
390
{
391
  return append(s, strlen(s));
392
}
393
394
395
/*
396
  Append a string in the given charset to the string
397
  with character set recoding
398
*/
399
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
400
bool String::append(const char *s,uint32_t arg_length, const CHARSET_INFO * const cs)
1 by brian
clean slate
401
{
205 by Brian Aker
uint32 -> uin32_t
402
  uint32_t dummy_offset;
1 by brian
clean slate
403
  
404
  if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
405
  {
205 by Brian Aker
uint32 -> uin32_t
406
    uint32_t add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
1 by brian
clean slate
407
    uint dummy_errors;
408
    if (realloc(str_length + add_length)) 
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
409
      return true;
1 by brian
clean slate
410
    str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
411
				  s, arg_length, cs, &dummy_errors);
412
  }
413
  else
414
  {
415
    if (realloc(str_length + arg_length)) 
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
416
      return true;
1 by brian
clean slate
417
    memcpy(Ptr + str_length, s, arg_length);
418
    str_length+= arg_length;
419
  }
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
420
  return false;
1 by brian
clean slate
421
}
422
423
205 by Brian Aker
uint32 -> uin32_t
424
/*
  Append arg_length bytes read from file with my_b_read().

  If the read reports an error, shrink(str_length) is called and the
  string length is left unchanged.

  RETURN
    false  ok
    true   realloc() failed or my_b_read() returned non-zero
*/
bool String::append(IO_CACHE* file, uint32_t arg_length)
{
  if (realloc(str_length + arg_length))
    return true;

  const int read_error= my_b_read(file, (uchar*) Ptr + str_length, arg_length);
  if (!read_error)
  {
    str_length+= arg_length;
    return false;
  }

  shrink(str_length);
  return true;
}
436
205 by Brian Aker
uint32 -> uin32_t
437
bool String::append_with_prefill(const char *s,uint32_t arg_length,
438
		 uint32_t full_length, char fill_char)
1 by brian
clean slate
439
{
440
  int t_length= arg_length > full_length ? arg_length : full_length;
441
442
  if (realloc(str_length + t_length))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
443
    return true;
1 by brian
clean slate
444
  t_length= full_length - arg_length;
445
  if (t_length > 0)
446
  {
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
447
    memset(Ptr+str_length, fill_char, t_length);
1 by brian
clean slate
448
    str_length=str_length + t_length;
449
  }
450
  append(s, arg_length);
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
451
  return false;
1 by brian
clean slate
452
}
453
205 by Brian Aker
uint32 -> uin32_t
454
uint32_t String::numchars()
1 by brian
clean slate
455
{
456
  return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
457
}
458
205 by Brian Aker
uint32 -> uin32_t
459
int String::charpos(int i,uint32_t offset)
1 by brian
clean slate
460
{
461
  if (i <= 0)
462
    return i;
463
  return str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,i);
464
}
465
205 by Brian Aker
uint32 -> uin32_t
466
int String::strstr(const String &s,uint32_t offset)
1 by brian
clean slate
467
{
468
  if (s.length()+offset <= str_length)
469
  {
470
    if (!s.length())
471
      return ((int) offset);	// Empty string is always found
472
473
    register const char *str = Ptr+offset;
474
    register const char *search=s.ptr();
475
    const char *end=Ptr+str_length-s.length()+1;
476
    const char *search_end=s.ptr()+s.length();
477
skip:
478
    while (str != end)
479
    {
480
      if (*str++ == *search)
481
      {
482
	register char *i,*j;
483
	i=(char*) str; j=(char*) search+1;
484
	while (j != search_end)
485
	  if (*i++ != *j++) goto skip;
486
	return (int) (str-Ptr) -1;
487
      }
488
    }
489
  }
490
  return -1;
491
}
492
493
/*
494
** Search string from end. Offset is offset to the end of string
495
*/
496
205 by Brian Aker
uint32 -> uin32_t
497
int String::strrstr(const String &s,uint32_t offset)
1 by brian
clean slate
498
{
499
  if (s.length() <= offset && offset <= str_length)
500
  {
501
    if (!s.length())
502
      return offset;				// Empty string is always found
503
    register const char *str = Ptr+offset-1;
504
    register const char *search=s.ptr()+s.length()-1;
505
506
    const char *end=Ptr+s.length()-2;
507
    const char *search_end=s.ptr()-1;
508
skip:
509
    while (str != end)
510
    {
511
      if (*str-- == *search)
512
      {
513
	register char *i,*j;
514
	i=(char*) str; j=(char*) search-1;
515
	while (j != search_end)
516
	  if (*i-- != *j--) goto skip;
517
	return (int) (i-Ptr) +1;
518
      }
519
    }
520
  }
521
  return -1;
522
}
523
524
/*
525
  Replace substring with string
526
  If wrong parameter or not enough memory, do nothing
527
*/
528
205 by Brian Aker
uint32 -> uin32_t
529
bool String::replace(uint32_t offset,uint32_t arg_length,const String &to)
1 by brian
clean slate
530
{
531
  return replace(offset,arg_length,to.ptr(),to.length());
532
}
533
205 by Brian Aker
uint32 -> uin32_t
534
bool String::replace(uint32_t offset,uint32_t arg_length,
535
                     const char *to, uint32_t to_length)
1 by brian
clean slate
536
{
537
  long diff = (long) to_length-(long) arg_length;
538
  if (offset+arg_length <= str_length)
539
  {
540
    if (diff < 0)
541
    {
542
      if (to_length)
543
	memcpy(Ptr+offset,to,to_length);
212.6.3 by Mats Kindahl
Removing deprecated functions from code and replacing them with C99 equivalents:
544
      memcpy(Ptr+offset+to_length, Ptr+offset+arg_length,
545
             str_length-offset-arg_length);
1 by brian
clean slate
546
    }
547
    else
548
    {
549
      if (diff)
550
      {
205 by Brian Aker
uint32 -> uin32_t
551
	if (realloc(str_length+(uint32_t) diff))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
552
	  return true;
1 by brian
clean slate
553
	bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length,
554
		  str_length-offset-arg_length);
555
      }
556
      if (to_length)
557
	memcpy(Ptr+offset,to,to_length);
558
    }
205 by Brian Aker
uint32 -> uin32_t
559
    str_length+=(uint32_t) diff;
1 by brian
clean slate
560
  }
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
561
  return false;
1 by brian
clean slate
562
}
563
564
565
// added by Holyfoot for "geometry" needs
205 by Brian Aker
uint32 -> uin32_t
566
int String::reserve(uint32_t space_needed, uint32_t grow_by)
1 by brian
clean slate
567
{
568
  if (Alloced_length < str_length + space_needed)
569
  {
570
    if (realloc(Alloced_length + max(space_needed, grow_by) - 1))
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
571
      return true;
1 by brian
clean slate
572
  }
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
573
  return false;
1 by brian
clean slate
574
}
575
205 by Brian Aker
uint32 -> uin32_t
576
void String::qs_append(const char *str, uint32_t len)
1 by brian
clean slate
577
{
578
  memcpy(Ptr + str_length, str, len + 1);
579
  str_length += len;
580
}
581
582
void String::qs_append(double d)
583
{
584
  char *buff = Ptr + str_length;
585
  str_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff, NULL);
586
}
587
588
void String::qs_append(double *d)
589
{
590
  double ld;
591
  float8get(ld, (char*) d);
592
  qs_append(ld);
593
}
594
595
void String::qs_append(int i)
596
{
597
  char *buff= Ptr + str_length;
598
  char *end= int10_to_str(i, buff, -10);
599
  str_length+= (int) (end-buff);
600
}
601
602
/*
  Append the decimal text form of the unsigned integer i, written in place
  by int10_to_str() with radix 10. No capacity check is made here.
*/
void String::qs_append(uint i)
{
  char *tail= Ptr + str_length;
  char *new_tail= int10_to_str(i, tail, 10);
  str_length+= (int) (new_tail - tail);
}
608
609
/*
610
  Compare strings according to collation, without end space.
611
612
  SYNOPSIS
613
    sortcmp()
614
    s		First string
615
    t		Second string
616
    cs		Collation
617
618
  NOTE:
619
    Normally this is case sensitive comparison
620
621
  RETURN
622
  < 0	s < t
623
  0	s == t
624
  > 0	s > t
625
*/
626
627
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
628
int sortcmp(const String *s,const String *t, const CHARSET_INFO * const cs)
1 by brian
clean slate
629
{
630
 return cs->coll->strnncollsp(cs,
631
                              (uchar *) s->ptr(),s->length(),
632
                              (uchar *) t->ptr(),t->length(), 0);
633
}
634
635
636
/*
637
  Compare strings byte by byte. End spaces are also compared.
638
639
  SYNOPSIS
640
    stringcmp()
641
    s		First string
642
    t		Second string
643
644
  NOTE:
645
    Strings are compared as a stream of uchars
646
647
  RETURN
648
  < 0	s < t
649
  0	s == t
650
  > 0	s > t
651
*/
652
653
654
int stringcmp(const String *s,const String *t)
655
{
205 by Brian Aker
uint32 -> uin32_t
656
  uint32_t s_len=s->length(),t_len=t->length(),len=min(s_len,t_len);
1 by brian
clean slate
657
  int cmp= memcmp(s->ptr(), t->ptr(), len);
658
  return (cmp) ? cmp : (int) (s_len - t_len);
659
}
660
661
205 by Brian Aker
uint32 -> uin32_t
662
/*
  Return a String that has room for from_length bytes and holds the
  content of 'from'.

  'from' itself is returned when it is already large enough, when it owns
  its buffer, when no 'to' was given or when 'to' is 'from' (in the last
  three cases after trying to grow it; the outcome of that realloc() is
  deliberately ignored). Otherwise the content, cut to from_length bytes,
  is copied into 'to'.

  RETURN
    'to' when the copy was made, 'from' in all other cases, including a
    failed realloc() of 'to'.
*/
String *copy_if_not_alloced(String *to,String *from,uint32_t from_length)
{
  if (from->Alloced_length >= from_length)
    return from;

  const bool grow_in_place= from->alloced || !to || from == to;
  if (grow_in_place)
  {
    (void) from->realloc(from_length);
    return from;
  }

  if (to->realloc(from_length))
    return from;                                // Actually an error

  const uint32_t keep= min(from->str_length, from_length);
  to->str_length= keep;
  if (keep)
    memcpy(to->Ptr, from->Ptr, keep);
  to->str_charset= from->str_charset;
  return to;
}
678
679
680
/****************************************************************************
681
  Help functions
682
****************************************************************************/
683
684
/*
685
  copy a string from one character set to another
686
  
687
  SYNOPSIS
688
    copy_and_convert()
689
    to			Store result here
690
    to_cs		Character set of result string
691
    from		Copy from here
692
    from_length		Length of from string
693
    from_cs		From character set
694
695
  NOTES
696
    'to' must be at least from_length * to_cs->mbmaxlen bytes long
697
698
  RETURN
699
    length of bytes copied to 'to'
700
*/
701
702
205 by Brian Aker
uint32 -> uin32_t
703
static uint32_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
704
copy_and_convert_extended(char *to, uint32_t to_length,
705
                          const CHARSET_INFO * const to_cs, 
205 by Brian Aker
uint32 -> uin32_t
706
                          const char *from, uint32_t from_length,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
707
                          const CHARSET_INFO * const from_cs,
1 by brian
clean slate
708
                          uint *errors)
709
{
710
  int         cnvres;
711
  my_wc_t     wc;
712
  const uchar *from_end= (const uchar*) from+from_length;
713
  char *to_start= to;
714
  uchar *to_end= (uchar*) to+to_length;
715
  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
716
  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
717
  uint error_count= 0;
718
719
  while (1)
720
  {
721
    if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from,
722
				      from_end)) > 0)
723
      from+= cnvres;
724
    else if (cnvres == MY_CS_ILSEQ)
725
    {
726
      error_count++;
727
      from++;
728
      wc= '?';
729
    }
730
    else if (cnvres > MY_CS_TOOSMALL)
731
    {
732
      /*
733
        A correct multibyte sequence detected
734
        But it doesn't have Unicode mapping.
735
      */
736
      error_count++;
737
      from+= (-cnvres);
738
      wc= '?';
739
    }
740
    else
741
      break;  // Not enough characters
742
743
outp:
744
    if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
745
      to+= cnvres;
746
    else if (cnvres == MY_CS_ILUNI && wc != '?')
747
    {
748
      error_count++;
749
      wc= '?';
750
      goto outp;
751
    }
752
    else
753
      break;
754
  }
755
  *errors= error_count;
205 by Brian Aker
uint32 -> uin32_t
756
  return (uint32_t) (to - to_start);
1 by brian
clean slate
757
}
758
759
760
/*
761
  Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
762
*/
205 by Brian Aker
uint32 -> uin32_t
763
uint32_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
764
copy_and_convert(char *to, uint32_t to_length, const CHARSET_INFO * const to_cs, 
765
                 const char *from, uint32_t from_length,
766
				 const CHARSET_INFO * const from_cs, uint *errors)
1 by brian
clean slate
767
{
768
  /*
769
    If any of the character sets is not ASCII compatible,
770
    immediately switch to slow mb_wc->wc_mb method.
771
  */
772
  if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
773
    return copy_and_convert_extended(to, to_length, to_cs,
774
                                     from, from_length, from_cs, errors);
775
205 by Brian Aker
uint32 -> uin32_t
776
  uint32_t length= min(to_length, from_length), length2= length;
1 by brian
clean slate
777
778
#if defined(__i386__)
779
  /*
780
    Special loop for i386, it allows to refer to a
781
    non-aligned memory block as UINT32, which makes
782
    it possible to copy four bytes at once. This
783
    gives about 10% performance improvement comparing
784
    to byte-by-byte loop.
785
  */
786
  for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
787
  {
205 by Brian Aker
uint32 -> uin32_t
788
    if ((*(uint32_t*)from) & 0x80808080)
1 by brian
clean slate
789
      break;
205 by Brian Aker
uint32 -> uin32_t
790
    *((uint32_t*) to)= *((const uint32_t*) from);
1 by brian
clean slate
791
  }
792
#endif
793
794
  for (; ; *to++= *from++, length--)
795
  {
796
    if (!length)
797
    {
798
      *errors= 0;
799
      return length2;
800
    }
801
    if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
802
    {
205 by Brian Aker
uint32 -> uin32_t
803
      uint32_t copied_length= length2 - length;
1 by brian
clean slate
804
      to_length-= copied_length;
805
      from_length-= copied_length;
806
      return copied_length + copy_and_convert_extended(to, to_length,
807
                                                       to_cs,
808
                                                       from, from_length,
809
                                                       from_cs,
810
                                                       errors);
811
    }
812
  }
813
51.1.74 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
814
  assert(false); // Should never get to here
1 by brian
clean slate
815
  return 0;           // Make compiler happy
816
}
817
818
819
/**
820
  Copy string with HEX-encoding of "bad" characters.
821
822
  @details This functions copies the string pointed by "src"
823
  to the string pointed by "dst". Not more than "srclen" bytes
824
  are read from "src". Any sequences of bytes representing
825
  a not-well-formed substring (according to cs) are hex-encoded,
826
  and all well-formed substrings (according to cs) are copied as is.
827
  Not more than "dstlen" bytes are written to "dst". The number 
828
  of bytes written to "dst" is returned.
829
  
830
   @param      cs       character set pointer of the destination string
831
   @param[out] dst      destination string
832
   @param      dstlen   size of dst
833
   @param      src      source string
834
   @param      srclen   length of src
835
836
   @retval     result length
837
*/
838
839
size_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
840
my_copy_with_hex_escaping(const CHARSET_INFO * const cs,
1 by brian
clean slate
841
                          char *dst, size_t dstlen,
842
                          const char *src, size_t srclen)
843
{
844
  const char *srcend= src + srclen;
845
  char *dst0= dst;
846
847
  for ( ; src < srcend ; )
848
  {
849
    size_t chlen;
850
    if ((chlen= my_ismbchar(cs, src, srcend)))
851
    {
852
      if (dstlen < chlen)
853
        break; /* purecov: inspected */
854
      memcpy(dst, src, chlen);
855
      src+= chlen;
856
      dst+= chlen;
857
      dstlen-= chlen;
858
    }
859
    else if (*src & 0x80)
860
    {
861
      if (dstlen < 4)
862
        break; /* purecov: inspected */
863
      *dst++= '\\';
864
      *dst++= 'x';
865
      *dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
866
      *dst++= _dig_vec_upper[((unsigned char) *src) & 15];
867
      src++;
868
      dstlen-= 4;
869
    }
870
    else
871
    {
872
      if (dstlen < 1)
873
        break; /* purecov: inspected */
874
      *dst++= *src++;
875
      dstlen--;
876
    }
877
  }
878
  return dst - dst0;
879
}
880
881
/*
882
  copy a string,
883
  with optional character set conversion,
884
  with optional left padding (for binary -> UCS2 conversion)
885
  
886
  SYNOPSIS
887
    well_formed_copy_nchars()
888
    to			     Store result here
889
    to_length                Maxinum length of "to" string
890
    to_cs		     Character set of "to" string
891
    from		     Copy from here
892
    from_length		     Length of from string
893
    from_cs		     From character set
894
    nchars                   Copy not more that nchars characters
895
    well_formed_error_pos    Return position when "from" is not well formed
896
                             or NULL otherwise.
897
    cannot_convert_error_pos Return position where a not convertable
898
                             character met, or NULL otherwise.
899
    from_end_pos             Return position where scanning of "from"
900
                             string stopped.
901
  NOTES
902
903
  RETURN
904
    length of bytes copied to 'to'
905
*/
906
907
205 by Brian Aker
uint32 -> uin32_t
908
uint32_t
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
909
well_formed_copy_nchars(const CHARSET_INFO * const to_cs,
1 by brian
clean slate
910
                        char *to, uint to_length,
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
911
                        const CHARSET_INFO * const from_cs,
1 by brian
clean slate
912
                        const char *from, uint from_length,
913
                        uint nchars,
914
                        const char **well_formed_error_pos,
915
                        const char **cannot_convert_error_pos,
916
                        const char **from_end_pos)
917
{
918
  uint res;
919
920
  if ((to_cs == &my_charset_bin) || 
921
      (from_cs == &my_charset_bin) ||
922
      (to_cs == from_cs) ||
923
      my_charset_same(from_cs, to_cs))
924
  {
925
    if (to_length < to_cs->mbminlen || !nchars)
926
    {
927
      *from_end_pos= from;
928
      *cannot_convert_error_pos= NULL;
929
      *well_formed_error_pos= NULL;
930
      return 0;
931
    }
932
933
    if (to_cs == &my_charset_bin)
934
    {
935
      res= min(min(nchars, to_length), from_length);
936
      memmove(to, from, res);
937
      *from_end_pos= from + res;
938
      *well_formed_error_pos= NULL;
939
      *cannot_convert_error_pos= NULL;
940
    }
941
    else
942
    {
943
      int well_formed_error;
944
      uint from_offset;
945
946
      if ((from_offset= (from_length % to_cs->mbminlen)) &&
947
          (from_cs == &my_charset_bin))
948
      {
949
        /*
950
          Copying from BINARY to UCS2 needs to prepend zeros sometimes:
951
          INSERT INTO t1 (ucs2_column) VALUES (0x01);
952
          0x01 -> 0x0001
953
        */
954
        uint pad_length= to_cs->mbminlen - from_offset;
212.6.1 by Mats Kindahl
Replacing all bzero() calls with memset() calls and removing the bzero.c file.
955
        memset(to, 0, pad_length);
1 by brian
clean slate
956
        memmove(to + pad_length, from, from_offset);
957
        nchars--;
958
        from+= from_offset;
959
        from_length-= from_offset;
960
        to+= to_cs->mbminlen;
961
        to_length-= to_cs->mbminlen;
962
      }
963
964
      set_if_smaller(from_length, to_length);
965
      res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
966
                                        nchars, &well_formed_error);
967
      memmove(to, from, res);
968
      *from_end_pos= from + res;
969
      *well_formed_error_pos= well_formed_error ? from + res : NULL;
970
      *cannot_convert_error_pos= NULL;
971
      if (from_offset)
972
        res+= to_cs->mbminlen;
973
    }
974
  }
975
  else
976
  {
977
    int cnvres;
978
    my_wc_t wc;
979
    my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
980
    my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
981
    const uchar *from_end= (const uchar*) from + from_length;
982
    uchar *to_end= (uchar*) to + to_length;
983
    char *to_start= to;
984
    *well_formed_error_pos= NULL;
985
    *cannot_convert_error_pos= NULL;
986
987
    for ( ; nchars; nchars--)
988
    {
989
      const char *from_prev= from;
990
      if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
991
        from+= cnvres;
992
      else if (cnvres == MY_CS_ILSEQ)
993
      {
994
        if (!*well_formed_error_pos)
995
          *well_formed_error_pos= from;
996
        from++;
997
        wc= '?';
998
      }
999
      else if (cnvres > MY_CS_TOOSMALL)
1000
      {
1001
        /*
1002
          A correct multibyte sequence detected
1003
          But it doesn't have Unicode mapping.
1004
        */
1005
        if (!*cannot_convert_error_pos)
1006
          *cannot_convert_error_pos= from;
1007
        from+= (-cnvres);
1008
        wc= '?';
1009
      }
1010
      else
1011
        break;  // Not enough characters
1012
1013
outp:
1014
      if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
1015
        to+= cnvres;
1016
      else if (cnvres == MY_CS_ILUNI && wc != '?')
1017
      {
1018
        if (!*cannot_convert_error_pos)
1019
          *cannot_convert_error_pos= from_prev;
1020
        wc= '?';
1021
        goto outp;
1022
      }
1023
      else
1024
      {
1025
        from= from_prev;
1026
        break;
1027
      }
1028
    }
1029
    *from_end_pos= from;
1030
    res= to - to_start;
1031
  }
205 by Brian Aker
uint32 -> uin32_t
1032
  return (uint32_t) res;
1 by brian
clean slate
1033
}
1034
1035
1036
1037
1038
/*
  Append the contents of this string to "str", writing the special
  bytes backslash, NUL, single quote, newline, carriage return and
  Ctrl-Z as two-character backslash sequences. All other bytes are
  appended unchanged.
*/
void String::print(String *str)
{
  /* Bounds are captured once, before anything is appended to "str". */
  char *cursor= (char*) Ptr;
  char * const stop= cursor + str_length;

  while (cursor < stop)
  {
    const uchar c= *cursor++;
    switch (c)
    {
    case '\0':
      str->append(STRING_WITH_LEN("\\0"));
      break;
    case '\n':
      str->append(STRING_WITH_LEN("\\n"));
      break;
    case '\r':
      str->append(STRING_WITH_LEN("\\r"));
      break;
    case '\032': // Ctrl-Z
      str->append(STRING_WITH_LEN("\\Z"));
      break;
    case '\'':
      str->append(STRING_WITH_LEN("\\'"));
      break;
    case '\\':
      str->append(STRING_WITH_LEN("\\\\"));
      break;
    default:
      str->append(c);
      break;
    }
  }
}
1069
1070
1071
/*
1072
  Exchange state of this object and argument.
1073
1074
  SYNOPSIS
1075
    String::swap()
1076
1077
  RETURN
1078
    Target string will contain state of this object and vice versa.
1079
*/
1080
1081
void String::swap(String &s)
1082
{
1083
  swap_variables(char *, Ptr, s.Ptr);
205 by Brian Aker
uint32 -> uin32_t
1084
  swap_variables(uint32_t, str_length, s.str_length);
1085
  swap_variables(uint32_t, Alloced_length, s.Alloced_length);
1 by brian
clean slate
1086
  swap_variables(bool, alloced, s.alloced);
264.2.6 by Andrey Hristov
Constify the usage of CHARSET_INFO almost to the last place in the code.
1087
  swap_variables(const CHARSET_INFO *, str_charset, s.str_charset);
1 by brian
clean slate
1088
}