~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
/* This file is originally from the mysql distribution. Coded by monty */
17
18
#ifdef USE_PRAGMA_IMPLEMENTATION
19
#pragma implementation				// gcc: Class implementation
20
#endif
21
22
#include <my_global.h>
23
#include <my_sys.h>
24
#include <m_string.h>
25
#include <m_ctype.h>
26
27
/*
28
  The following extern declarations are ok as these are interface functions
29
  required by the string function
30
*/
31
32
extern uchar* sql_alloc(unsigned size);
33
extern void sql_element_free(void *ptr);
34
35
#include "sql_string.h"
36
37
/*****************************************************************************
38
** String functions
39
*****************************************************************************/
40
41
/*
  Reset the string to empty, making sure the buffer can hold at least
  arg_length bytes plus a terminating 0.

  RETURN
    FALSE  ok
    TRUE   my_malloc() failed
*/
bool String::real_alloc(uint32 arg_length)
{
  const uint32 wanted= ALIGN_SIZE(arg_length+1);	// +1 for the trailing 0

  str_length=0;
  if (Alloced_length >= wanted)
  {
    Ptr[0]=0;					// Current buffer is big enough
    return FALSE;
  }

  free();					// free() the current buffer first
  Ptr=(char*) my_malloc(wanted,MYF(MY_WME));
  if (!Ptr)
    return TRUE;
  Alloced_length=wanted;
  alloced=1;
  Ptr[0]=0;
  return FALSE;
}
56
57
58
/*
59
** Check that string is big enough. Set string[alloc_length] to 0
60
** (for C functions)
61
*/
62
63
/*
  Grow the buffer so that it can hold alloc_length bytes plus a trailing 0,
  keeping the current contents, and store a 0 at Ptr[alloc_length].

  RETURN
    FALSE  ok
    TRUE   allocation failed
*/
bool String::realloc(uint32 alloc_length)
{
  const uint32 needed=ALIGN_SIZE(alloc_length+1);

  if (Alloced_length >= needed)
  {
    Ptr[alloc_length]=0;			// Nothing to grow
    return FALSE;
  }

  char *buf;
  if (alloced)
  {
    /* The buffer was allocated by this object: resize it */
    buf= (char*) my_realloc(Ptr,needed,MYF(MY_WME));
    if (!buf)
      return TRUE;				// Signal error
  }
  else
  {
    /* The buffer was not allocated here: allocate one and copy the data */
    buf= (char*) my_malloc(needed,MYF(MY_WME));
    if (!buf)
      return TRUE;				// Signal error
    if (str_length)				// Avoid bugs in memcpy on AIX
      memcpy(buf,Ptr,str_length);
    buf[str_length]=0;
    alloced=1;
  }
  Ptr=buf;
  Alloced_length=needed;
  Ptr[alloc_length]=0;				// This makes other funcs shorter
  return FALSE;
}
94
95
/*
  Set the string to the decimal representation of num in charset cs.
  The converter is called with base 10 when unsigned_flag is set and
  with base -10 otherwise.

  RETURN
    FALSE  ok
    TRUE   alloc() failed
*/
bool String::set_int(longlong num, bool unsigned_flag, CHARSET_INFO *cs)
{
  const uint buf_size=20*cs->mbmaxlen+1;

  if (alloc(buf_size))
    return TRUE;

  const int radix= unsigned_flag ? 10 : -10;
  str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,buf_size,radix,num);
  str_charset=cs;
  return FALSE;
}
106
107
bool String::set_real(double num,uint decimals, CHARSET_INFO *cs)
108
{
109
  char buff[FLOATING_POINT_BUFFER];
110
  uint dummy_errors;
111
  size_t len;
112
113
  str_charset=cs;
114
  if (decimals >= NOT_FIXED_DEC)
115
  {
116
    len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
117
    return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
118
  }
119
  len= my_fcvt(num, decimals, buff, NULL);
120
  return copy(buff, (uint32) len, &my_charset_latin1, cs,
121
              &dummy_errors);
122
}
123
124
125
bool String::copy()
126
{
127
  if (!alloced)
128
  {
129
    Alloced_length=0;				// Force realloc
130
    return realloc(str_length);
131
  }
132
  return FALSE;
133
}
134
135
bool String::copy(const String &str)
136
{
137
  if (alloc(str.str_length))
138
    return TRUE;
139
  str_length=str.str_length;
140
  bmove(Ptr,str.Ptr,str_length);		// May be overlapping
141
  Ptr[str_length]=0;
142
  str_charset=str.str_charset;
143
  return FALSE;
144
}
145
146
bool String::copy(const char *str,uint32 arg_length, CHARSET_INFO *cs)
147
{
148
  if (alloc(arg_length))
149
    return TRUE;
150
  if ((str_length=arg_length))
151
    memcpy(Ptr,str,arg_length);
152
  Ptr[arg_length]=0;
153
  str_charset=cs;
154
  return FALSE;
155
}
156
157
158
/*
159
  Checks that the source string can be just copied to the destination string
160
  without conversion.
161
162
  SYNOPSIS
163
164
  needs_conversion()
165
  arg_length		Length of string to copy.
166
  from_cs		Character set to copy from
167
  to_cs			Character set to copy to
168
  uint32 *offset	Returns number of unaligned characters.
169
170
  RETURN
171
   0  No conversion needed
172
   1  Either character set conversion or adding leading  zeros
173
      (e.g. for UCS-2) must be done
174
175
  NOTE
176
  to_cs may be NULL for "no conversion" if the system variable
177
  character_set_results is NULL.
178
*/
179
180
bool String::needs_conversion(uint32 arg_length,
181
			      CHARSET_INFO *from_cs,
182
			      CHARSET_INFO *to_cs,
183
			      uint32 *offset)
184
{
185
  *offset= 0;
186
  if (!to_cs ||
187
      (to_cs == &my_charset_bin) || 
188
      (to_cs == from_cs) ||
189
      my_charset_same(from_cs, to_cs) ||
190
      ((from_cs == &my_charset_bin) &&
191
       (!(*offset=(arg_length % to_cs->mbminlen)))))
192
    return FALSE;
193
  return TRUE;
194
}
195
196
197
/*
198
  Copy a string in a multi-byte character set, adding leading zeros.
199
200
  SYNOPSIS
201
202
  copy_aligned()
203
  str			String to copy
204
  arg_length		Length of string. This should NOT be divisible by
205
			cs->mbminlen.
206
  offset		arg_length % cs->mb_minlength
207
  cs			Character set for 'str'
208
209
  NOTES
210
    For real multi-byte, ascii incompatible character sets,
211
    like UCS-2, add leading zeros if we have an incomplete character.
212
    Thus, 
213
      SELECT _ucs2 0xAA 
214
    will automatically be converted into
215
      SELECT _ucs2 0x00AA
216
217
  RETURN
218
    0  ok
219
    1  error
220
*/
221
222
bool String::copy_aligned(const char *str,uint32 arg_length, uint32 offset,
223
			  CHARSET_INFO *cs)
224
{
225
  /* How many bytes are in incomplete character */
226
  offset= cs->mbmaxlen - offset; /* How many zeros we should prepend */
227
  DBUG_ASSERT(offset && offset != cs->mbmaxlen);
228
229
  uint32 aligned_length= arg_length + offset;
230
  if (alloc(aligned_length))
231
    return TRUE;
232
  
233
  /*
234
    Note, this is only safe for big-endian UCS-2.
235
    If we add little-endian UCS-2 sometimes, this code
236
    will be more complicated. But it's OK for now.
237
  */
238
  bzero((char*) Ptr, offset);
239
  memcpy(Ptr + offset, str, arg_length);
240
  Ptr[aligned_length]=0;
241
  /* str_length is always >= 0 as arg_length is != 0 */
242
  str_length= aligned_length;
243
  str_charset= cs;
244
  return FALSE;
245
}
246
247
248
bool String::set_or_copy_aligned(const char *str,uint32 arg_length,
249
				 CHARSET_INFO *cs)
250
{
251
  /* How many bytes are in incomplete character */
252
  uint32 offset= (arg_length % cs->mbminlen); 
253
  
254
  if (!offset) /* All characters are complete, just copy */
255
  {
256
    set(str, arg_length, cs);
257
    return FALSE;
258
  }
259
  return copy_aligned(str, arg_length, offset, cs);
260
}
261
262
	/* Copy with charset conversion */
263
264
bool String::copy(const char *str, uint32 arg_length,
265
		  CHARSET_INFO *from_cs, CHARSET_INFO *to_cs, uint *errors)
266
{
267
  uint32 offset;
268
  if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
269
  {
270
    *errors= 0;
271
    return copy(str, arg_length, to_cs);
272
  }
273
  if ((from_cs == &my_charset_bin) && offset)
274
  {
275
    *errors= 0;
276
    return copy_aligned(str, arg_length, offset, to_cs);
277
  }
278
  uint32 new_length= to_cs->mbmaxlen*arg_length;
279
  if (alloc(new_length))
280
    return TRUE;
281
  str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
282
                              str, arg_length, from_cs, errors);
283
  str_charset=to_cs;
284
  return FALSE;
285
}
286
287
288
/*
289
  Set a string to the value of a latin1-string, keeping the original charset
290
  
291
  SYNOPSIS
292
    set_ascii()
293
    str			String of a simple charset (latin1)
294
    arg_length		Length of string
295
296
  IMPLEMENTATION
297
    If string object is of a simple character set, set it to point to the
298
    given string.
299
    If not, make a copy and convert it to the new character set.
300
301
  RETURN
302
    0	ok
303
    1	Could not allocate result buffer
304
305
*/
306
307
bool String::set_ascii(const char *str, uint32 arg_length)
308
{
309
  if (str_charset->mbminlen == 1)
310
  {
311
    set(str, arg_length, str_charset);
312
    return 0;
313
  }
314
  uint dummy_errors;
315
  return copy(str, arg_length, &my_charset_latin1, str_charset, &dummy_errors);
316
}
317
318
319
/* This is used by mysql.cc */
320
321
/*
  Make the string exactly max_length bytes long: truncate it if it is
  longer, otherwise extend it with fill_char.

  RETURN
    FALSE  ok
    TRUE   realloc() failed
*/
bool String::fill(uint32 max_length,char fill_char)
{
  if (str_length > max_length)
  {
    str_length=max_length;			// Truncate
    Ptr[max_length]=0;
    return FALSE;
  }

  if (realloc(max_length))
    return TRUE;
  bfill(Ptr+str_length,max_length-str_length,fill_char);
  str_length=max_length;
  return FALSE;
}
334
335
void String::strip_sp()
336
{
337
   while (str_length && my_isspace(str_charset,Ptr[str_length-1]))
338
    str_length--;
339
}
340
341
bool String::append(const String &s)
342
{
343
  if (s.length())
344
  {
345
    if (realloc(str_length+s.length()))
346
      return TRUE;
347
    memcpy(Ptr+str_length,s.ptr(),s.length());
348
    str_length+=s.length();
349
  }
350
  return FALSE;
351
}
352
353
354
/*
355
  Append an ASCII string to a string of the current character set
356
*/
357
358
bool String::append(const char *s,uint32 arg_length)
359
{
360
  if (!arg_length)
361
    return FALSE;
362
363
  /*
364
    For an ASCII incompatible string, e.g. UCS-2, we need to convert
365
  */
366
  if (str_charset->mbminlen > 1)
367
  {
368
    uint32 add_length=arg_length * str_charset->mbmaxlen;
369
    uint dummy_errors;
370
    if (realloc(str_length+ add_length))
371
      return TRUE;
372
    str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
373
				  s, arg_length, &my_charset_latin1,
374
                                  &dummy_errors);
375
    return FALSE;
376
  }
377
378
  /*
379
    For an ASCII compatinble string we can just append.
380
  */
381
  if (realloc(str_length+arg_length))
382
    return TRUE;
383
  memcpy(Ptr+str_length,s,arg_length);
384
  str_length+=arg_length;
385
  return FALSE;
386
}
387
388
389
/*
390
  Append a 0-terminated ASCII string
391
*/
392
393
bool String::append(const char *s)
394
{
395
  return append(s, strlen(s));
396
}
397
398
399
/*
400
  Append a string in the given charset to the string
401
  with character set recoding
402
*/
403
404
bool String::append(const char *s,uint32 arg_length, CHARSET_INFO *cs)
405
{
406
  uint32 dummy_offset;
407
  
408
  if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
409
  {
410
    uint32 add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
411
    uint dummy_errors;
412
    if (realloc(str_length + add_length)) 
413
      return TRUE;
414
    str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
415
				  s, arg_length, cs, &dummy_errors);
416
  }
417
  else
418
  {
419
    if (realloc(str_length + arg_length)) 
420
      return TRUE;
421
    memcpy(Ptr + str_length, s, arg_length);
422
    str_length+= arg_length;
423
  }
424
  return FALSE;
425
}
426
427
428
/*
  Append arg_length bytes read from file with my_b_read().
  If the read fails, shrink(str_length) is called and the length is left
  unchanged.

  RETURN
    FALSE  ok
    TRUE   realloc() or the read failed
*/
bool String::append(IO_CACHE* file, uint32 arg_length)
{
  if (realloc(str_length+arg_length))
    return TRUE;

  if (!my_b_read(file, (uchar*) Ptr + str_length, arg_length))
  {
    str_length+=arg_length;
    return FALSE;
  }

  shrink(str_length);				// Read failed
  return TRUE;
}
440
441
bool String::append_with_prefill(const char *s,uint32 arg_length,
442
		 uint32 full_length, char fill_char)
443
{
444
  int t_length= arg_length > full_length ? arg_length : full_length;
445
446
  if (realloc(str_length + t_length))
447
    return TRUE;
448
  t_length= full_length - arg_length;
449
  if (t_length > 0)
450
  {
451
    bfill(Ptr+str_length, t_length, fill_char);
452
    str_length=str_length + t_length;
453
  }
454
  append(s, arg_length);
455
  return FALSE;
456
}
457
458
/*
  Return what the charset's numchars() handler reports for the byte
  range [Ptr, Ptr+str_length).
*/
uint32 String::numchars()
{
  const char *first= Ptr;
  const char *last= Ptr+str_length;
  return str_charset->cset->numchars(str_charset, first, last);
}
462
463
int String::charpos(int i,uint32 offset)
464
{
465
  if (i <= 0)
466
    return i;
467
  return str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,i);
468
}
469
470
int String::strstr(const String &s,uint32 offset)
471
{
472
  if (s.length()+offset <= str_length)
473
  {
474
    if (!s.length())
475
      return ((int) offset);	// Empty string is always found
476
477
    register const char *str = Ptr+offset;
478
    register const char *search=s.ptr();
479
    const char *end=Ptr+str_length-s.length()+1;
480
    const char *search_end=s.ptr()+s.length();
481
skip:
482
    while (str != end)
483
    {
484
      if (*str++ == *search)
485
      {
486
	register char *i,*j;
487
	i=(char*) str; j=(char*) search+1;
488
	while (j != search_end)
489
	  if (*i++ != *j++) goto skip;
490
	return (int) (str-Ptr) -1;
491
      }
492
    }
493
  }
494
  return -1;
495
}
496
497
/*
498
** Search string from end. Offset is offset to the end of string
499
*/
500
501
int String::strrstr(const String &s,uint32 offset)
502
{
503
  if (s.length() <= offset && offset <= str_length)
504
  {
505
    if (!s.length())
506
      return offset;				// Empty string is always found
507
    register const char *str = Ptr+offset-1;
508
    register const char *search=s.ptr()+s.length()-1;
509
510
    const char *end=Ptr+s.length()-2;
511
    const char *search_end=s.ptr()-1;
512
skip:
513
    while (str != end)
514
    {
515
      if (*str-- == *search)
516
      {
517
	register char *i,*j;
518
	i=(char*) str; j=(char*) search-1;
519
	while (j != search_end)
520
	  if (*i-- != *j--) goto skip;
521
	return (int) (i-Ptr) +1;
522
      }
523
    }
524
  }
525
  return -1;
526
}
527
528
/*
529
  Replace substring with string
530
  If wrong parameter or not enough memory, do nothing
531
*/
532
533
/*
  Replace arg_length bytes at offset with the contents of to.
  Thin wrapper around the (pointer, length) overload.
*/
bool String::replace(uint32 offset,uint32 arg_length,const String &to)
{
  const char *src= to.ptr();
  const uint32 src_length= to.length();
  return replace(offset,arg_length,src,src_length);
}
537
538
bool String::replace(uint32 offset,uint32 arg_length,
539
                     const char *to, uint32 to_length)
540
{
541
  long diff = (long) to_length-(long) arg_length;
542
  if (offset+arg_length <= str_length)
543
  {
544
    if (diff < 0)
545
    {
546
      if (to_length)
547
	memcpy(Ptr+offset,to,to_length);
548
      bmove(Ptr+offset+to_length,Ptr+offset+arg_length,
549
	    str_length-offset-arg_length);
550
    }
551
    else
552
    {
553
      if (diff)
554
      {
555
	if (realloc(str_length+(uint32) diff))
556
	  return TRUE;
557
	bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length,
558
		  str_length-offset-arg_length);
559
      }
560
      if (to_length)
561
	memcpy(Ptr+offset,to,to_length);
562
    }
563
    str_length+=(uint32) diff;
564
  }
565
  return FALSE;
566
}
567
568
569
// added by Holyfoot for "geometry" needs
570
/*
  If Alloced_length is less than str_length + space_needed, grow the
  buffer by the larger of space_needed and grow_by.

  RETURN
    FALSE  ok
    TRUE   realloc() failed
*/
int String::reserve(uint32 space_needed, uint32 grow_by)
{
  if (Alloced_length >= str_length + space_needed)
    return FALSE;				// Enough room already

  const uint32 step= max(space_needed, grow_by);
  if (realloc(Alloced_length + step - 1))
    return TRUE;
  return FALSE;
}
579
580
void String::qs_append(const char *str, uint32 len)
581
{
582
  memcpy(Ptr + str_length, str, len + 1);
583
  str_length += len;
584
}
585
586
void String::qs_append(double d)
587
{
588
  char *buff = Ptr + str_length;
589
  str_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff, NULL);
590
}
591
592
void String::qs_append(double *d)
593
{
594
  double ld;
595
  float8get(ld, (char*) d);
596
  qs_append(ld);
597
}
598
599
void String::qs_append(int i)
600
{
601
  char *buff= Ptr + str_length;
602
  char *end= int10_to_str(i, buff, -10);
603
  str_length+= (int) (end-buff);
604
}
605
606
/*
  Append the decimal text of i, produced by int10_to_str() with base 10.
  No size check or reallocation is done here.
*/
void String::qs_append(uint i)
{
  char *start= Ptr + str_length;
  char *stop= int10_to_str(i, start, 10);
  str_length+= (int) (stop-start);
}
612
613
/*
614
  Compare strings according to collation, without end space.
615
616
  SYNOPSIS
617
    sortcmp()
618
    s		First string
619
    t		Second string
620
    cs		Collation
621
622
  NOTE:
623
    Normally this is case sensitive comparison
624
625
  RETURN
626
  < 0	s < t
627
  0	s == t
628
  > 0	s > t
629
*/
630
631
632
int sortcmp(const String *s,const String *t, CHARSET_INFO *cs)
633
{
634
 return cs->coll->strnncollsp(cs,
635
                              (uchar *) s->ptr(),s->length(),
636
                              (uchar *) t->ptr(),t->length(), 0);
637
}
638
639
640
/*
641
  Compare strings byte by byte. End spaces are also compared.
642
643
  SYNOPSIS
644
    stringcmp()
645
    s		First string
646
    t		Second string
647
648
  NOTE:
649
    Strings are compared as a stream of uchars
650
651
  RETURN
652
  < 0	s < t
653
  0	s == t
654
  > 0	s > t
655
*/
656
657
658
int stringcmp(const String *s,const String *t)
659
{
660
  uint32 s_len=s->length(),t_len=t->length(),len=min(s_len,t_len);
661
  int cmp= memcmp(s->ptr(), t->ptr(), len);
662
  return (cmp) ? cmp : (int) (s_len - t_len);
663
}
664
665
666
/*
  Return a string with room for from_length bytes that holds from's data.

  from is returned when it is already big enough, when it can be grown
  in place (it owns its buffer, to is NULL, or to == from), or when
  growing to fails. Otherwise the data is copied into to.
*/
String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
{
  if (from->Alloced_length >= from_length)
    return from;

  const bool grow_in_place= from->alloced || !to || from == to;
  if (grow_in_place)
  {
    (void) from->realloc(from_length);
    return from;
  }

  if (to->realloc(from_length))
    return from;				// Actually an error

  to->str_length=min(from->str_length,from_length);
  if (to->str_length)
    memcpy(to->Ptr,from->Ptr,to->str_length);
  to->str_charset=from->str_charset;
  return to;
}
682
683
684
/****************************************************************************
685
  Help functions
686
****************************************************************************/
687
688
/*
689
  copy a string from one character set to another
690
  
691
  SYNOPSIS
692
    copy_and_convert()
693
    to			Store result here
694
    to_cs		Character set of result string
695
    from		Copy from here
696
    from_length		Length of from string
697
    from_cs		From character set
698
699
  NOTES
700
    'to' must be at least from_length * to_cs->mbmaxlen bytes long
701
702
  RETURN
703
    length of bytes copied to 'to'
704
*/
705
706
707
/*
  Convert from (from_cs) to to (to_cs) one character at a time using the
  charsets' mb_wc / wc_mb handlers.

  Invalid input sequences and characters that cannot be mapped are
  replaced by '?' and counted in *errors. The loop ends when the input
  is exhausted or the output handler cannot store another character.

  RETURN
    number of bytes written to 'to'
*/
static uint32
copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
                          const char *from, uint32 from_length,
                          CHARSET_INFO *from_cs,
                          uint *errors)
{
  const uchar *from_end= (const uchar*) from+from_length;
  uchar *to_end= (uchar*) to+to_length;
  char *const out_begin= to;
  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
  my_wc_t wc;
  int rc;
  uint bad= 0;

  for (;;)
  {
    /* Read one character */
    rc= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end);
    if (rc > 0)
      from+= rc;
    else if (rc == MY_CS_ILSEQ)
    {
      bad++;
      from++;
      wc= '?';
    }
    else if (rc > MY_CS_TOOSMALL)
    {
      /*
        A correct multibyte sequence detected
        But it doesn't have Unicode mapping.
      */
      bad++;
      from+= (-rc);
      wc= '?';
    }
    else
      break;  // Not enough characters

    /* Write it; retry once with '?' if it cannot be represented */
    rc= (*wc_mb)(to_cs, wc, (uchar*) to, to_end);
    if (rc <= 0 && rc == MY_CS_ILUNI && wc != '?')
    {
      bad++;
      wc= '?';
      rc= (*wc_mb)(to_cs, wc, (uchar*) to, to_end);
    }
    if (rc <= 0)
      break;
    to+= rc;
  }
  *errors= bad;
  return (uint32) (to - out_begin);
}
761
762
763
/*
764
  Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
765
*/
766
/*
  Convert from (from_cs) to to (to_cs).

  When both charsets are ASCII compatible, bytes <= 0x7F are copied
  directly; the first byte > 0x7F hands the rest of the input over to
  copy_and_convert_extended(). At most min(to_length, from_length) bytes
  are handled by the direct copy.

  RETURN
    number of bytes written to 'to'; *errors holds the error count
*/
uint32
copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs, 
                 const char *from, uint32 from_length, CHARSET_INFO *from_cs,
                 uint *errors)
{
  /*
    If any of the character sets is not ASCII compatible,
    immediately switch to slow mb_wc->wc_mb method.
  */
  if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
    return copy_and_convert_extended(to, to_length, to_cs,
                                     from, from_length, from_cs, errors);

  const uint32 fast_total= min(to_length, from_length);
  uint32 remaining= fast_total;

#if defined(__i386__)
  /*
    Special loop for i386: copy four bytes at a time through an
    unaligned uint32 access as long as none of them has its high bit set.
  */
  while (remaining >= 4 && !((*(uint32*)from) & 0x80808080))
  {
    *((uint32*) to)= *((const uint32*) from);
    remaining-= 4;
    from+= 4;
    to+= 4;
  }
#endif

  while (remaining)
  {
    if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
    {
      const uint32 copied_length= fast_total - remaining;
      return copied_length +
             copy_and_convert_extended(to, to_length - copied_length,
                                       to_cs,
                                       from, from_length - copied_length,
                                       from_cs,
                                       errors);
    }
    *to++= *from++;
    remaining--;
  }

  *errors= 0;
  return fast_total;
}
820
821
822
/**
823
  Copy string with HEX-encoding of "bad" characters.
824
825
  @details This function copies the string pointed to by "src"
826
  to the string pointed by "dst". Not more than "srclen" bytes
827
  are read from "src". Any sequences of bytes representing
828
  a not-well-formed substring (according to cs) are hex-encoded,
829
  and all well-formed substrings (according to cs) are copied as is.
830
  Not more than "dstlen" bytes are written to "dst". The number 
831
  of bytes written to "dst" is returned.
832
  
833
   @param      cs       character set pointer of the destination string
834
   @param[out] dst      destination string
835
   @param      dstlen   size of dst
836
   @param      src      source string
837
   @param      srclen   length of src
838
839
   @retval     result length
840
*/
841
842
size_t
843
my_copy_with_hex_escaping(CHARSET_INFO *cs,
844
                          char *dst, size_t dstlen,
845
                          const char *src, size_t srclen)
846
{
847
  const char *srcend= src + srclen;
848
  char *dst0= dst;
849
850
  for ( ; src < srcend ; )
851
  {
852
    size_t chlen;
853
    if ((chlen= my_ismbchar(cs, src, srcend)))
854
    {
855
      if (dstlen < chlen)
856
        break; /* purecov: inspected */
857
      memcpy(dst, src, chlen);
858
      src+= chlen;
859
      dst+= chlen;
860
      dstlen-= chlen;
861
    }
862
    else if (*src & 0x80)
863
    {
864
      if (dstlen < 4)
865
        break; /* purecov: inspected */
866
      *dst++= '\\';
867
      *dst++= 'x';
868
      *dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
869
      *dst++= _dig_vec_upper[((unsigned char) *src) & 15];
870
      src++;
871
      dstlen-= 4;
872
    }
873
    else
874
    {
875
      if (dstlen < 1)
876
        break; /* purecov: inspected */
877
      *dst++= *src++;
878
      dstlen--;
879
    }
880
  }
881
  return dst - dst0;
882
}
883
884
/*
885
  copy a string,
886
  with optional character set conversion,
887
  with optional left padding (for binary -> UCS2 conversion)
888
  
889
  SYNOPSIS
890
    well_formed_copy_nchars()
891
    to			     Store result here
892
    to_length                Maximum length of "to" string
893
    to_cs		     Character set of "to" string
894
    from		     Copy from here
895
    from_length		     Length of from string
896
    from_cs		     From character set
897
    nchars                   Copy not more than nchars characters
898
    well_formed_error_pos    Return position when "from" is not well formed
899
                             or NULL otherwise.
900
    cannot_convert_error_pos Return position where a non-convertible
901
                             character was met, or NULL otherwise.
902
    from_end_pos             Return position where scanning of "from"
903
                             string stopped.
904
  NOTES
905
906
  RETURN
907
    length of bytes copied to 'to'
908
*/
909
910
911
/*
  Copy at most nchars characters from 'from' to 'to', converting between
  from_cs and to_cs when the charsets differ, and left-padding binary
  input with zeros when its length is not a multiple of to_cs->mbminlen.

  The three position outputs are always set: *from_end_pos to where
  reading stopped, and *well_formed_error_pos / *cannot_convert_error_pos
  to the first offending position or NULL.

  The length of the padded leading character is added to the result only
  when that character was actually written. Previously it was added
  whenever from_length % mbminlen was non-zero, even for non-binary
  input where nothing had been padded.

  RETURN
    number of bytes written to 'to'
*/
uint32
well_formed_copy_nchars(CHARSET_INFO *to_cs,
                        char *to, uint to_length,
                        CHARSET_INFO *from_cs,
                        const char *from, uint from_length,
                        uint nchars,
                        const char **well_formed_error_pos,
                        const char **cannot_convert_error_pos,
                        const char **from_end_pos)
{
  uint res;

  if ((to_cs == &my_charset_bin) || 
      (from_cs == &my_charset_bin) ||
      (to_cs == from_cs) ||
      my_charset_same(from_cs, to_cs))
  {
    /* No character set conversion on this path */
    if (to_length < to_cs->mbminlen || !nchars)
    {
      *from_end_pos= from;
      *cannot_convert_error_pos= NULL;
      *well_formed_error_pos= NULL;
      return 0;
    }

    if (to_cs == &my_charset_bin)
    {
      /* Binary target: plain byte copy, limited by all three sizes */
      res= min(min(nchars, to_length), from_length);
      memmove(to, from, res);
      *from_end_pos= from + res;
      *well_formed_error_pos= NULL;
      *cannot_convert_error_pos= NULL;
    }
    else
    {
      int well_formed_error;
      bool padded= FALSE;		// TRUE once a padded character is stored
      const uint from_offset= from_length % to_cs->mbminlen;

      if (from_offset && (from_cs == &my_charset_bin))
      {
        /*
          Copying from BINARY to UCS2 needs to prepend zeros sometimes:
          INSERT INTO t1 (ucs2_column) VALUES (0x01);
          0x01 -> 0x0001
        */
        uint pad_length= to_cs->mbminlen - from_offset;
        bzero(to, pad_length);
        memmove(to + pad_length, from, from_offset);
        nchars--;
        from+= from_offset;
        from_length-= from_offset;
        to+= to_cs->mbminlen;
        to_length-= to_cs->mbminlen;
        padded= TRUE;
      }

      set_if_smaller(from_length, to_length);
      res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
                                        nchars, &well_formed_error);
      memmove(to, from, res);
      *from_end_pos= from + res;
      *well_formed_error_pos= well_formed_error ? from + res : NULL;
      *cannot_convert_error_pos= NULL;
      if (padded)
        res+= to_cs->mbminlen;		// Count the padded leading character
    }
  }
  else
  {
    /* Character-by-character conversion through mb_wc / wc_mb */
    int cnvres;
    my_wc_t wc;
    my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
    my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
    const uchar *from_end= (const uchar*) from + from_length;
    uchar *to_end= (uchar*) to + to_length;
    char *to_start= to;
    *well_formed_error_pos= NULL;
    *cannot_convert_error_pos= NULL;

    for ( ; nchars; nchars--)
    {
      const char *from_prev= from;
      if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
        from+= cnvres;
      else if (cnvres == MY_CS_ILSEQ)
      {
        if (!*well_formed_error_pos)
          *well_formed_error_pos= from;
        from++;
        wc= '?';
      }
      else if (cnvres > MY_CS_TOOSMALL)
      {
        /*
          A correct multibyte sequence detected
          But it doesn't have Unicode mapping.
        */
        if (!*cannot_convert_error_pos)
          *cannot_convert_error_pos= from;
        from+= (-cnvres);
        wc= '?';
      }
      else
        break;  // Not enough characters

outp:
      if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
        to+= cnvres;
      else if (cnvres == MY_CS_ILUNI && wc != '?')
      {
        /* Not representable in to_cs: remember where, retry with '?' */
        if (!*cannot_convert_error_pos)
          *cannot_convert_error_pos= from_prev;
        wc= '?';
        goto outp;
      }
      else
      {
        /* Could not store the character: leave it unread */
        from= from_prev;
        break;
      }
    }
    *from_end_pos= from;
    res= to - to_start;
  }
  return (uint32) res;
}
1037
1038
1039
1040
1041
/*
  Append this string to str, writing backslash, NUL, single quote,
  newline, carriage return and Ctrl-Z as the two-character sequences
  \\ \0 \' \n \r \Z. All other bytes are appended unchanged.
*/
void String::print(String *str)
{
  const char *cur= (char*)Ptr;
  const char *stop= cur+str_length;

  for (; cur < stop; cur++)
  {
    const uchar c= *cur;

    if (c == '\\')
      str->append(STRING_WITH_LEN("\\\\"));
    else if (c == '\0')
      str->append(STRING_WITH_LEN("\\0"));
    else if (c == '\'')
      str->append(STRING_WITH_LEN("\\'"));
    else if (c == '\n')
      str->append(STRING_WITH_LEN("\\n"));
    else if (c == '\r')
      str->append(STRING_WITH_LEN("\\r"));
    else if (c == '\032')			// Ctrl-Z
      str->append(STRING_WITH_LEN("\\Z"));
    else
      str->append(c);
  }
}
1072
1073
1074
/*
1075
  Exchange state of this object and argument.
1076
1077
  SYNOPSIS
1078
    String::swap()
1079
1080
  RETURN
1081
    Target string will contain state of this object and vice versa.
1082
*/
1083
1084
/*
  Exchange the buffer pointer, length, allocated length, alloced flag
  and charset of this string with those of s.
*/
void String::swap(String &s)
{
  char *saved_ptr= Ptr;
  Ptr= s.Ptr;
  s.Ptr= saved_ptr;

  uint32 saved_length= str_length;
  str_length= s.str_length;
  s.str_length= saved_length;

  uint32 saved_alloced_length= Alloced_length;
  Alloced_length= s.Alloced_length;
  s.Alloced_length= saved_alloced_length;

  bool saved_alloced= alloced;
  alloced= s.alloced;
  s.alloced= saved_alloced;

  CHARSET_INFO *saved_charset= str_charset;
  str_charset= s.str_charset;
  s.str_charset= saved_charset;
}