1
/* Copyright (C) 2000 MySQL AB
3
This program is free software; you can redistribute it and/or modify
4
it under the terms of the GNU General Public License as published by
5
the Free Software Foundation; version 2 of the License.
7
This program is distributed in the hope that it will be useful,
8
but WITHOUT ANY WARRANTY; without even the implied warranty of
9
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
GNU General Public License for more details.
12
You should have received a copy of the GNU General Public License
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
/* This file is originally from the mysql distribution. Coded by monty */
18
#ifdef USE_PRAGMA_IMPLEMENTATION
19
#pragma implementation // gcc: Class implementation
22
#include <my_global.h>
27
#include <floatingpoint.h>
31
The following extern declarations are ok as these are interface functions
32
required by the string function
35
extern uchar* sql_alloc(unsigned size);
36
extern void sql_element_free(void *ptr);
38
#include "sql_string.h"
40
/*****************************************************************************
42
*****************************************************************************/
44
/*
  String::real_alloc: make room for arg_length bytes plus one extra byte.

  The requested size is arg_length+1 rounded with ALIGN_SIZE. When the
  current Alloced_length is smaller than that, a buffer of the rounded size
  is requested from my_malloc (flag MY_WME) and Alloced_length is updated.

  NOTE(review): this listing omits braces, return statements and possibly
  other statements; only the visible lines are described.
*/
bool String::real_alloc(uint32 arg_length)
46
arg_length=ALIGN_SIZE(arg_length+1);
48
if (Alloced_length < arg_length)
51
// A null result from my_malloc is the failure case tested here.
if (!(Ptr=(char*) my_malloc(arg_length,MYF(MY_WME))))
53
Alloced_length=arg_length;
62
** Check that string is big enough. Set string[alloc_length] to 0
66
/*
  String::realloc: ensure the buffer can hold alloc_length bytes plus a
  terminating zero byte.

  len is alloc_length+1 rounded with ALIGN_SIZE. When Alloced_length is
  smaller than len, the visible code either resizes Ptr with my_realloc or
  obtains a fresh block with my_malloc and copies str_length bytes into it.
  TRUE is returned to signal an allocation error. At the end
  Ptr[alloc_length] is set to 0.

  NOTE(review): the test that selects between my_realloc and my_malloc, the
  braces and the success return are not part of this listing.
*/
bool String::realloc(uint32 alloc_length)
68
uint32 len=ALIGN_SIZE(alloc_length+1);
69
if (Alloced_length < len)
74
// Path 1: resize the existing block.
if ((new_ptr= (char*) my_realloc(Ptr,len,MYF(MY_WME))))
80
return TRUE; // Signal error
82
// Path 2: take a new block and copy the current contents into it.
else if ((new_ptr= (char*) my_malloc(len,MYF(MY_WME))))
84
if (str_length) // Avoid bugs in memcpy on AIX
85
memcpy(new_ptr,Ptr,str_length);
86
new_ptr[str_length]=0;
92
return TRUE; // Signal error
94
Ptr[alloc_length]=0; // This makes other functions shorter
98
/*
  String::set_int: store the base-10 text of num in the string.

  unsigned_flag selects base 10; otherwise -10 is passed (presumably the
  negative base requests signed formatting from longlong10_to_str; confirm
  against the charset handler API). str_length becomes the value returned
  by cs->cset->longlong10_to_str.

  NOTE(review): the allocation step, braces and return value are not part
  of this listing.
*/
bool String::set_int(longlong num, bool unsigned_flag, CHARSET_INFO *cs)
100
// l is handed to longlong10_to_str as the output limit: 20 * mbmaxlen + 1.
uint l=20*cs->mbmaxlen+1;
101
int base= unsigned_flag ? 10 : -10;
105
str_length=(uint32) (cs->cset->longlong10_to_str)(cs,Ptr,l,base,num);
110
/*
  String::set_real: format num as text and store it in charset cs.

  The number is formatted into a local buffer of FLOATING_POINT_BUFFER
  bytes and passed to copy() as my_charset_latin1 text to be converted to
  cs. When decimals >= NOT_FIXED_DEC, my_gcvt is used; otherwise my_fcvt is
  called with the requested number of decimals. The result of copy() is
  returned.

  NOTE(review): local declarations, braces and the tail of the last
  statement are not part of this listing.
*/
bool String::set_real(double num,uint decimals, CHARSET_INFO *cs)
112
char buff[FLOATING_POINT_BUFFER];
117
if (decimals >= NOT_FIXED_DEC)
119
len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
120
return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
122
len= my_fcvt(num, decimals, buff, NULL);
123
return copy(buff, (uint32) len, &my_charset_latin1, cs,
132
Alloced_length=0; // Force realloc
133
return realloc(str_length);
138
/*
  String::copy(const String&): take over the length, bytes and charset of
  str.

  alloc() is called with the source length first; then str_length is set,
  the bytes are moved with bmove (source and destination may overlap, per
  the existing comment) and str_charset is copied.

  NOTE(review): the handling of a failing alloc(), the braces and the return
  statements are not part of this listing.
*/
bool String::copy(const String &str)
140
if (alloc(str.str_length))
142
str_length=str.str_length;
143
bmove(Ptr,str.Ptr,str_length); // May be overlapping
145
str_charset=str.str_charset;
149
/*
  String::copy(str, arg_length, cs): copy arg_length bytes from str.

  alloc(arg_length) is called first. str_length is then assigned
  arg_length, and memcpy runs only when that length is non-zero.

  NOTE(review): cs is not used on any visible line; the handling of a
  failing alloc(), the braces and the return statements are not part of
  this listing.
*/
bool String::copy(const char *str,uint32 arg_length, CHARSET_INFO *cs)
151
if (alloc(arg_length))
153
// Assignment inside the condition: skips memcpy for a zero length.
if ((str_length=arg_length))
154
memcpy(Ptr,str,arg_length);
162
Checks that the source string can be just copied to the destination string
168
arg_length Length of string to copy.
169
from_cs Character set to copy from
170
to_cs Character set to copy to
171
uint32 *offset Returns number of unaligned characters.
174
0 No conversion needed
175
1 Either character set conversion or adding leading zeros
176
(e.g. for UCS-2) must be done
179
to_cs may be NULL for "no conversion" if the system variable
180
character_set_results is NULL.
183
/*
  String::needs_conversion: test whether bytes in from_cs can be used as-is
  for to_cs.

  The visible condition is a chain of alternatives: to_cs is
  my_charset_bin, to_cs and from_cs are the same object, my_charset_same()
  accepts the pair, or from_cs is my_charset_bin and arg_length is an exact
  multiple of to_cs->mbminlen. In that last test the remainder is stored
  through offset as a side effect.

  NOTE(review): the remaining parameters, the start of the condition and
  the return statements are not part of this listing.
*/
bool String::needs_conversion(uint32 arg_length,
184
CHARSET_INFO *from_cs,
190
(to_cs == &my_charset_bin) ||
191
(to_cs == from_cs) ||
192
my_charset_same(from_cs, to_cs) ||
193
((from_cs == &my_charset_bin) &&
194
(!(*offset=(arg_length % to_cs->mbminlen)))))
201
Copy a string in a multi-byte character set, adding leading zeros.
207
arg_length Length of string. This should NOT be divisible by
209
offset arg_length % cs->mb_minlength
210
cs Character set for 'str'
213
For real multi-byte, ASCII-incompatible character sets,
214
like UCS-2, add leading zeros if we have an incomplete character.
217
will automatically be converted into
225
/*
  String::copy_aligned: copy str with zero bytes prepended so that the
  first character becomes complete.

  On entry offset is the number of bytes in the incomplete character; it is
  replaced by cs->mbmaxlen - offset, the number of zero bytes to prepend.
  The assertion requires that count to be non-zero and different from
  mbmaxlen. After alloc(arg_length + offset) the prefix is zeroed with
  bzero, str is copied behind it, a zero byte is stored at
  Ptr[aligned_length] and str_length is set to aligned_length.

  NOTE(review): the end of the parameter list, the handling of a failing
  alloc(), the braces and the return statements are not part of this
  listing.
*/
bool String::copy_aligned(const char *str,uint32 arg_length, uint32 offset,
228
/* How many bytes are in incomplete character */
229
offset= cs->mbmaxlen - offset; /* How many zeros we should prepend */
230
DBUG_ASSERT(offset && offset != cs->mbmaxlen);
232
uint32 aligned_length= arg_length + offset;
233
if (alloc(aligned_length))
237
Note, this is only safe for big-endian UCS-2.
238
If we add little-endian UCS-2 sometimes, this code
239
will be more complicated. But it's OK for now.
241
bzero((char*) Ptr, offset);
242
memcpy(Ptr + offset, str, arg_length);
243
Ptr[aligned_length]=0;
244
/* str_length is always >= 0 as arg_length is != 0 */
245
str_length= aligned_length;
251
/*
  String::set_or_copy_aligned: use str directly when its length is a whole
  number of minimum-length characters, otherwise copy it with padding.

  offset is arg_length modulo cs->mbminlen. When it is zero, set() is
  called with str, arg_length and cs; otherwise the result of
  copy_aligned() is returned.

  NOTE(review): the end of the parameter list, the braces and the return
  on the offset == 0 path are not part of this listing.
*/
bool String::set_or_copy_aligned(const char *str,uint32 arg_length,
254
/* How many bytes are in incomplete character */
255
uint32 offset= (arg_length % cs->mbminlen);
257
if (!offset) /* All characters are complete, just copy */
259
set(str, arg_length, cs);
262
return copy_aligned(str, arg_length, offset, cs);
265
/* Copy with charset conversion */
267
/*
  String::copy(str, arg_length, from_cs, to_cs, errors): copy str while
  converting it from from_cs to to_cs.

  Three visible paths:
    - needs_conversion() reports no conversion: plain copy() tagged with
      to_cs.
    - from_cs is my_charset_bin and offset is non-zero: copy_aligned().
    - otherwise: allocate to_cs->mbmaxlen * arg_length bytes and let
      copy_and_convert() fill them; its return value becomes str_length and
      errors is forwarded to it.

  NOTE(review): the declaration of offset, the handling of a failing
  alloc(), the braces and the final return are not part of this listing.
*/
bool String::copy(const char *str, uint32 arg_length,
268
CHARSET_INFO *from_cs, CHARSET_INFO *to_cs, uint *errors)
271
if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
274
return copy(str, arg_length, to_cs);
276
if ((from_cs == &my_charset_bin) && offset)
279
return copy_aligned(str, arg_length, offset, to_cs);
281
// Buffer sized as mbmaxlen bytes of to_cs for every source byte.
uint32 new_length= to_cs->mbmaxlen*arg_length;
282
if (alloc(new_length))
284
str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
285
str, arg_length, from_cs, errors);
292
Set a string to the value of a latin1-string, keeping the original charset
296
str String of a simple charset (latin1)
297
arg_length Length of string
300
If string object is of a simple character set, set it to point to the
302
If not, make a copy and convert it to the new character set.
306
1 Could not allocate result buffer
310
/*
  String::set_ascii: set the string from single-byte text while keeping
  the current str_charset.

  When str_charset->mbminlen is 1, set() is called with str and the current
  charset. Otherwise the result of copy() with a conversion from
  my_charset_latin1 to str_charset is returned.

  NOTE(review): the declaration of dummy_errors, the braces and the return
  on the first path are not part of this listing.
*/
bool String::set_ascii(const char *str, uint32 arg_length)
312
if (str_charset->mbminlen == 1)
314
set(str, arg_length, str_charset);
318
return copy(str, arg_length, &my_charset_latin1, str_charset, &dummy_errors);
322
/* This is used by mysql.cc */
324
/*
  String::fill: make the string exactly max_length bytes long.

  A longer string is cut: str_length is set to max_length and a zero byte
  is stored at that position. On the other visible path realloc(max_length)
  is called, the bytes from the old end up to max_length are set to
  fill_char with bfill, and str_length becomes max_length.

  NOTE(review): the else keyword, the handling of a failing realloc(), the
  braces and the return statements are not part of this listing.
*/
bool String::fill(uint32 max_length,char fill_char)
326
if (str_length > max_length)
327
// Assigns the new length and terminates the buffer in one expression.
Ptr[str_length=max_length]=0;
330
if (realloc(max_length))
332
bfill(Ptr+str_length,max_length-str_length,fill_char);
333
str_length=max_length;
338
/*
  String::strip_sp: loop while the string is non-empty and its last byte is
  whitespace according to my_isspace() for str_charset.

  NOTE(review): the loop body is not part of this listing; presumably it
  shortens str_length, to be confirmed against the full source.
*/
void String::strip_sp()
340
while (str_length && my_isspace(str_charset,Ptr[str_length-1]))
344
/*
  String::append(const String&): append the bytes of s.

  realloc() is asked for str_length + s.length(); then s.length() bytes are
  copied from s.ptr() to the current end and str_length grows by the same
  amount. No charset conversion is visible on this path.

  NOTE(review): the handling of a failing realloc(), the braces and the
  return statements are not part of this listing.
*/
bool String::append(const String &s)
348
if (realloc(str_length+s.length()))
350
memcpy(Ptr+str_length,s.ptr(),s.length());
351
str_length+=s.length();
358
Append an ASCII string to a string of the current character set
361
/*
  String::append(s, arg_length): append arg_length bytes of single-byte
  text.

  When str_charset->mbminlen is greater than 1, room for
  arg_length * mbmaxlen bytes is reserved and copy_and_convert() converts
  from my_charset_latin1 into str_charset; its return value is added to
  str_length. On the other visible path the bytes are appended with memcpy
  after realloc(str_length + arg_length).

  NOTE(review): the handling of a failing realloc(), the tail of the
  copy_and_convert call, the braces and the return statements are not part
  of this listing.
*/
bool String::append(const char *s,uint32 arg_length)
367
For an ASCII incompatible string, e.g. UCS-2, we need to convert
369
if (str_charset->mbminlen > 1)
371
uint32 add_length=arg_length * str_charset->mbmaxlen;
373
if (realloc(str_length+ add_length))
375
str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
376
s, arg_length, &my_charset_latin1,
382
For an ASCII compatinble string we can just append.
384
if (realloc(str_length+arg_length))
386
memcpy(Ptr+str_length,s,arg_length);
387
str_length+=arg_length;
393
Append a 0-terminated ASCII string
396
/*
  String::append(const char *s): append a zero-terminated string.

  The length is taken with strlen() and the work is forwarded to
  append(s, length); that result is returned.
*/
bool String::append(const char *s)
398
return append(s, strlen(s));
403
Append a string in the given charset to the string
404
with character set recoding
407
/*
  String::append(s, arg_length, cs): append text given in charset cs.

  When needs_conversion() reports that cs differs from str_charset, room
  for (arg_length / cs->mbminlen) * str_charset->mbmaxlen bytes is reserved
  and copy_and_convert() writes the converted text at the current end; its
  return value is added to str_length. On the other visible path the bytes
  are appended unchanged with memcpy.

  NOTE(review): the declarations of dummy_offset and dummy_errors, the
  handling of a failing realloc(), the braces and the return statements are
  not part of this listing.
*/
bool String::append(const char *s,uint32 arg_length, CHARSET_INFO *cs)
411
if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
413
// Source characters (at least mbminlen bytes each) times the largest
// character size of the target charset.
uint32 add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
415
if (realloc(str_length + add_length))
417
str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
418
s, arg_length, cs, &dummy_errors);
422
if (realloc(str_length + arg_length))
424
memcpy(Ptr + str_length, s, arg_length);
425
str_length+= arg_length;
432
/*
  String::append(FILE*, arg_length, my_flags): append arg_length bytes read
  from file.

  realloc() is asked for str_length + arg_length, my_fread() reads directly
  into the buffer at the current end, and str_length grows by arg_length.

  NOTE(review): the handling of a failing realloc() or my_fread(), the
  braces and the return statements are not part of this listing.
*/
bool String::append(FILE* file, uint32 arg_length, myf my_flags)
434
if (realloc(str_length+arg_length))
436
if (my_fread(file, (uchar*) Ptr + str_length, arg_length, my_flags))
441
str_length+=arg_length;
446
/*
  String::append(IO_CACHE*, arg_length): append arg_length bytes read from
  an IO_CACHE.

  realloc() is asked for str_length + arg_length, my_b_read() reads
  directly into the buffer at the current end, and str_length grows by
  arg_length.

  NOTE(review): the handling of a failing realloc() or my_b_read(), the
  braces and the return statements are not part of this listing.
*/
bool String::append(IO_CACHE* file, uint32 arg_length)
448
if (realloc(str_length+arg_length))
450
if (my_b_read(file, (uchar*) Ptr + str_length, arg_length))
455
str_length+=arg_length;
459
/*
  String::append_with_prefill: append s preceded by fill characters.

  Room is reserved for the larger of arg_length and full_length. t_length
  is then reused as full_length - arg_length, that many fill_char bytes are
  written at the current end with bfill, str_length is advanced past them,
  and s is appended with append(s, arg_length).

  NOTE(review): a guard around the fill step (needed when arg_length
  exceeds full_length), the handling of a failing realloc(), the braces and
  the return statements are not part of this listing.
*/
bool String::append_with_prefill(const char *s,uint32 arg_length,
460
uint32 full_length, char fill_char)
462
int t_length= arg_length > full_length ? arg_length : full_length;
464
if (realloc(str_length + t_length))
466
// From here on t_length is the number of fill bytes.
t_length= full_length - arg_length;
469
bfill(Ptr+str_length, t_length, fill_char);
470
str_length=str_length + t_length;
472
append(s, arg_length);
476
/*
  String::numchars: return what the numchars handler of str_charset reports
  for the byte range from Ptr to Ptr + str_length.
*/
uint32 String::numchars()
478
return str_charset->cset->numchars(str_charset, Ptr, Ptr+str_length);
481
/*
  String::charpos: return what the charpos handler of str_charset reports
  for i, applied to the byte range from Ptr + offset to Ptr + str_length.

  NOTE(review): the marker numbers suggest lines between the signature and
  the return are not part of this listing.
*/
int String::charpos(int i,uint32 offset)
485
return str_charset->cset->charpos(str_charset,Ptr+offset,Ptr+str_length,i);
488
/*
  String::strstr: byte-wise forward search for s, starting at offset.

  The search is attempted only when s.length() + offset fits inside
  str_length. offset itself is returned for an empty needle (per the
  existing comment). Otherwise the first byte of s is compared at each
  position; on a hit the remaining bytes are compared through i and j, and
  a mismatch jumps to the label skip. On a full match the function returns
  (str - Ptr) - 1: str was already advanced past the first matching byte,
  so this is the index where the match begins.

  NOTE(review): the outer loop, the skip label, the declarations of i and
  j, the braces and the not-found return are not part of this listing.
*/
int String::strstr(const String &s,uint32 offset)
490
if (s.length()+offset <= str_length)
493
return ((int) offset); // Empty string is always found
495
register const char *str = Ptr+offset;
496
register const char *search=s.ptr();
// One past the last position at which a match can still start.
497
const char *end=Ptr+str_length-s.length()+1;
498
const char *search_end=s.ptr()+s.length();
502
if (*str++ == *search)
505
i=(char*) str; j=(char*) search+1;
506
while (j != search_end)
507
if (*i++ != *j++) goto skip;
508
return (int) (str-Ptr) -1;
516
** Search string from end. Offset is offset to the end of string
519
/*
  String::strrstr: byte-wise backward search for s.

  The search is attempted only when s.length() <= offset <= str_length.
  offset itself is returned for an empty needle (per the existing comment).
  Scanning starts at Ptr + offset - 1 and compares against the last byte of
  s; on a hit the remaining bytes are compared backwards through i and j,
  and a mismatch jumps to the label skip. On a full match the function
  returns (i - Ptr) + 1.

  NOTE(review): the outer loop, the skip label, the declarations of i and
  j, the braces and the not-found return are not part of this listing.
*/
int String::strrstr(const String &s,uint32 offset)
521
if (s.length() <= offset && offset <= str_length)
524
return offset; // Empty string is always found
525
register const char *str = Ptr+offset-1;
526
register const char *search=s.ptr()+s.length()-1;
528
const char *end=Ptr+s.length()-2;
// One before the first byte of the needle: stop marker for the backward
// comparison.
529
const char *search_end=s.ptr()-1;
533
if (*str-- == *search)
536
i=(char*) str; j=(char*) search-1;
537
while (j != search_end)
538
if (*i-- != *j--) goto skip;
539
return (int) (i-Ptr) +1;
547
Replace substring with string
548
If wrong parameter or not enough memory, do nothing
551
/*
  String::replace(offset, arg_length, to): forward to the pointer/length
  overload using to.ptr() and to.length(), and return its result.
*/
bool String::replace(uint32 offset,uint32 arg_length,const String &to)
553
return replace(offset,arg_length,to.ptr(),to.length());
556
/*
  String::replace(offset, arg_length, to, to_length): replace arg_length
  bytes at offset with to_length bytes from to.

  diff is to_length - arg_length. Work is done only when
  offset + arg_length lies within str_length. Two visible paths:
    - copy the replacement first, then move the tail from
      Ptr+offset+arg_length to Ptr+offset+to_length with bmove;
    - grow the buffer with realloc(str_length + diff), move the tail upward
      with bmove_upp, then copy the replacement.
  Finally str_length is adjusted by diff.

  NOTE(review): the test on diff that selects between the two paths, the
  handling of a failing realloc(), the braces and the return statements are
  not part of this listing.
*/
bool String::replace(uint32 offset,uint32 arg_length,
557
const char *to, uint32 to_length)
559
long diff = (long) to_length-(long) arg_length;
560
if (offset+arg_length <= str_length)
565
memcpy(Ptr+offset,to,to_length);
566
bmove(Ptr+offset+to_length,Ptr+offset+arg_length,
567
str_length-offset-arg_length);
573
if (realloc(str_length+(uint32) diff))
575
bmove_upp((uchar*) Ptr+str_length+diff, (uchar*) Ptr+str_length,
576
str_length-offset-arg_length);
579
memcpy(Ptr+offset,to,to_length);
581
str_length+=(uint32) diff;
587
// added by Holyfoot for "geometry" needs
588
/*
  String::reserve: make sure space_needed more bytes fit after the current
  contents.

  When Alloced_length is smaller than str_length + space_needed, realloc()
  is called with Alloced_length + max(space_needed, grow_by) - 1, so the
  buffer grows by at least grow_by.

  NOTE(review): the braces and the return statements are not part of this
  listing.
*/
int String::reserve(uint32 space_needed, uint32 grow_by)
590
if (Alloced_length < str_length + space_needed)
592
if (realloc(Alloced_length + max(space_needed, grow_by) - 1))
598
/*
  String::qs_append(str, len): copy len + 1 bytes from str to the current
  end of the buffer (one more than len, presumably the terminating zero of
  str; confirm with callers).

  No capacity check is visible here; presumably the caller has reserved the
  space beforehand.

  NOTE(review): the update of str_length is not part of this listing.
*/
void String::qs_append(const char *str, uint32 len)
600
memcpy(Ptr + str_length, str, len + 1);
604
/*
  String::qs_append(double): write the text form of d at the current end of
  the buffer with my_gcvt and advance str_length by its return value.

  my_gcvt is given FLOATING_POINT_BUFFER - 1 as its width argument. No
  capacity check is visible here; presumably the caller has reserved the
  space beforehand.
*/
void String::qs_append(double d)
606
char *buff = Ptr + str_length;
607
str_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff, NULL);
610
/*
  String::qs_append(double*): read a double from the bytes d points to with
  float8get into ld.

  NOTE(review): the declaration of ld and the statement that uses it are
  not part of this listing.
*/
void String::qs_append(double *d)
613
float8get(ld, (char*) d);
617
/*
  String::qs_append(int): write the decimal text of i at the current end of
  the buffer.

  int10_to_str is called with base -10 and returns the end of the written
  text; str_length advances by the number of bytes written. No capacity
  check is visible here; presumably the caller has reserved the space
  beforehand.
*/
void String::qs_append(int i)
619
char *buff= Ptr + str_length;
620
char *end= int10_to_str(i, buff, -10);
621
str_length+= (int) (end-buff);
624
/*
  String::qs_append(uint): write the decimal text of i at the current end
  of the buffer.

  int10_to_str is called with base 10 and returns the end of the written
  text; str_length advances by the number of bytes written. No capacity
  check is visible here; presumably the caller has reserved the space
  beforehand.
*/
void String::qs_append(uint i)
626
char *buff= Ptr + str_length;
627
char *end= int10_to_str(i, buff, 10);
628
str_length+= (int) (end-buff);
632
Compare strings according to collation, without end space.
641
Normally this is case sensitive comparison
650
/*
  sortcmp: compare s and t with the strnncollsp handler of collation cs.

  Both strings are passed as byte pointer plus length, the last argument is
  0, and the handler result is returned unchanged.
*/
int sortcmp(const String *s,const String *t, CHARSET_INFO *cs)
652
return cs->coll->strnncollsp(cs,
653
(uchar *) s->ptr(),s->length(),
654
(uchar *) t->ptr(),t->length(), 0);
659
Compare strings byte by byte. End spaces are also compared.
667
Strings are compared as a stream of uchars
676
/*
  stringcmp: byte-wise comparison of s and t.

  The common prefix (the shorter of the two lengths) is compared with
  memcmp. A non-zero result is returned as is; otherwise the difference of
  the lengths, cast to int, decides.
*/
int stringcmp(const String *s,const String *t)
678
uint32 s_len=s->length(),t_len=t->length(),len=min(s_len,t_len);
679
int cmp= memcmp(s->ptr(), t->ptr(), len);
// The subtraction is done in uint32 and then converted to int.
680
return (cmp) ? cmp : (int) (s_len - t_len);
684
/*
  copy_if_not_alloced: obtain a String with room for from_length bytes,
  reusing from when possible and copying into to otherwise.

  Visible steps: a test whether from already has from_length bytes
  allocated; a test whether from owns its buffer, to is null, or both are
  the same object, in which case from is grown in place with realloc();
  otherwise to is grown, at most from_length bytes are copied from from,
  and the charset is carried over. When growing to fails, from is returned
  (marked as an error case by the existing comment).

  NOTE(review): the braces and the remaining return statements are not
  part of this listing.
*/
String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
686
if (from->Alloced_length >= from_length)
688
if (from->alloced || !to || from == to)
690
(void) from->realloc(from_length);
693
if (to->realloc(from_length))
694
return from; // Actually an error
// Assignment inside the condition: memcpy is skipped for a zero length.
695
if ((to->str_length=min(from->str_length,from_length)))
696
memcpy(to->Ptr,from->Ptr,to->str_length);
697
to->str_charset=from->str_charset;
702
/****************************************************************************
704
****************************************************************************/
707
copy a string from one character set to another
712
to_cs Character set of result string
714
from_length Length of from string
715
from_cs From character set
718
'to' must be at least from_length * to_cs->mbmaxlen bytes long
721
length of bytes copied to 'to'
726
/*
  copy_and_convert_extended: convert text from from_cs to to_cs one
  character at a time.

  Each character is decoded with the mb_wc handler of from_cs into wc and
  encoded with the wc_mb handler of to_cs. The visible branches on the
  decode result distinguish success (> 0), MY_CS_ILSEQ, and values above
  MY_CS_TOOSMALL (a valid sequence without Unicode mapping, per the text
  below); any other result ends the loop. On the encode side the branches
  are success (> 0) and MY_CS_ILUNI for a wc other than the question mark.
  At the end the error counter is stored through errors and the number of
  bytes written to the destination is returned.

  NOTE(review): the return type, the last parameter, the loop header, the
  bodies of all branches, the declarations of cnvres, wc, error_count and
  to_start, and the braces are not part of this listing.
*/
copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs,
727
const char *from, uint32 from_length,
728
CHARSET_INFO *from_cs,
733
const uchar *from_end= (const uchar*) from+from_length;
735
uchar *to_end= (uchar*) to+to_length;
736
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
737
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
742
if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from,
745
else if (cnvres == MY_CS_ILSEQ)
751
else if (cnvres > MY_CS_TOOSMALL)
754
A correct multibyte sequence detected
755
But it doesn't have Unicode mapping.
762
break; // Not enough characters
765
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
767
else if (cnvres == MY_CS_ILUNI && wc != '?')
776
*errors= error_count;
777
return (uint32) (to - to_start);
782
Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
785
/*
  copy_and_convert: convert text between charsets with a fast path for
  bytes in the range 0x00..0x7F.

  When either charset has MY_CS_NONASCII set in its state, the whole job is
  handed to copy_and_convert_extended(). Otherwise at most
  min(to_length, from_length) bytes are copied directly: on i386 four bytes
  at a time while none of them has the high bit set, then byte by byte.
  When a byte above 0x7F is met, the lengths are reduced by the number of
  bytes already copied and the remainder is handed to
  copy_and_convert_extended(); the two counts are added for the return
  value.

  NOTE(review): the return type, the last parameter, the closing #endif,
  the exit of the byte loop when length reaches zero, the tail of the last
  call and the braces are not part of this listing.
*/
copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
786
const char *from, uint32 from_length, CHARSET_INFO *from_cs,
790
If any of the character sets is not ASCII compatible,
791
immediately switch to slow mb_wc->wc_mb method.
793
if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
794
return copy_and_convert_extended(to, to_length, to_cs,
795
from, from_length, from_cs, errors);
797
// length counts down as bytes are copied; length2 keeps the starting value.
uint32 length= min(to_length, from_length), length2= length;
799
#if defined(__i386__)
801
Special loop for i386, it allows to refer to a
802
non-aligned memory block as UINT32, which makes
803
it possible to copy four bytes at once. This
804
gives about 10% performance improvement comparing
805
to byte-by-byte loop.
807
for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
809
// True when any of the four bytes has its high bit set.
if ((*(uint32*)from) & 0x80808080)
811
*((uint32*) to)= *((const uint32*) from);
815
for (; ; *to++= *from++, length--)
822
if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
824
uint32 copied_length= length2 - length;
825
to_length-= copied_length;
826
from_length-= copied_length;
827
return copied_length + copy_and_convert_extended(to, to_length,
835
DBUG_ASSERT(FALSE); // Should never get to here
836
return 0; // Make compiler happy
841
Copy string with HEX-encoding of "bad" characters.
843
@details This function copies the string pointed to by "src"
844
to the string pointed by "dst". Not more than "srclen" bytes
845
are read from "src". Any sequences of bytes representing
846
a not-well-formed substring (according to cs) are hex-encoded,
847
and all well-formed substrings (according to cs) are copied as is.
848
Not more than "dstlen" bytes are written to "dst". The number
849
of bytes written to "dst" is returned.
851
@param cs character set pointer of the destination string
852
@param[out] dst destination string
853
@param dstlen size of dst
854
@param src source string
855
@param srclen length of src
857
@retval result length
861
/*
  my_copy_with_hex_escaping: copy src to dst, writing bytes that are not
  part of a valid multi-byte character as hexadecimal digits.

  The loop walks src up to srcend. When my_ismbchar() reports a character
  length, that many bytes are copied unchanged. Otherwise a byte with the
  high bit set is written as two upper-case hex digits taken from
  _dig_vec_upper (high nibble first). Each of the three visible break
  statements leaves the loop early.

  NOTE(review): the return type, the conditions guarding the break
  statements, anything written before the hex digits, the pointer and
  length updates, the branch for plain bytes, the braces and the return
  statement are not part of this listing.
*/
my_copy_with_hex_escaping(CHARSET_INFO *cs,
862
char *dst, size_t dstlen,
863
const char *src, size_t srclen)
865
const char *srcend= src + srclen;
868
for ( ; src < srcend ; )
871
if ((chlen= my_ismbchar(cs, src, srcend)))
874
break; /* purecov: inspected */
875
memcpy(dst, src, chlen);
880
else if (*src & 0x80)
883
break; /* purecov: inspected */
886
*dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
887
*dst++= _dig_vec_upper[((unsigned char) *src) & 15];
894
break; /* purecov: inspected */
904
with optional character set conversion,
905
with optional left padding (for binary -> UCS2 conversion)
908
well_formed_copy_nchars()
910
to_length Maximum length of "to" string
911
to_cs Character set of "to" string
913
from_length Length of from string
914
from_cs From character set
915
nchars Copy not more than nchars characters
916
well_formed_error_pos Return position when "from" is not well formed
918
cannot_convert_error_pos Return position where a non-convertible
919
character was met, or NULL otherwise.
920
from_end_pos Return position where scanning of "from"
925
length of bytes copied to 'to'
930
/*
  well_formed_copy_nchars: copy at most nchars characters from "from" to
  "to", with optional charset conversion, reporting where the input stops
  being usable.

  Path without conversion (either charset is my_charset_bin, both are the
  same object, or my_charset_same() accepts the pair):
    - a destination smaller than to_cs->mbminlen, or nchars == 0, clears
      both error positions;
    - for a binary destination, min(nchars, to_length, from_length) bytes
      are moved and both error positions are cleared;
    - otherwise, when the source is binary and from_length is not a
      multiple of to_cs->mbminlen, the leading partial character is
      left-padded with zero bytes to mbminlen; then well_formed_len()
      decides how many bytes are copied, and well_formed_error_pos is set
      to the end of the copied part when that handler reports an error.

  Path with conversion: each character is decoded with the mb_wc handler
  of from_cs and encoded with the wc_mb handler of to_cs, for at most
  nchars characters. The first MY_CS_ILSEQ position is stored in
  well_formed_error_pos; the first character that cannot be mapped is
  stored in cannot_convert_error_pos.

  from_end_pos receives the position where reading stopped, and res is
  returned as uint32.

  NOTE(review): the return type, the nchars parameter line, several local
  declarations, the conditions between the sub-paths, the branch bodies
  that advance from and to, and all braces are not part of this listing.
*/
well_formed_copy_nchars(CHARSET_INFO *to_cs,
931
char *to, uint to_length,
932
CHARSET_INFO *from_cs,
933
const char *from, uint from_length,
935
const char **well_formed_error_pos,
936
const char **cannot_convert_error_pos,
937
const char **from_end_pos)
941
if ((to_cs == &my_charset_bin) ||
942
(from_cs == &my_charset_bin) ||
943
(to_cs == from_cs) ||
944
my_charset_same(from_cs, to_cs))
946
if (to_length < to_cs->mbminlen || !nchars)
949
*cannot_convert_error_pos= NULL;
950
*well_formed_error_pos= NULL;
954
if (to_cs == &my_charset_bin)
956
res= min(min(nchars, to_length), from_length);
957
memmove(to, from, res);
958
*from_end_pos= from + res;
959
*well_formed_error_pos= NULL;
960
*cannot_convert_error_pos= NULL;
964
int well_formed_error;
967
if ((from_offset= (from_length % to_cs->mbminlen)) &&
968
(from_cs == &my_charset_bin))
971
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
972
INSERT INTO t1 (ucs2_column) VALUES (0x01);
975
uint pad_length= to_cs->mbminlen - from_offset;
976
bzero(to, pad_length);
977
memmove(to + pad_length, from, from_offset);
980
from_length-= from_offset;
981
to+= to_cs->mbminlen;
982
to_length-= to_cs->mbminlen;
985
set_if_smaller(from_length, to_length);
986
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
987
nchars, &well_formed_error);
988
memmove(to, from, res);
989
*from_end_pos= from + res;
990
*well_formed_error_pos= well_formed_error ? from + res : NULL;
991
*cannot_convert_error_pos= NULL;
993
res+= to_cs->mbminlen;
1000
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
1001
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
1002
const uchar *from_end= (const uchar*) from + from_length;
1003
uchar *to_end= (uchar*) to + to_length;
1005
*well_formed_error_pos= NULL;
1006
*cannot_convert_error_pos= NULL;
1008
for ( ; nchars; nchars--)
1010
const char *from_prev= from;
1011
if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
1013
else if (cnvres == MY_CS_ILSEQ)
1015
if (!*well_formed_error_pos)
1016
*well_formed_error_pos= from;
1020
else if (cnvres > MY_CS_TOOSMALL)
1023
A correct multibyte sequence detected
1024
But it doesn't have Unicode mapping.
1026
if (!*cannot_convert_error_pos)
1027
*cannot_convert_error_pos= from;
1032
break; // Not enough characters
1035
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
1037
else if (cnvres == MY_CS_ILUNI && wc != '?')
1039
if (!*cannot_convert_error_pos)
1040
*cannot_convert_error_pos= from_prev;
1050
*from_end_pos= from;
1053
return (uint32) res;
1059
/*
  String::print: append an escaped form of this string to str.

  The bytes from Ptr to Ptr + str_length are visited one by one. The
  visible append calls each emit a two-character escape: a backslash
  followed by a backslash, 0, a single quote, n, r or Z. The Z escape
  belongs to the Ctrl-Z case (octal 032), the only case label present.

  NOTE(review): the switch statement, the other case labels, the handling
  of bytes that need no escaping and the braces are not part of this
  listing.
*/
void String::print(String *str)
1061
char *st= (char*)Ptr, *end= st+str_length;
1062
for (; st < end; st++)
1068
str->append(STRING_WITH_LEN("\\\\"));
1071
str->append(STRING_WITH_LEN("\\0"));
1074
str->append(STRING_WITH_LEN("\\'"));
1077
str->append(STRING_WITH_LEN("\\n"));
1080
str->append(STRING_WITH_LEN("\\r"));
1082
case '\032': // Ctrl-Z
1083
str->append(STRING_WITH_LEN("\\Z"));
1093
Exchange state of this object and argument.
1099
Target string will contain state of this object and vice versa.
1102
/*
  String::swap: exchange the complete state of this object with s.

  Every visible member is swapped with swap_variables: the buffer pointer,
  the used length, the allocated length, the alloced flag and the charset
  pointer. No bytes are copied.
*/
void String::swap(String &s)
1104
swap_variables(char *, Ptr, s.Ptr);
1105
swap_variables(uint32, str_length, s.str_length);
1106
swap_variables(uint32, Alloced_length, s.Alloced_length);
1107
swap_variables(bool, alloced, s.alloced);
1108
swap_variables(CHARSET_INFO*, str_charset, s.str_charset);