102
bool String::set_real(double num,uint decimals, const CHARSET_INFO * const cs)
108
bool String::set_real(double num,uint32_t decimals, const CHARSET_INFO * const cs)
104
110
char buff[FLOATING_POINT_BUFFER];
111
uint32_t dummy_errors;
109
115
if (decimals >= NOT_FIXED_DEC)
111
117
len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
112
return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
118
return copy(buff, len, &my_charset_utf8_general_ci, cs, &dummy_errors);
114
120
len= my_fcvt(num, decimals, buff, NULL);
115
return copy(buff, (uint32_t) len, &my_charset_latin1, cs,
121
return copy(buff, (uint32_t) len, &my_charset_utf8_general_ci, cs,
193
Copy a multi-byte character set string, adding leading zeros.
199
arg_length Length of string. This should NOT be divisible by
201
offset arg_length % cs->mb_minlength
202
cs Character set for 'str'
205
For real multi-byte, ASCII-incompatible character sets,
206
like UCS-2, add leading zeros if we have an incomplete character.
209
will automatically be converted into
217
bool String::copy_aligned(const char *str,uint32_t arg_length, uint32_t offset,
218
const CHARSET_INFO * const cs)
220
/* How many bytes are in incomplete character */
221
offset= cs->mbmaxlen - offset; /* How many zeros we should prepend */
222
assert(offset && offset != cs->mbmaxlen);
224
uint32_t aligned_length= arg_length + offset;
225
if (alloc(aligned_length))
229
Note, this is only safe for big-endian UCS-2.
230
If we add little-endian UCS-2 someday, this code
231
will be more complicated. But it's OK for now.
233
memset(Ptr, 0, offset);
234
memcpy(Ptr + offset, str, arg_length);
235
Ptr[aligned_length]=0;
236
/* str_length is always >= 0 as arg_length is != 0 */
237
str_length= aligned_length;
243
200
bool String::set_or_copy_aligned(const char *str,uint32_t arg_length,
244
201
const CHARSET_INFO * const cs)
246
203
/* How many bytes are in incomplete character */
247
uint32_t offset= (arg_length % cs->mbminlen);
249
if (!offset) /* All characters are complete, just copy */
251
set(str, arg_length, cs);
254
return copy_aligned(str, arg_length, offset, cs);
204
uint32_t offset= (arg_length % cs->mbminlen);
206
assert(!offset); /* All characters are complete, just copy */
208
set(str, arg_length, cs);
257
212
/* Copy with charset conversion */
259
214
bool String::copy(const char *str, uint32_t arg_length,
260
const CHARSET_INFO * const from_cs,
261
const CHARSET_INFO * const to_cs, uint *errors)
215
const CHARSET_INFO * const,
216
const CHARSET_INFO * const to_cs, uint32_t *errors)
264
if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
267
return copy(str, arg_length, to_cs);
269
if ((from_cs == &my_charset_bin) && offset)
272
return copy_aligned(str, arg_length, offset, to_cs);
274
uint32_t new_length= to_cs->mbmaxlen*arg_length;
275
if (alloc(new_length))
277
str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
278
str, arg_length, from_cs, errors);
219
return copy(str, arg_length, to_cs);
285
224
Set a string to the value of a latin1-string, keeping the original charset
289
228
str String of a simple charset (latin1)
391
298
with character set recoding
394
bool String::append(const char *s,uint32_t arg_length, const CHARSET_INFO * const cs)
396
uint32_t dummy_offset;
398
if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
400
uint32_t add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
402
if (realloc(str_length + add_length))
404
str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
405
s, arg_length, cs, &dummy_errors);
409
if (realloc(str_length + arg_length))
411
memcpy(Ptr + str_length, s, arg_length);
412
str_length+= arg_length;
418
bool String::append(IO_CACHE* file, uint32_t arg_length)
420
if (realloc(str_length+arg_length))
422
if (my_b_read(file, (uchar*) Ptr + str_length, arg_length))
427
str_length+=arg_length;
301
bool String::append(const char *s,uint32_t arg_length, const CHARSET_INFO * const)
303
if (realloc(str_length + arg_length))
305
memcpy(Ptr + str_length, s, arg_length);
306
str_length+= arg_length;
431
312
bool String::append_with_prefill(const char *s,uint32_t arg_length,
432
313
uint32_t full_length, char fill_char)
559
// added by Holyfoot for "geometry" needs
560
int String::reserve(uint32_t space_needed, uint32_t grow_by)
562
if (Alloced_length < str_length + space_needed)
564
if (realloc(Alloced_length + max(space_needed, grow_by) - 1))
570
void String::qs_append(const char *str, uint32_t len)
572
memcpy(Ptr + str_length, str, len + 1);
576
void String::qs_append(double d)
578
char *buff = Ptr + str_length;
579
str_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff, NULL);
582
void String::qs_append(double *d)
585
float8get(ld, (char*) d);
589
void String::qs_append(int i)
591
char *buff= Ptr + str_length;
592
char *end= int10_to_str(i, buff, -10);
593
str_length+= (int) (end-buff);
596
void String::qs_append(uint i)
598
char *buff= Ptr + str_length;
599
char *end= int10_to_str(i, buff, 10);
600
str_length+= (int) (end-buff);
604
442
Compare strings according to collation, without end space.
676
514
****************************************************************************/
679
copy a string from one character set to another
684
to_cs Character set of result string
686
from_length Length of from string
687
from_cs From character set
690
'to' must be big enough to hold from_length * to_cs->mbmaxlen bytes
693
length of bytes copied to 'to'
698
copy_and_convert_extended(char *to, uint32_t to_length,
699
const CHARSET_INFO * const to_cs,
700
const char *from, uint32_t from_length,
701
const CHARSET_INFO * const from_cs,
706
const uchar *from_end= (const uchar*) from+from_length;
708
uchar *to_end= (uchar*) to+to_length;
709
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
710
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
715
if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from,
718
else if (cnvres == MY_CS_ILSEQ)
724
else if (cnvres > MY_CS_TOOSMALL)
727
A correct multibyte sequence detected
728
But it doesn't have Unicode mapping.
735
break; // Not enough characters
738
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
740
else if (cnvres == MY_CS_ILUNI && wc != '?')
749
*errors= error_count;
750
return (uint32_t) (to - to_start);
755
Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
758
copy_and_convert(char *to, uint32_t to_length, const CHARSET_INFO * const to_cs,
759
const char *from, uint32_t from_length,
760
const CHARSET_INFO * const from_cs, uint *errors)
763
If any of the character sets is not ASCII compatible,
764
immediately switch to slow mb_wc->wc_mb method.
766
if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
767
return copy_and_convert_extended(to, to_length, to_cs,
768
from, from_length, from_cs, errors);
770
uint32_t length= min(to_length, from_length), length2= length;
772
#if defined(__i386__)
774
Special loop for i386: it allows referring to a
775
non-aligned memory block as UINT32, which makes
776
it possible to copy four bytes at once. This
777
gives about 10% performance improvement compared
778
to byte-by-byte loop.
780
for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
782
if ((*(uint32_t*)from) & 0x80808080)
784
*((uint32_t*) to)= *((const uint32_t*) from);
788
for (; ; *to++= *from++, length--)
795
if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
797
uint32_t copied_length= length2 - length;
798
to_length-= copied_length;
799
from_length-= copied_length;
800
return copied_length + copy_and_convert_extended(to, to_length,
808
assert(false); // Should never get to here
809
return 0; // Make compiler happy
903
608
well_formed_copy_nchars(const CHARSET_INFO * const to_cs,
904
char *to, uint to_length,
609
char *to, uint32_t to_length,
905
610
const CHARSET_INFO * const from_cs,
906
const char *from, uint from_length,
611
const char *from, uint32_t from_length,
908
613
const char **well_formed_error_pos,
909
614
const char **cannot_convert_error_pos,
910
615
const char **from_end_pos)
914
if ((to_cs == &my_charset_bin) ||
915
(from_cs == &my_charset_bin) ||
916
(to_cs == from_cs) ||
917
my_charset_same(from_cs, to_cs))
919
if (to_length < to_cs->mbminlen || !nchars)
922
*cannot_convert_error_pos= NULL;
923
*well_formed_error_pos= NULL;
927
if (to_cs == &my_charset_bin)
929
res= min(min(nchars, to_length), from_length);
930
memmove(to, from, res);
931
*from_end_pos= from + res;
932
*well_formed_error_pos= NULL;
933
*cannot_convert_error_pos= NULL;
937
int well_formed_error;
940
if ((from_offset= (from_length % to_cs->mbminlen)) &&
941
(from_cs == &my_charset_bin))
944
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
945
INSERT INTO t1 (ucs2_column) VALUES (0x01);
948
uint pad_length= to_cs->mbminlen - from_offset;
949
memset(to, 0, pad_length);
950
memmove(to + pad_length, from, from_offset);
953
from_length-= from_offset;
954
to+= to_cs->mbminlen;
955
to_length-= to_cs->mbminlen;
958
set_if_smaller(from_length, to_length);
959
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
960
nchars, &well_formed_error);
961
memmove(to, from, res);
962
*from_end_pos= from + res;
963
*well_formed_error_pos= well_formed_error ? from + res : NULL;
964
*cannot_convert_error_pos= NULL;
966
res+= to_cs->mbminlen;
973
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
974
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
975
const uchar *from_end= (const uchar*) from + from_length;
976
uchar *to_end= (uchar*) to + to_length;
978
*well_formed_error_pos= NULL;
979
*cannot_convert_error_pos= NULL;
981
for ( ; nchars; nchars--)
983
const char *from_prev= from;
984
if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
986
else if (cnvres == MY_CS_ILSEQ)
988
if (!*well_formed_error_pos)
989
*well_formed_error_pos= from;
993
else if (cnvres > MY_CS_TOOSMALL)
996
A correct multibyte sequence detected
997
But it doesn't have Unicode mapping.
999
if (!*cannot_convert_error_pos)
1000
*cannot_convert_error_pos= from;
1005
break; // Not enough characters
1008
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
1010
else if (cnvres == MY_CS_ILUNI && wc != '?')
1012
if (!*cannot_convert_error_pos)
1013
*cannot_convert_error_pos= from_prev;
619
assert((to_cs == &my_charset_bin) ||
620
(from_cs == &my_charset_bin) ||
621
(to_cs == from_cs) ||
622
my_charset_same(from_cs, to_cs));
624
if (to_length < to_cs->mbminlen || !nchars)
1023
626
*from_end_pos= from;
1026
return (uint32_t) res;
627
*cannot_convert_error_pos= NULL;
628
*well_formed_error_pos= NULL;
632
if (to_cs == &my_charset_bin)
634
res= min(min(nchars, to_length), from_length);
635
memmove(to, from, res);
636
*from_end_pos= from + res;
637
*well_formed_error_pos= NULL;
638
*cannot_convert_error_pos= NULL;
642
int well_formed_error;
643
uint32_t from_offset;
645
if ((from_offset= (from_length % to_cs->mbminlen)) &&
646
(from_cs == &my_charset_bin))
649
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
650
INSERT INTO t1 (ucs2_column) VALUES (0x01);
653
uint32_t pad_length= to_cs->mbminlen - from_offset;
654
memset(to, 0, pad_length);
655
memmove(to + pad_length, from, from_offset);
658
from_length-= from_offset;
659
to+= to_cs->mbminlen;
660
to_length-= to_cs->mbminlen;
663
set_if_smaller(from_length, to_length);
664
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
665
nchars, &well_formed_error);
666
memmove(to, from, res);
667
*from_end_pos= from + res;
668
*well_formed_error_pos= well_formed_error ? from + res : NULL;
669
*cannot_convert_error_pos= NULL;
671
res+= to_cs->mbminlen;
713
Quote the given identifier.
714
If the given identifier is empty, it will be quoted.
718
name the identifier to be appended
719
name_length length of the appending identifier
722
/* Factor the extern out */
723
extern const CHARSET_INFO *system_charset_info, *files_charset_info;
725
void String::append_identifier(const char *name, uint32_t in_length)
727
const char *name_end;
732
The identifier must be quoted as it includes a quote character or
736
reserve(in_length*2 + 2);
737
quote_char= (char) q;
738
append("e_char, 1, system_charset_info);
740
for (name_end= name+in_length ; name < name_end ; name+= in_length)
742
unsigned char chr= (unsigned char) *name;
743
in_length= my_mbcharlen(system_charset_info, chr);
745
my_mbcharlen can return 0 on a wrong multibyte
746
sequence. It is possible when upgrading from 4.0,
747
and identifier contains some accented characters.
748
The manual says it does not work. So we'll just
749
change length to 1 so as not to hang in an endless loop.
753
if (in_length == 1 && chr == (unsigned char) quote_char)
754
append("e_char, 1, system_charset_info);
755
append(name, in_length, system_charset_info);
757
append("e_char, 1, system_charset_info);
1066
762
Exchange state of this object and argument.
1075
771
void String::swap(String &s)
1077
swap_variables(char *, Ptr, s.Ptr);
1078
swap_variables(uint32_t, str_length, s.str_length);
1079
swap_variables(uint32_t, Alloced_length, s.Alloced_length);
1080
swap_variables(bool, alloced, s.alloced);
1081
swap_variables(const CHARSET_INFO *, str_charset, s.str_charset);
773
std::swap(Ptr, s.Ptr);
774
std::swap(str_length, s.str_length);
775
std::swap(Alloced_length, s.Alloced_length);
776
std::swap(alloced, s.alloced);
777
std::swap(str_charset, s.str_charset);
781
bool operator==(const String &s1, const String &s2)
783
return stringcmp(&s1,&s2) == 0;
786
bool operator!=(const String &s1, const String &s2)