16
16
/* This file is originally from the mysql distribution. Coded by monty */
18
#ifdef USE_PRAGMA_IMPLEMENTATION
19
#pragma implementation // gcc: Class implementation
22
#include <my_global.h>
28
The following extern declarations are ok as these are interface functions
29
required by the string function
32
extern uchar* sql_alloc(unsigned size);
33
extern void sql_element_free(void *ptr);
35
#include "sql_string.h"
20
#include "drizzled/internal/my_sys.h"
21
#include "drizzled/internal/m_string.h"
22
#include "drizzled/charset.h"
26
#include "drizzled/sql_string.h"
28
using namespace drizzled;
37
31
/*****************************************************************************
38
32
** String functions
39
33
*****************************************************************************/
40
str_charset(&my_charset_bin)
44
String::String(uint32_t length_arg)
49
str_charset(&my_charset_bin)
51
(void) real_alloc(length_arg);
54
String::String(const char *str, const CHARSET_INFO * const cs)
55
: Ptr(const_cast<char *>(str)),
56
str_length(static_cast<uint32_t>(strlen(str))),
63
String::String(const char *str, uint32_t len, const CHARSET_INFO * const cs)
64
: Ptr(const_cast<char *>(str)),
72
String::String(char *str, uint32_t len, const CHARSET_INFO * const cs)
81
String::String(const String &str)
83
str_length(str.str_length),
84
Alloced_length(str.Alloced_length),
86
str_charset(str.str_charset)
90
void *String::operator new(size_t size, memory::Root *mem_root)
92
return alloc_root(mem_root, static_cast<uint32_t>(size));
95
/*
  Destroying a String simply invokes free(); what free() releases is
  defined outside this chunk.
*/
String::~String()
{
  free();
}
41
97
bool String::real_alloc(uint32_t arg_length)
43
99
arg_length=ALIGN_SIZE(arg_length+1);
107
bool String::set_real(double num,uint decimals, CHARSET_INFO *cs)
163
bool String::set_real(double num,uint32_t decimals, const CHARSET_INFO * const cs)
109
165
char buff[FLOATING_POINT_BUFFER];
166
uint32_t dummy_errors;
114
170
if (decimals >= NOT_FIXED_DEC)
116
172
len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
117
return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
173
return copy(buff, len, &my_charset_utf8_general_ci, cs, &dummy_errors);
119
175
len= my_fcvt(num, decimals, buff, NULL);
120
return copy(buff, (uint32_t) len, &my_charset_latin1, cs,
176
return copy(buff, (uint32_t) len, &my_charset_utf8_general_ci, cs,
198
Copy a string in a multi-byte character set, adding leading zeros.
204
arg_length Length of string. This should NOT be divisible by
206
offset arg_length % cs->mb_minlength
207
cs Character set for 'str'
210
For real multi-byte, ASCII-incompatible character sets,
211
like UCS-2, add leading zeros if we have an incomplete character.
214
will automatically be converted into
222
bool String::copy_aligned(const char *str,uint32_t arg_length, uint32_t offset,
225
/* How many bytes are in incomplete character */
226
offset= cs->mbmaxlen - offset; /* How many zeros we should prepend */
227
assert(offset && offset != cs->mbmaxlen);
229
uint32_t aligned_length= arg_length + offset;
230
if (alloc(aligned_length))
234
Note, this is only safe for big-endian UCS-2.
235
If we ever add little-endian UCS-2, this code
236
will be more complicated. But it's OK for now.
238
bzero((char*) Ptr, offset);
239
memcpy(Ptr + offset, str, arg_length);
240
Ptr[aligned_length]=0;
241
/* str_length is always >= 0 as arg_length is != 0 */
242
str_length= aligned_length;
248
255
bool String::set_or_copy_aligned(const char *str,uint32_t arg_length,
256
const CHARSET_INFO * const cs)
251
258
/* How many bytes are in incomplete character */
252
uint32_t offset= (arg_length % cs->mbminlen);
254
if (!offset) /* All characters are complete, just copy */
256
set(str, arg_length, cs);
259
return copy_aligned(str, arg_length, offset, cs);
259
uint32_t offset= (arg_length % cs->mbminlen);
261
assert(!offset); /* All characters are complete, just copy */
263
set(str, arg_length, cs);
262
267
/* Copy with charset conversion */
264
269
bool String::copy(const char *str, uint32_t arg_length,
265
CHARSET_INFO *from_cs, CHARSET_INFO *to_cs, uint *errors)
270
const CHARSET_INFO * const,
271
const CHARSET_INFO * const to_cs, uint32_t *errors)
268
if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
271
return copy(str, arg_length, to_cs);
273
if ((from_cs == &my_charset_bin) && offset)
276
return copy_aligned(str, arg_length, offset, to_cs);
278
uint32_t new_length= to_cs->mbmaxlen*arg_length;
279
if (alloc(new_length))
281
str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
282
str, arg_length, from_cs, errors);
274
return copy(str, arg_length, to_cs);
289
279
Set a string to the value of a latin1-string, keeping the original charset
293
283
str String of a simple charset (latin1)
401
353
with character set recoding
404
bool String::append(const char *s,uint32_t arg_length, CHARSET_INFO *cs)
406
uint32_t dummy_offset;
408
if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
410
uint32_t add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
412
if (realloc(str_length + add_length))
414
str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
415
s, arg_length, cs, &dummy_errors);
419
if (realloc(str_length + arg_length))
421
memcpy(Ptr + str_length, s, arg_length);
422
str_length+= arg_length;
428
bool String::append(IO_CACHE* file, uint32_t arg_length)
430
if (realloc(str_length+arg_length))
432
if (my_b_read(file, (uchar*) Ptr + str_length, arg_length))
437
str_length+=arg_length;
356
bool String::append(const char *s,uint32_t arg_length, const CHARSET_INFO * const)
358
if (realloc(str_length + arg_length))
360
memcpy(Ptr + str_length, s, arg_length);
361
str_length+= arg_length;
441
367
bool String::append_with_prefill(const char *s,uint32_t arg_length,
442
368
uint32_t full_length, char fill_char)
569
// added by Holyfoot for "geometry" needs
570
int String::reserve(uint32_t space_needed, uint32_t grow_by)
572
if (Alloced_length < str_length + space_needed)
574
if (realloc(Alloced_length + max(space_needed, grow_by) - 1))
580
void String::qs_append(const char *str, uint32_t len)
582
memcpy(Ptr + str_length, str, len + 1);
586
void String::qs_append(double d)
588
char *buff = Ptr + str_length;
589
str_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff, NULL);
592
void String::qs_append(double *d)
595
float8get(ld, (char*) d);
599
void String::qs_append(int i)
601
char *buff= Ptr + str_length;
602
char *end= int10_to_str(i, buff, -10);
603
str_length+= (int) (end-buff);
606
void String::qs_append(uint i)
608
char *buff= Ptr + str_length;
609
char *end= int10_to_str(i, buff, 10);
610
str_length+= (int) (end-buff);
614
497
Compare strings according to collation, without end space.
686
569
****************************************************************************/
689
copy a string from one character set to another
694
to_cs Character set of result string
696
from_length Length of from string
697
from_cs From character set
700
'to' must be at least from_length * to_cs->mbmaxlen bytes long
703
length of bytes copied to 'to'
708
copy_and_convert_extended(char *to, uint32_t to_length, CHARSET_INFO *to_cs,
709
const char *from, uint32_t from_length,
710
CHARSET_INFO *from_cs,
715
const uchar *from_end= (const uchar*) from+from_length;
717
uchar *to_end= (uchar*) to+to_length;
718
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
719
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
724
if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from,
727
else if (cnvres == MY_CS_ILSEQ)
733
else if (cnvres > MY_CS_TOOSMALL)
736
A correct multibyte sequence detected
737
But it doesn't have Unicode mapping.
744
break; // Not enough characters
747
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
749
else if (cnvres == MY_CS_ILUNI && wc != '?')
758
*errors= error_count;
759
return (uint32_t) (to - to_start);
764
Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
767
copy_and_convert(char *to, uint32_t to_length, CHARSET_INFO *to_cs,
768
const char *from, uint32_t from_length, CHARSET_INFO *from_cs,
772
If any of the character sets is not ASCII compatible,
773
immediately switch to slow mb_wc->wc_mb method.
775
if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
776
return copy_and_convert_extended(to, to_length, to_cs,
777
from, from_length, from_cs, errors);
779
uint32_t length= min(to_length, from_length), length2= length;
781
#if defined(__i386__)
783
Special loop for i386, it allows to refer to a
784
non-aligned memory block as UINT32, which makes
785
it possible to copy four bytes at once. This
786
gives about 10% performance improvement comparing
787
to byte-by-byte loop.
789
for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
791
if ((*(uint32_t*)from) & 0x80808080)
793
*((uint32_t*) to)= *((const uint32_t*) from);
797
for (; ; *to++= *from++, length--)
804
if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
806
uint32_t copied_length= length2 - length;
807
to_length-= copied_length;
808
from_length-= copied_length;
809
return copied_length + copy_and_convert_extended(to, to_length,
817
assert(false); // Should never get to here
818
return 0; // Make compiler happy
823
Copy string with HEX-encoding of "bad" characters.
825
@details This function copies the string pointed to by "src"
826
to the string pointed by "dst". Not more than "srclen" bytes
827
are read from "src". Any sequences of bytes representing
828
a not-well-formed substring (according to cs) are hex-encoded,
829
and all well-formed substrings (according to cs) are copied as is.
830
Not more than "dstlen" bytes are written to "dst". The number
831
of bytes written to "dst" is returned.
833
@param cs character set pointer of the destination string
834
@param[out] dst destination string
835
@param dstlen size of dst
836
@param src source string
837
@param srclen length of src
839
@retval result length
843
my_copy_with_hex_escaping(CHARSET_INFO *cs,
844
char *dst, size_t dstlen,
845
const char *src, size_t srclen)
847
const char *srcend= src + srclen;
850
for ( ; src < srcend ; )
853
if ((chlen= my_ismbchar(cs, src, srcend)))
856
break; /* purecov: inspected */
857
memcpy(dst, src, chlen);
862
else if (*src & 0x80)
865
break; /* purecov: inspected */
868
*dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
869
*dst++= _dig_vec_upper[((unsigned char) *src) & 15];
876
break; /* purecov: inspected */
886
573
with optional character set conversion,
887
574
with optional left padding (for binary -> UCS2 conversion)
890
577
well_formed_copy_nchars()
891
578
to Store result here
912
well_formed_copy_nchars(CHARSET_INFO *to_cs,
913
char *to, uint to_length,
914
CHARSET_INFO *from_cs,
915
const char *from, uint from_length,
599
well_formed_copy_nchars(const CHARSET_INFO * const to_cs,
600
char *to, uint32_t to_length,
601
const CHARSET_INFO * const from_cs,
602
const char *from, uint32_t from_length,
917
604
const char **well_formed_error_pos,
918
605
const char **cannot_convert_error_pos,
919
606
const char **from_end_pos)
923
if ((to_cs == &my_charset_bin) ||
924
(from_cs == &my_charset_bin) ||
925
(to_cs == from_cs) ||
926
my_charset_same(from_cs, to_cs))
928
if (to_length < to_cs->mbminlen || !nchars)
931
*cannot_convert_error_pos= NULL;
932
*well_formed_error_pos= NULL;
936
if (to_cs == &my_charset_bin)
938
res= min(min(nchars, to_length), from_length);
939
memmove(to, from, res);
940
*from_end_pos= from + res;
941
*well_formed_error_pos= NULL;
942
*cannot_convert_error_pos= NULL;
946
int well_formed_error;
949
if ((from_offset= (from_length % to_cs->mbminlen)) &&
950
(from_cs == &my_charset_bin))
953
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
954
INSERT INTO t1 (ucs2_column) VALUES (0x01);
957
uint pad_length= to_cs->mbminlen - from_offset;
958
bzero(to, pad_length);
959
memmove(to + pad_length, from, from_offset);
962
from_length-= from_offset;
963
to+= to_cs->mbminlen;
964
to_length-= to_cs->mbminlen;
967
set_if_smaller(from_length, to_length);
968
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
969
nchars, &well_formed_error);
970
memmove(to, from, res);
971
*from_end_pos= from + res;
972
*well_formed_error_pos= well_formed_error ? from + res : NULL;
973
*cannot_convert_error_pos= NULL;
975
res+= to_cs->mbminlen;
982
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
983
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
984
const uchar *from_end= (const uchar*) from + from_length;
985
uchar *to_end= (uchar*) to + to_length;
987
*well_formed_error_pos= NULL;
988
*cannot_convert_error_pos= NULL;
990
for ( ; nchars; nchars--)
992
const char *from_prev= from;
993
if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
995
else if (cnvres == MY_CS_ILSEQ)
997
if (!*well_formed_error_pos)
998
*well_formed_error_pos= from;
1002
else if (cnvres > MY_CS_TOOSMALL)
1005
A correct multibyte sequence detected
1006
But it doesn't have Unicode mapping.
1008
if (!*cannot_convert_error_pos)
1009
*cannot_convert_error_pos= from;
1014
break; // Not enough characters
1017
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
1019
else if (cnvres == MY_CS_ILUNI && wc != '?')
1021
if (!*cannot_convert_error_pos)
1022
*cannot_convert_error_pos= from_prev;
610
assert((to_cs == &my_charset_bin) ||
611
(from_cs == &my_charset_bin) ||
612
(to_cs == from_cs) ||
613
my_charset_same(from_cs, to_cs));
615
if (to_length < to_cs->mbminlen || !nchars)
1032
617
*from_end_pos= from;
1035
return (uint32_t) res;
618
*cannot_convert_error_pos= NULL;
619
*well_formed_error_pos= NULL;
623
if (to_cs == &my_charset_bin)
625
res= min(min(nchars, to_length), from_length);
626
memmove(to, from, res);
627
*from_end_pos= from + res;
628
*well_formed_error_pos= NULL;
629
*cannot_convert_error_pos= NULL;
633
int well_formed_error;
634
uint32_t from_offset;
636
if ((from_offset= (from_length % to_cs->mbminlen)) &&
637
(from_cs == &my_charset_bin))
640
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
641
INSERT INTO t1 (ucs2_column) VALUES (0x01);
644
uint32_t pad_length= to_cs->mbminlen - from_offset;
645
memset(to, 0, pad_length);
646
memmove(to + pad_length, from, from_offset);
649
from_length-= from_offset;
650
to+= to_cs->mbminlen;
651
to_length-= to_cs->mbminlen;
654
set_if_smaller(from_length, to_length);
655
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
656
nchars, &well_formed_error);
657
memmove(to, from, res);
658
*from_end_pos= from + res;
659
*well_formed_error_pos= well_formed_error ? from + res : NULL;
660
*cannot_convert_error_pos= NULL;
662
res+= to_cs->mbminlen;
704
Quote the given identifier.
705
If the given identifier is empty, it will be quoted.
709
name the identifier to be appended
710
name_length length of the appending identifier
713
/* Factor the extern out */
714
extern const CHARSET_INFO *system_charset_info, *files_charset_info;
716
void String::append_identifier(const char *name, uint32_t in_length)
718
const char *name_end;
723
The identifier must be quoted as it includes a quote character or
727
reserve(in_length*2 + 2);
728
quote_char= (char) q;
729
append("e_char, 1, system_charset_info);
731
for (name_end= name+in_length ; name < name_end ; name+= in_length)
733
unsigned char chr= (unsigned char) *name;
734
in_length= my_mbcharlen(system_charset_info, chr);
736
my_mbcharlen can return 0 on a wrong multibyte
737
sequence. It is possible when upgrading from 4.0,
738
and identifier contains some accented characters.
739
The manual says it does not work. So we'll just
740
change length to 1 not to hang in the endless loop.
744
if (in_length == 1 && chr == (unsigned char) quote_char)
745
append("e_char, 1, system_charset_info);
746
append(name, in_length, system_charset_info);
748
append("e_char, 1, system_charset_info);
1075
753
Exchange state of this object and argument.
1084
762
void String::swap(String &s)
1086
swap_variables(char *, Ptr, s.Ptr);
1087
swap_variables(uint32_t, str_length, s.str_length);
1088
swap_variables(uint32_t, Alloced_length, s.Alloced_length);
1089
swap_variables(bool, alloced, s.alloced);
1090
swap_variables(CHARSET_INFO*, str_charset, s.str_charset);
764
std::swap(Ptr, s.Ptr);
765
std::swap(str_length, s.str_length);
766
std::swap(Alloced_length, s.Alloced_length);
767
std::swap(alloced, s.alloced);
768
std::swap(str_charset, s.str_charset);
772
bool operator==(const String &s1, const String &s2)
774
return stringcmp(&s1,&s2) == 0;
777
bool operator!=(const String &s1, const String &s2)
782
bool check_if_only_end_space(const CHARSET_INFO * const cs, char *str,
785
return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;