12
12
You should have received a copy of the GNU General Public License
13
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
16
/* This file is originally from the mysql distribution. Coded by monty */
20
#include "drizzled/internal/my_sys.h"
21
#include "drizzled/internal/m_string.h"
22
#include "drizzled/charset.h"
23
#include "drizzled/global_charset_info.h"
19
#include <mysys/my_sys.h>
20
#include <mystrings/m_string.h>
25
22
#include <algorithm>
27
#include "drizzled/sql_string.h"
34
// Conversion functions to and from std::string.
36
std::string String_to_std_string(String const& s)
38
return std::string(s.ptr(), s.length());
41
String* set_String_from_std_string(String* s, std::string const& cs)
43
s->set_ascii(cs.c_str(), cs.length());
25
The following extern declarations are ok as these are interface functions
26
required by the string function
29
extern unsigned char* sql_alloc(unsigned size);
30
extern void sql_element_free(void *ptr);
32
#include "sql_string.h"
48
34
/*****************************************************************************
49
35
** String functions
50
36
*****************************************************************************/
57
str_charset(&my_charset_bin)
61
String::String(size_t length_arg)
66
str_charset(&my_charset_bin)
68
(void) real_alloc(length_arg);
71
String::String(const char *str, const CHARSET_INFO * const cs)
72
: Ptr(const_cast<char *>(str)),
73
str_length(static_cast<size_t>(strlen(str))),
80
String::String(const char *str, size_t len, const CHARSET_INFO * const cs)
81
: Ptr(const_cast<char *>(str)),
89
String::String(char *str, size_t len, const CHARSET_INFO * const cs)
98
String::String(const String &str)
100
str_length(str.str_length),
101
Alloced_length(str.Alloced_length),
103
str_charset(str.str_charset)
107
void *String::operator new(size_t size, memory::Root *mem_root)
109
return mem_root->alloc_root(static_cast<size_t>(size));
112
String::~String() { free(); }
114
bool String::real_alloc(size_t arg_length)
38
bool String::real_alloc(uint32_t arg_length)
116
40
arg_length=ALIGN_SIZE(arg_length+1);
118
42
if (Alloced_length < arg_length)
120
if (Alloced_length > 0)
122
45
if (!(Ptr=(char*) malloc(arg_length)))
124
47
Alloced_length=arg_length;
169
92
bool String::set_int(int64_t num, bool unsigned_flag, const CHARSET_INFO * const cs)
171
size_t l=20*cs->mbmaxlen+1;
94
uint32_t l=20*cs->mbmaxlen+1;
172
95
int base= unsigned_flag ? 10 : -10;
176
str_length=(size_t) (cs->cset->int64_t10_to_str)(cs,Ptr,l,base,num);
99
str_length=(uint32_t) (cs->cset->int64_t10_to_str)(cs,Ptr,l,base,num);
181
bool String::set_real(double num,size_t decimals, const CHARSET_INFO * const cs)
104
bool String::set_real(double num,uint32_t decimals, const CHARSET_INFO * const cs)
183
106
char buff[FLOATING_POINT_BUFFER];
107
uint32_t dummy_errors;
188
111
if (decimals >= NOT_FIXED_DEC)
190
len= internal::my_gcvt(num,
191
internal::MY_GCVT_ARG_DOUBLE,
192
sizeof(buff) - 1, buff, NULL);
113
len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
193
114
return copy(buff, len, &my_charset_utf8_general_ci, cs, &dummy_errors);
195
len= internal::my_fcvt(num, decimals, buff, NULL);
196
return copy(buff, (size_t) len, &my_charset_utf8_general_ci, cs,
116
len= my_fcvt(num, decimals, buff, NULL);
117
return copy(buff, (uint32_t) len, &my_charset_utf8_general_ci, cs,
288
bool String::set_or_copy_aligned(const char *str,size_t arg_length,
195
Copy a multi-byte character set string, prepending leading zeros.
201
arg_length Length of string. This should NOT be divisible by
203
offset arg_length % cs->mbminlen
204
cs Character set for 'str'
207
For real multi-byte, ASCII-incompatible character sets,
208
like UCS-2, add leading zeros if we have an incomplete character.
211
will automatically be converted into
219
bool String::copy_aligned(const char *str,uint32_t arg_length, uint32_t offset,
220
const CHARSET_INFO * const cs)
222
/* How many bytes are in incomplete character */
223
offset= cs->mbmaxlen - offset; /* How many zeros we should prepend */
224
assert(offset && offset != cs->mbmaxlen);
226
uint32_t aligned_length= arg_length + offset;
227
if (alloc(aligned_length))
231
Note, this is only safe for big-endian UCS-2.
232
If we add little-endian UCS-2 someday, this code
233
will be more complicated. But it's OK for now.
235
memset(Ptr, 0, offset);
236
memcpy(Ptr + offset, str, arg_length);
237
Ptr[aligned_length]=0;
238
/* str_length is always >= 0 as arg_length is != 0 */
239
str_length= aligned_length;
245
bool String::set_or_copy_aligned(const char *str,uint32_t arg_length,
289
246
const CHARSET_INFO * const cs)
291
248
/* How many bytes are in incomplete character */
292
size_t offset= (arg_length % cs->mbminlen);
294
assert(!offset); /* All characters are complete, just copy */
296
set(str, arg_length, cs);
249
uint32_t offset= (arg_length % cs->mbminlen);
251
if (!offset) /* All characters are complete, just copy */
253
set(str, arg_length, cs);
256
return copy_aligned(str, arg_length, offset, cs);
300
259
/* Copy with charset conversion */
302
bool String::copy(const char *str, size_t arg_length,
303
const CHARSET_INFO * const,
304
const CHARSET_INFO * const to_cs, size_t *errors)
261
bool String::copy(const char *str, uint32_t arg_length,
262
const CHARSET_INFO * const from_cs,
263
const CHARSET_INFO * const to_cs, uint32_t *errors)
307
return copy(str, arg_length, to_cs);
266
if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
269
return copy(str, arg_length, to_cs);
271
if ((from_cs == &my_charset_bin) && offset)
274
return copy_aligned(str, arg_length, offset, to_cs);
276
uint32_t new_length= to_cs->mbmaxlen*arg_length;
277
if (alloc(new_length))
279
str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
280
str, arg_length, from_cs, errors);
386
393
with character set recoding
389
bool String::append(const char *s,size_t arg_length, const CHARSET_INFO * const)
391
if (realloc(str_length + arg_length))
393
memcpy(Ptr + str_length, s, arg_length);
394
str_length+= arg_length;
400
bool String::append_with_prefill(const char *s,size_t arg_length,
401
size_t full_length, char fill_char)
396
bool String::append(const char *s,uint32_t arg_length, const CHARSET_INFO * const cs)
398
uint32_t dummy_offset;
400
if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
402
uint32_t add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
403
uint32_t dummy_errors;
404
if (realloc(str_length + add_length))
406
str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
407
s, arg_length, cs, &dummy_errors);
411
if (realloc(str_length + arg_length))
413
memcpy(Ptr + str_length, s, arg_length);
414
str_length+= arg_length;
420
bool String::append(IO_CACHE* file, uint32_t arg_length)
422
if (realloc(str_length+arg_length))
424
if (my_b_read(file, (unsigned char*) Ptr + str_length, arg_length))
429
str_length+=arg_length;
433
bool String::append_with_prefill(const char *s,uint32_t arg_length,
434
uint32_t full_length, char fill_char)
403
436
int t_length= arg_length > full_length ? arg_length : full_length;
514
if (realloc(str_length+(size_t) diff))
547
if (realloc(str_length+(uint32_t) diff))
516
internal::bmove_upp((unsigned char*) Ptr+str_length+diff,
517
(unsigned char*) Ptr+str_length,
518
str_length-offset-arg_length);
549
bmove_upp((unsigned char*) Ptr+str_length+diff, (unsigned char*) Ptr+str_length,
550
str_length-offset-arg_length);
521
553
memcpy(Ptr+offset,to,to_length);
523
str_length+=(size_t) diff;
555
str_length+=(uint32_t) diff;
561
// added by Holyfoot for "geometry" needs
562
int String::reserve(uint32_t space_needed, uint32_t grow_by)
564
if (Alloced_length < str_length + space_needed)
566
if (realloc(Alloced_length + cmax(space_needed, grow_by) - 1))
572
void String::qs_append(const char *str, uint32_t len)
574
memcpy(Ptr + str_length, str, len + 1);
578
void String::qs_append(double d)
580
char *buff = Ptr + str_length;
581
str_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff, NULL);
584
void String::qs_append(double *d)
587
float8get(ld, (char*) d);
591
void String::qs_append(int i)
593
char *buff= Ptr + str_length;
594
char *end= int10_to_str(i, buff, -10);
595
str_length+= (int) (end-buff);
598
void String::qs_append(uint32_t i)
600
char *buff= Ptr + str_length;
601
char *end= int10_to_str(i, buff, 10);
602
str_length+= (int) (end-buff);
531
606
Compare strings according to collation, without end space.
603
678
****************************************************************************/
681
copy a string from one character set to another
686
to_cs Character set of result string
688
from_length Length of from string
689
from_cs From character set
692
'to' must be at least from_length * to_cs->mbmaxlen bytes long
695
number of bytes copied to 'to'
700
copy_and_convert_extended(char *to, uint32_t to_length,
701
const CHARSET_INFO * const to_cs,
702
const char *from, uint32_t from_length,
703
const CHARSET_INFO * const from_cs,
708
const unsigned char *from_end= (const unsigned char*) from+from_length;
710
unsigned char *to_end= (unsigned char*) to+to_length;
711
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
712
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
713
uint32_t error_count= 0;
717
if ((cnvres= (*mb_wc)(from_cs, &wc, (unsigned char*) from,
720
else if (cnvres == MY_CS_ILSEQ)
726
else if (cnvres > MY_CS_TOOSMALL)
729
A correct multibyte sequence detected
730
But it doesn't have Unicode mapping.
737
break; // Not enough characters
740
if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
742
else if (cnvres == MY_CS_ILUNI && wc != '?')
751
*errors= error_count;
752
return (uint32_t) (to - to_start);
757
Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
760
copy_and_convert(char *to, uint32_t to_length, const CHARSET_INFO * const to_cs,
761
const char *from, uint32_t from_length,
762
const CHARSET_INFO * const from_cs, uint32_t *errors)
765
If any of the character sets is not ASCII compatible,
766
immediately switch to slow mb_wc->wc_mb method.
768
if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
769
return copy_and_convert_extended(to, to_length, to_cs,
770
from, from_length, from_cs, errors);
772
uint32_t length= cmin(to_length, from_length), length2= length;
774
#if defined(__i386__)
776
Special loop for i386, it allows to refer to a
777
non-aligned memory block as UINT32, which makes
778
it possible to copy four bytes at once. This
779
gives about 10% performance improvement compared
780
to byte-by-byte loop.
782
for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
784
if ((*(uint32_t*)from) & 0x80808080)
786
*((uint32_t*) to)= *((const uint32_t*) from);
790
for (; ; *to++= *from++, length--)
797
if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
799
uint32_t copied_length= length2 - length;
800
to_length-= copied_length;
801
from_length-= copied_length;
802
return copied_length + copy_and_convert_extended(to, to_length,
810
assert(false); // Should never get to here
811
return 0; // Make compiler happy
816
Copy string with HEX-encoding of "bad" characters.
818
@details This function copies the string pointed to by "src"
819
to the string pointed to by "dst". Not more than "srclen" bytes
820
are read from "src". Any sequences of bytes representing
821
a not-well-formed substring (according to cs) are hex-encoded,
822
and all well-formed substrings (according to cs) are copied as is.
823
Not more than "dstlen" bytes are written to "dst". The number
824
of bytes written to "dst" is returned.
826
@param cs character set pointer of the destination string
827
@param[out] dst destination string
828
@param dstlen size of dst
829
@param src source string
830
@param srclen length of src
832
@retval result length
836
my_copy_with_hex_escaping(const CHARSET_INFO * const cs,
837
char *dst, size_t dstlen,
838
const char *src, size_t srclen)
840
const char *srcend= src + srclen;
843
for ( ; src < srcend ; )
846
if ((chlen= my_ismbchar(cs, src, srcend)))
849
break; /* purecov: inspected */
850
memcpy(dst, src, chlen);
855
else if (*src & 0x80)
858
break; /* purecov: inspected */
861
*dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
862
*dst++= _dig_vec_upper[((unsigned char) *src) & 15];
869
break; /* purecov: inspected */
607
879
with optional character set conversion,
608
880
with optional left padding (for binary -> UCS2 conversion)
633
905
well_formed_copy_nchars(const CHARSET_INFO * const to_cs,
634
char *to, size_t to_length,
906
char *to, uint32_t to_length,
635
907
const CHARSET_INFO * const from_cs,
636
const char *from, size_t from_length,
908
const char *from, uint32_t from_length,
638
910
const char **well_formed_error_pos,
639
911
const char **cannot_convert_error_pos,
640
912
const char **from_end_pos)
644
assert((to_cs == &my_charset_bin) ||
645
(from_cs == &my_charset_bin) ||
646
(to_cs == from_cs) ||
647
my_charset_same(from_cs, to_cs));
649
if (to_length < to_cs->mbminlen || !nchars)
916
if ((to_cs == &my_charset_bin) ||
917
(from_cs == &my_charset_bin) ||
918
(to_cs == from_cs) ||
919
my_charset_same(from_cs, to_cs))
921
if (to_length < to_cs->mbminlen || !nchars)
924
*cannot_convert_error_pos= NULL;
925
*well_formed_error_pos= NULL;
929
if (to_cs == &my_charset_bin)
931
res= cmin(cmin(nchars, to_length), from_length);
932
memmove(to, from, res);
933
*from_end_pos= from + res;
934
*well_formed_error_pos= NULL;
935
*cannot_convert_error_pos= NULL;
939
int well_formed_error;
940
uint32_t from_offset;
942
if ((from_offset= (from_length % to_cs->mbminlen)) &&
943
(from_cs == &my_charset_bin))
946
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
947
INSERT INTO t1 (ucs2_column) VALUES (0x01);
950
uint32_t pad_length= to_cs->mbminlen - from_offset;
951
memset(to, 0, pad_length);
952
memmove(to + pad_length, from, from_offset);
955
from_length-= from_offset;
956
to+= to_cs->mbminlen;
957
to_length-= to_cs->mbminlen;
960
set_if_smaller(from_length, to_length);
961
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
962
nchars, &well_formed_error);
963
memmove(to, from, res);
964
*from_end_pos= from + res;
965
*well_formed_error_pos= well_formed_error ? from + res : NULL;
966
*cannot_convert_error_pos= NULL;
968
res+= to_cs->mbminlen;
975
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
976
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
977
const unsigned char *from_end= (const unsigned char*) from + from_length;
978
unsigned char *to_end= (unsigned char*) to + to_length;
980
*well_formed_error_pos= NULL;
981
*cannot_convert_error_pos= NULL;
983
for ( ; nchars; nchars--)
985
const char *from_prev= from;
986
if ((cnvres= (*mb_wc)(from_cs, &wc, (unsigned char*) from, from_end)) > 0)
988
else if (cnvres == MY_CS_ILSEQ)
990
if (!*well_formed_error_pos)
991
*well_formed_error_pos= from;
995
else if (cnvres > MY_CS_TOOSMALL)
998
A correct multibyte sequence detected
999
But it doesn't have Unicode mapping.
1001
if (!*cannot_convert_error_pos)
1002
*cannot_convert_error_pos= from;
1007
break; // Not enough characters
1010
if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
1012
else if (cnvres == MY_CS_ILUNI && wc != '?')
1014
if (!*cannot_convert_error_pos)
1015
*cannot_convert_error_pos= from_prev;
651
1025
*from_end_pos= from;
652
*cannot_convert_error_pos= NULL;
653
*well_formed_error_pos= NULL;
657
if (to_cs == &my_charset_bin)
659
res= min(min(nchars, to_length), from_length);
660
memmove(to, from, res);
661
*from_end_pos= from + res;
662
*well_formed_error_pos= NULL;
663
*cannot_convert_error_pos= NULL;
667
int well_formed_error;
670
if ((from_offset= (from_length % to_cs->mbminlen)) &&
671
(from_cs == &my_charset_bin))
674
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
675
INSERT INTO t1 (ucs2_column) VALUES (0x01);
678
size_t pad_length= to_cs->mbminlen - from_offset;
679
memset(to, 0, pad_length);
680
memmove(to + pad_length, from, from_offset);
683
from_length-= from_offset;
684
to+= to_cs->mbminlen;
685
to_length-= to_cs->mbminlen;
688
set_if_smaller(from_length, to_length);
689
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
690
nchars, &well_formed_error);
691
memmove(to, from, res);
692
*from_end_pos= from + res;
693
*well_formed_error_pos= well_formed_error ? from + res : NULL;
694
*cannot_convert_error_pos= NULL;
696
res+= to_cs->mbminlen;
1028
return (uint32_t) res;
738
Quote the given identifier.
739
If the given identifier is empty, it will be quoted.
743
name the identifier to be appended
744
name_length length of the appending identifier
747
/* Factor the extern out */
748
extern const CHARSET_INFO *system_charset_info, *files_charset_info;
750
void String::append_identifier(const char *name, size_t in_length)
752
const char *name_end;
757
The identifier must be quoted as it includes a quote character or
761
reserve(in_length*2 + 2);
762
quote_char= (char) q;
763
append("e_char, 1, system_charset_info);
765
for (name_end= name+in_length ; name < name_end ; name+= in_length)
767
unsigned char chr= (unsigned char) *name;
768
in_length= my_mbcharlen(system_charset_info, chr);
770
my_mbcharlen can return 0 on a wrong multibyte
771
sequence. It is possible when upgrading from 4.0,
772
and identifier contains some accented characters.
773
The manual says it does not work. So we'll just
774
change length to 1 not to hang in the endless loop.
778
if (in_length == 1 && chr == (unsigned char) quote_char)
779
append("e_char, 1, system_charset_info);
780
append(name, in_length, system_charset_info);
782
append("e_char, 1, system_charset_info);
787
1068
Exchange state of this object and argument.
802
1083
std::swap(str_charset, s.str_charset);
805
void String::q_append(const size_t n)
807
int8store(Ptr + str_length, n);
810
void String::q_append(double d)
812
float8store(Ptr + str_length, d);
815
void String::q_append(double *d)
817
float8store(Ptr + str_length, *d);
820
void String::q_append(const char *data, size_t data_len)
822
memcpy(Ptr + str_length, data, data_len);
823
str_length += data_len;
826
void String::write_at_position(int position, size_t value)
828
int8store(Ptr + position,value);
830
bool check_if_only_end_space(const CHARSET_INFO * const cs, char *str,
833
return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
836
std::ostream& operator<<(std::ostream& output, const String &str)
838
output << "String:(";
839
output << const_cast<String&>(str).c_str();
841
output << str.length();
844
return output; // for multiple << operators.
847
} /* namespace drizzled */
849
bool operator==(const drizzled::String &s1, const drizzled::String &s2)
1087
bool operator==(const String &s1, const String &s2)
851
1089
return stringcmp(&s1,&s2) == 0;
854
bool operator!=(const drizzled::String &s1, const drizzled::String &s2)
1092
bool operator!=(const String &s1, const String &s2)
856
1094
return !(s1 == s2);