16
16
/* This file is originally from the mysql distribution. Coded by monty */
20
#include "drizzled/internal/my_sys.h"
21
#include "drizzled/internal/m_string.h"
22
#include "drizzled/charset.h"
23
#include "drizzled/global_charset_info.h"
19
#include <mysys/my_sys.h>
20
#include <mystrings/m_string.h>
25
22
#include <algorithm>
27
#include "drizzled/sql_string.h"
34
// Conversion functions to and from std::string.
36
std::string String_to_std_string(String const& s)
38
return std::string(s.ptr(), s.length());
41
String* set_String_from_std_string(String* s, std::string const& cs)
43
s->set_ascii(cs.c_str(), cs.length());
25
The following extern declarations are ok as these are interface functions
26
required by the string function
29
extern unsigned char* sql_alloc(unsigned size);
30
extern void sql_element_free(void *ptr);
32
#include "sql_string.h"
48
34
/*****************************************************************************
49
35
** String functions
50
36
*****************************************************************************/
57
str_charset(&my_charset_bin)
61
String::String(uint32_t length_arg)
66
str_charset(&my_charset_bin)
68
(void) real_alloc(length_arg);
71
String::String(const char *str, const CHARSET_INFO * const cs)
72
: Ptr(const_cast<char *>(str)),
73
str_length(static_cast<uint32_t>(strlen(str))),
80
String::String(const char *str, uint32_t len, const CHARSET_INFO * const cs)
81
: Ptr(const_cast<char *>(str)),
89
String::String(char *str, uint32_t len, const CHARSET_INFO * const cs)
98
String::String(const String &str)
100
str_length(str.str_length),
101
Alloced_length(str.Alloced_length),
103
str_charset(str.str_charset)
107
void *String::operator new(size_t size, memory::Root *mem_root)
109
return alloc_root(mem_root, static_cast<uint32_t>(size));
112
/* Destructor: delegates all cleanup to the member free() routine. */
String::~String()
{
  this->free();
}
114
38
bool String::real_alloc(uint32_t arg_length)
116
40
arg_length=ALIGN_SIZE(arg_length+1);
187
111
if (decimals >= NOT_FIXED_DEC)
189
len= internal::my_gcvt(num,
190
internal::MY_GCVT_ARG_DOUBLE,
191
sizeof(buff) - 1, buff, NULL);
113
len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
192
114
return copy(buff, len, &my_charset_utf8_general_ci, cs, &dummy_errors);
194
len= internal::my_fcvt(num, decimals, buff, NULL);
116
len= my_fcvt(num, decimals, buff, NULL);
195
117
return copy(buff, (uint32_t) len, &my_charset_utf8_general_ci, cs,
195
Copy a string in a multi-byte character set, adding leading zeros.
201
arg_length Length of string. This should NOT be divisible by
203
offset arg_length % cs->mb_minlength
204
cs Character set for 'str'
207
For real multi-byte, ASCII-incompatible character sets,
208
like UCS-2, add leading zeros if we have an incomplete character.
211
will automatically be converted into
219
bool String::copy_aligned(const char *str,uint32_t arg_length, uint32_t offset,
220
const CHARSET_INFO * const cs)
222
/* How many bytes are in incomplete character */
223
offset= cs->mbmaxlen - offset; /* How many zeros we should prepend */
224
assert(offset && offset != cs->mbmaxlen);
226
uint32_t aligned_length= arg_length + offset;
227
if (alloc(aligned_length))
231
Note, this is only safe for big-endian UCS-2.
232
If we add little-endian UCS-2 sometimes, this code
233
will be more complicated. But it's OK for now.
235
memset(Ptr, 0, offset);
236
memcpy(Ptr + offset, str, arg_length);
237
Ptr[aligned_length]=0;
238
/* str_length is always >= 0 as arg_length is != 0 */
239
str_length= aligned_length;
274
245
bool String::set_or_copy_aligned(const char *str,uint32_t arg_length,
275
246
const CHARSET_INFO * const cs)
277
248
/* How many bytes are in incomplete character */
278
uint32_t offset= (arg_length % cs->mbminlen);
280
assert(!offset); /* All characters are complete, just copy */
282
set(str, arg_length, cs);
249
uint32_t offset= (arg_length % cs->mbminlen);
251
if (!offset) /* All characters are complete, just copy */
253
set(str, arg_length, cs);
256
return copy_aligned(str, arg_length, offset, cs);
286
259
/* Copy with charset conversion */
288
261
bool String::copy(const char *str, uint32_t arg_length,
289
const CHARSET_INFO * const,
262
const CHARSET_INFO * const from_cs,
290
263
const CHARSET_INFO * const to_cs, uint32_t *errors)
293
return copy(str, arg_length, to_cs);
266
if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
269
return copy(str, arg_length, to_cs);
271
if ((from_cs == &my_charset_bin) && offset)
274
return copy_aligned(str, arg_length, offset, to_cs);
276
uint32_t new_length= to_cs->mbmaxlen*arg_length;
277
if (alloc(new_length))
279
str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
280
str, arg_length, from_cs, errors);
298
287
Set a string to the value of a latin1-string, keeping the original charset
302
291
str String of a simple charset (latin1)
372
393
with character set recoding
375
bool String::append(const char *s,uint32_t arg_length, const CHARSET_INFO * const)
377
if (realloc(str_length + arg_length))
379
memcpy(Ptr + str_length, s, arg_length);
380
str_length+= arg_length;
396
bool String::append(const char *s,uint32_t arg_length, const CHARSET_INFO * const cs)
398
uint32_t dummy_offset;
400
if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
402
uint32_t add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
403
uint32_t dummy_errors;
404
if (realloc(str_length + add_length))
406
str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
407
s, arg_length, cs, &dummy_errors);
411
if (realloc(str_length + arg_length))
413
memcpy(Ptr + str_length, s, arg_length);
414
str_length+= arg_length;
420
bool String::append(IO_CACHE* file, uint32_t arg_length)
422
if (realloc(str_length+arg_length))
424
if (my_b_read(file, (unsigned char*) Ptr + str_length, arg_length))
429
str_length+=arg_length;
386
433
bool String::append_with_prefill(const char *s,uint32_t arg_length,
387
434
uint32_t full_length, char fill_char)
561
// added by Holyfoot for "geometry" needs
562
int String::reserve(uint32_t space_needed, uint32_t grow_by)
564
if (Alloced_length < str_length + space_needed)
566
if (realloc(Alloced_length + cmax(space_needed, grow_by) - 1))
572
void String::qs_append(const char *str, uint32_t len)
574
memcpy(Ptr + str_length, str, len + 1);
578
void String::qs_append(double d)
580
char *buff = Ptr + str_length;
581
str_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff, NULL);
584
void String::qs_append(double *d)
587
float8get(ld, (char*) d);
591
void String::qs_append(int i)
593
char *buff= Ptr + str_length;
594
char *end= int10_to_str(i, buff, -10);
595
str_length+= (int) (end-buff);
598
void String::qs_append(uint32_t i)
600
char *buff= Ptr + str_length;
601
char *end= int10_to_str(i, buff, 10);
602
str_length+= (int) (end-buff);
517
606
Compare strings according to collation, without end space.
589
678
****************************************************************************/
681
copy a string from one character set to another
686
to_cs Character set of result string
688
from_length Length of from string
689
from_cs From character set
692
'to' must be at least from_length * to_cs->mbmaxlen bytes long
695
length of bytes copied to 'to'
700
copy_and_convert_extended(char *to, uint32_t to_length,
701
const CHARSET_INFO * const to_cs,
702
const char *from, uint32_t from_length,
703
const CHARSET_INFO * const from_cs,
708
const unsigned char *from_end= (const unsigned char*) from+from_length;
710
unsigned char *to_end= (unsigned char*) to+to_length;
711
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
712
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
713
uint32_t error_count= 0;
717
if ((cnvres= (*mb_wc)(from_cs, &wc, (unsigned char*) from,
720
else if (cnvres == MY_CS_ILSEQ)
726
else if (cnvres > MY_CS_TOOSMALL)
729
A correct multibyte sequence detected
730
But it doesn't have Unicode mapping.
737
break; // Not enough characters
740
if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
742
else if (cnvres == MY_CS_ILUNI && wc != '?')
751
*errors= error_count;
752
return (uint32_t) (to - to_start);
757
Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
760
copy_and_convert(char *to, uint32_t to_length, const CHARSET_INFO * const to_cs,
761
const char *from, uint32_t from_length,
762
const CHARSET_INFO * const from_cs, uint32_t *errors)
765
If any of the character sets is not ASCII compatible,
766
immediately switch to slow mb_wc->wc_mb method.
768
if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
769
return copy_and_convert_extended(to, to_length, to_cs,
770
from, from_length, from_cs, errors);
772
uint32_t length= cmin(to_length, from_length), length2= length;
774
#if defined(__i386__)
776
Special loop for i386, it allows to refer to a
777
non-aligned memory block as UINT32, which makes
778
it possible to copy four bytes at once. This
779
gives about 10% performance improvement comparing
780
to byte-by-byte loop.
782
for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
784
if ((*(uint32_t*)from) & 0x80808080)
786
*((uint32_t*) to)= *((const uint32_t*) from);
790
for (; ; *to++= *from++, length--)
797
if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
799
uint32_t copied_length= length2 - length;
800
to_length-= copied_length;
801
from_length-= copied_length;
802
return copied_length + copy_and_convert_extended(to, to_length,
810
assert(false); // Should never get to here
811
return 0; // Make compiler happy
816
Copy string with HEX-encoding of "bad" characters.
818
@details This function copies the string pointed to by "src"
819
to the string pointed to by "dst". Not more than "srclen" bytes
820
are read from "src". Any sequences of bytes representing
821
a not-well-formed substring (according to cs) are hex-encoded,
822
and all well-formed substrings (according to cs) are copied as is.
823
Not more than "dstlen" bytes are written to "dst". The number
824
of bytes written to "dst" is returned.
826
@param cs character set pointer of the destination string
827
@param[out] dst destination string
828
@param dstlen size of dst
829
@param src source string
830
@param srclen length of src
832
@retval result length
836
my_copy_with_hex_escaping(const CHARSET_INFO * const cs,
837
char *dst, size_t dstlen,
838
const char *src, size_t srclen)
840
const char *srcend= src + srclen;
843
for ( ; src < srcend ; )
846
if ((chlen= my_ismbchar(cs, src, srcend)))
849
break; /* purecov: inspected */
850
memcpy(dst, src, chlen);
855
else if (*src & 0x80)
858
break; /* purecov: inspected */
861
*dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
862
*dst++= _dig_vec_upper[((unsigned char) *src) & 15];
869
break; /* purecov: inspected */
593
879
with optional character set conversion,
594
880
with optional left padding (for binary -> UCS2 conversion)
597
883
well_formed_copy_nchars()
598
884
to Store result here
630
assert((to_cs == &my_charset_bin) ||
631
(from_cs == &my_charset_bin) ||
632
(to_cs == from_cs) ||
633
my_charset_same(from_cs, to_cs));
635
if (to_length < to_cs->mbminlen || !nchars)
916
if ((to_cs == &my_charset_bin) ||
917
(from_cs == &my_charset_bin) ||
918
(to_cs == from_cs) ||
919
my_charset_same(from_cs, to_cs))
921
if (to_length < to_cs->mbminlen || !nchars)
924
*cannot_convert_error_pos= NULL;
925
*well_formed_error_pos= NULL;
929
if (to_cs == &my_charset_bin)
931
res= cmin(cmin(nchars, to_length), from_length);
932
memmove(to, from, res);
933
*from_end_pos= from + res;
934
*well_formed_error_pos= NULL;
935
*cannot_convert_error_pos= NULL;
939
int well_formed_error;
940
uint32_t from_offset;
942
if ((from_offset= (from_length % to_cs->mbminlen)) &&
943
(from_cs == &my_charset_bin))
946
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
947
INSERT INTO t1 (ucs2_column) VALUES (0x01);
950
uint32_t pad_length= to_cs->mbminlen - from_offset;
951
memset(to, 0, pad_length);
952
memmove(to + pad_length, from, from_offset);
955
from_length-= from_offset;
956
to+= to_cs->mbminlen;
957
to_length-= to_cs->mbminlen;
960
set_if_smaller(from_length, to_length);
961
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
962
nchars, &well_formed_error);
963
memmove(to, from, res);
964
*from_end_pos= from + res;
965
*well_formed_error_pos= well_formed_error ? from + res : NULL;
966
*cannot_convert_error_pos= NULL;
968
res+= to_cs->mbminlen;
975
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
976
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
977
const unsigned char *from_end= (const unsigned char*) from + from_length;
978
unsigned char *to_end= (unsigned char*) to + to_length;
980
*well_formed_error_pos= NULL;
981
*cannot_convert_error_pos= NULL;
983
for ( ; nchars; nchars--)
985
const char *from_prev= from;
986
if ((cnvres= (*mb_wc)(from_cs, &wc, (unsigned char*) from, from_end)) > 0)
988
else if (cnvres == MY_CS_ILSEQ)
990
if (!*well_formed_error_pos)
991
*well_formed_error_pos= from;
995
else if (cnvres > MY_CS_TOOSMALL)
998
A correct multibyte sequence detected
999
But it doesn't have Unicode mapping.
1001
if (!*cannot_convert_error_pos)
1002
*cannot_convert_error_pos= from;
1007
break; // Not enough characters
1010
if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
1012
else if (cnvres == MY_CS_ILUNI && wc != '?')
1014
if (!*cannot_convert_error_pos)
1015
*cannot_convert_error_pos= from_prev;
637
1025
*from_end_pos= from;
638
*cannot_convert_error_pos= NULL;
639
*well_formed_error_pos= NULL;
643
if (to_cs == &my_charset_bin)
645
res= min(min(nchars, to_length), from_length);
646
memmove(to, from, res);
647
*from_end_pos= from + res;
648
*well_formed_error_pos= NULL;
649
*cannot_convert_error_pos= NULL;
653
int well_formed_error;
654
uint32_t from_offset;
656
if ((from_offset= (from_length % to_cs->mbminlen)) &&
657
(from_cs == &my_charset_bin))
660
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
661
INSERT INTO t1 (ucs2_column) VALUES (0x01);
664
uint32_t pad_length= to_cs->mbminlen - from_offset;
665
memset(to, 0, pad_length);
666
memmove(to + pad_length, from, from_offset);
669
from_length-= from_offset;
670
to+= to_cs->mbminlen;
671
to_length-= to_cs->mbminlen;
674
set_if_smaller(from_length, to_length);
675
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
676
nchars, &well_formed_error);
677
memmove(to, from, res);
678
*from_end_pos= from + res;
679
*well_formed_error_pos= well_formed_error ? from + res : NULL;
680
*cannot_convert_error_pos= NULL;
682
res+= to_cs->mbminlen;
1028
return (uint32_t) res;
724
Quote the given identifier.
725
If the given identifier is empty, it will be quoted.
729
name the identifier to be appended
730
name_length length of the appending identifier
733
/* Factor the extern out */
734
extern const CHARSET_INFO *system_charset_info, *files_charset_info;
736
void String::append_identifier(const char *name, uint32_t in_length)
738
const char *name_end;
743
The identifier must be quoted as it includes a quote character or
747
reserve(in_length*2 + 2);
748
quote_char= (char) q;
749
append("e_char, 1, system_charset_info);
751
for (name_end= name+in_length ; name < name_end ; name+= in_length)
753
unsigned char chr= (unsigned char) *name;
754
in_length= my_mbcharlen(system_charset_info, chr);
756
my_mbcharlen can return 0 on a wrong multibyte
757
sequence. It is possible when upgrading from 4.0,
758
and identifier contains some accented characters.
759
The manual says it does not work. So we'll just
760
change length to 1 not to hang in the endless loop.
764
if (in_length == 1 && chr == (unsigned char) quote_char)
765
append("e_char, 1, system_charset_info);
766
append(name, in_length, system_charset_info);
768
append("e_char, 1, system_charset_info);
773
1068
Exchange state of this object and argument.
787
1082
std::swap(alloced, s.alloced);
788
1083
std::swap(str_charset, s.str_charset);
791
void String::q_append(const uint32_t n)
793
int4store(Ptr + str_length, n);
796
void String::q_append(double d)
798
float8store(Ptr + str_length, d);
801
void String::q_append(double *d)
803
float8store(Ptr + str_length, *d);
806
void String::q_append(const char *data, uint32_t data_len)
808
memcpy(Ptr + str_length, data, data_len);
809
str_length += data_len;
812
void String::write_at_position(int position, uint32_t value)
814
int4store(Ptr + position,value);
816
bool check_if_only_end_space(const CHARSET_INFO * const cs, char *str,
819
return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
822
} /* namespace drizzled */
824
bool operator==(const drizzled::String &s1, const drizzled::String &s2)
826
return stringcmp(&s1,&s2) == 0;
829
bool operator!=(const drizzled::String &s1, const drizzled::String &s2)