12
12
You should have received a copy of the GNU General Public License
13
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
16
/* This file is originally from the mysql distribution. Coded by monty */
20
#include <drizzled/internal/my_sys.h>
21
#include <drizzled/internal/m_string.h>
22
#include <drizzled/charset.h>
23
#include <drizzled/global_charset_info.h>
19
#include <mysys/my_sys.h>
20
#include <mystrings/m_string.h>
25
22
#include <algorithm>
27
#include <drizzled/sql_string.h>
34
// Conversion functions to and from std::string.
36
std::string String_to_std_string(String const& s)
38
return std::string(s.ptr(), s.length());
41
String* set_String_from_std_string(String* s, std::string const& cs)
43
s->set_ascii(cs.c_str(), cs.length());
25
The following extern declarations are ok as these are interface functions
26
required by the string function
29
extern unsigned char* sql_alloc(unsigned size);
30
extern void sql_element_free(void *ptr);
32
#include "sql_string.h"
48
34
/*****************************************************************************
49
35
** String functions
50
36
*****************************************************************************/
57
str_charset(&my_charset_bin)
61
String::String(size_t length_arg)
66
str_charset(&my_charset_bin)
68
(void) real_alloc(length_arg);
71
String::String(const char *str, const CHARSET_INFO * const cs)
72
: Ptr(const_cast<char *>(str)),
73
str_length(static_cast<size_t>(strlen(str))),
80
String::String(const char *str, size_t len, const CHARSET_INFO * const cs)
81
: Ptr(const_cast<char *>(str)),
89
String::String(char *str, size_t len, const CHARSET_INFO * const cs)
98
String::String(const String &str)
100
str_length(str.str_length),
101
Alloced_length(str.Alloced_length),
103
str_charset(str.str_charset)
107
void *String::operator new(size_t size, memory::Root *mem_root)
109
return mem_root->alloc_root(static_cast<size_t>(size));
112
/*
  Destructor: delegates all cleanup to free().
  NOTE(review): free() is presumably the String member that releases an
  owned buffer -- confirm against sql_string.h.
*/
String::~String()
{
  free();
}
114
bool String::real_alloc(size_t arg_length)
38
bool String::real_alloc(uint32_t arg_length)
116
40
arg_length=ALIGN_SIZE(arg_length+1);
118
42
if (Alloced_length < arg_length)
120
if (Alloced_length > 0)
122
45
if (!(Ptr=(char*) malloc(arg_length)))
124
47
Alloced_length=arg_length;
169
92
bool String::set_int(int64_t num, bool unsigned_flag, const CHARSET_INFO * const cs)
171
size_t l=20*cs->mbmaxlen+1;
94
uint32_t l=20*cs->mbmaxlen+1;
172
95
int base= unsigned_flag ? 10 : -10;
176
str_length=(size_t) (cs->cset->int64_t10_to_str)(cs,Ptr,l,base,num);
99
str_length=(uint32_t) (cs->cset->int64_t10_to_str)(cs,Ptr,l,base,num);
181
bool String::set_real(double num,size_t decimals, const CHARSET_INFO * const cs)
104
bool String::set_real(double num,uint32_t decimals, const CHARSET_INFO * const cs)
183
106
char buff[FLOATING_POINT_BUFFER];
107
uint32_t dummy_errors;
188
111
if (decimals >= NOT_FIXED_DEC)
190
len= internal::my_gcvt(num,
191
internal::MY_GCVT_ARG_DOUBLE,
192
sizeof(buff) - 1, buff, NULL);
113
len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
193
114
return copy(buff, len, &my_charset_utf8_general_ci, cs, &dummy_errors);
195
len= internal::my_fcvt(num, decimals, buff, NULL);
196
return copy(buff, (size_t) len, &my_charset_utf8_general_ci, cs,
116
len= my_fcvt(num, decimals, buff, NULL);
117
return copy(buff, (uint32_t) len, &my_charset_utf8_general_ci, cs,
300
208
/* Copy with charset conversion */
302
bool String::copy(const char *str, size_t arg_length,
303
const CHARSET_INFO * const,
304
const CHARSET_INFO * const to_cs, size_t *errors)
210
bool String::copy(const char *str, uint32_t arg_length,
211
const CHARSET_INFO * const from_cs,
212
const CHARSET_INFO * const to_cs, uint32_t *errors)
307
return copy(str, arg_length, to_cs);
215
if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
218
return copy(str, arg_length, to_cs);
220
if ((from_cs == &my_charset_bin) && offset)
223
assert((from_cs == &my_charset_bin) && offset);
224
return false; //copy_aligned(str, arg_length, offset, to_cs);
226
uint32_t new_length= to_cs->mbmaxlen*arg_length;
227
if (alloc(new_length))
229
str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
230
str, arg_length, from_cs, errors);
386
326
with character set recoding
389
bool String::append(const char *s,size_t arg_length, const CHARSET_INFO * const)
329
bool String::append(const char *s,uint32_t arg_length, const CHARSET_INFO * const cs)
391
if (realloc(str_length + arg_length))
393
memcpy(Ptr + str_length, s, arg_length);
394
str_length+= arg_length;
331
uint32_t dummy_offset;
333
if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
335
uint32_t add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
336
uint32_t dummy_errors;
337
if (realloc(str_length + add_length))
339
str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
340
s, arg_length, cs, &dummy_errors);
344
if (realloc(str_length + arg_length))
346
memcpy(Ptr + str_length, s, arg_length);
347
str_length+= arg_length;
400
bool String::append_with_prefill(const char *s,size_t arg_length,
401
size_t full_length, char fill_char)
353
bool String::append_with_prefill(const char *s,uint32_t arg_length,
354
uint32_t full_length, char fill_char)
403
356
int t_length= arg_length > full_length ? arg_length : full_length;
603
555
****************************************************************************/
558
copy a string from one character set to another
563
to_cs Character set of result string
565
from_length Length of from string
566
from_cs From character set
569
'to' must be at least from_length * to_cs->mbmaxlen bytes long
572
length of bytes copied to 'to'
577
copy_and_convert_extended(char *to, uint32_t to_length,
578
const CHARSET_INFO * const to_cs,
579
const char *from, uint32_t from_length,
580
const CHARSET_INFO * const from_cs,
585
const unsigned char *from_end= (const unsigned char*) from+from_length;
587
unsigned char *to_end= (unsigned char*) to+to_length;
588
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
589
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
590
uint32_t error_count= 0;
594
if ((cnvres= (*mb_wc)(from_cs, &wc, (unsigned char*) from,
597
else if (cnvres == MY_CS_ILSEQ)
603
else if (cnvres > MY_CS_TOOSMALL)
606
A correct multibyte sequence detected
607
But it doesn't have Unicode mapping.
614
break; // Not enough characters
617
if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
619
else if (cnvres == MY_CS_ILUNI && wc != '?')
628
*errors= error_count;
629
return (uint32_t) (to - to_start);
634
Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
637
copy_and_convert(char *to, uint32_t to_length, const CHARSET_INFO * const to_cs,
638
const char *from, uint32_t from_length,
639
const CHARSET_INFO * const from_cs, uint32_t *errors)
642
If any of the character sets is not ASCII compatible,
643
immediately switch to slow mb_wc->wc_mb method.
645
if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
646
return copy_and_convert_extended(to, to_length, to_cs,
647
from, from_length, from_cs, errors);
649
uint32_t length= cmin(to_length, from_length), length2= length;
651
#if defined(__i386__)
653
Special loop for i386, it allows to refer to a
654
non-aligned memory block as UINT32, which makes
655
it possible to copy four bytes at once. This
656
gives about 10% performance improvement compared
657
to byte-by-byte loop.
659
for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
661
if ((*(uint32_t*)from) & 0x80808080)
663
*((uint32_t*) to)= *((const uint32_t*) from);
667
for (; ; *to++= *from++, length--)
674
if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
676
uint32_t copied_length= length2 - length;
677
to_length-= copied_length;
678
from_length-= copied_length;
679
return copied_length + copy_and_convert_extended(to, to_length,
688
return 0; // Make compiler happy
694
Copy string with HEX-encoding of "bad" characters.
696
@details This function copies the string pointed to by "src"
697
to the string pointed to by "dst". Not more than "srclen" bytes
698
are read from "src". Any sequences of bytes representing
699
a not-well-formed substring (according to cs) are hex-encoded,
700
and all well-formed substrings (according to cs) are copied as is.
701
Not more than "dstlen" bytes are written to "dst". The number
702
of bytes written to "dst" is returned.
704
@param cs character set pointer of the destination string
705
@param[out] dst destination string
706
@param dstlen size of dst
707
@param src source string
708
@param srclen length of src
710
@retval result length
714
my_copy_with_hex_escaping(const CHARSET_INFO * const cs,
715
char *dst, size_t dstlen,
716
const char *src, size_t srclen)
718
const char *srcend= src + srclen;
721
for ( ; src < srcend ; )
724
if ((chlen= my_ismbchar(cs, src, srcend)))
727
break; /* purecov: inspected */
728
memcpy(dst, src, chlen);
733
else if (*src & 0x80)
736
break; /* purecov: inspected */
739
*dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
740
*dst++= _dig_vec_upper[((unsigned char) *src) & 15];
747
break; /* purecov: inspected */
607
757
with optional character set conversion,
608
758
with optional left padding (for binary -> UCS2 conversion)
633
783
well_formed_copy_nchars(const CHARSET_INFO * const to_cs,
634
char *to, size_t to_length,
784
char *to, uint32_t to_length,
635
785
const CHARSET_INFO * const from_cs,
636
const char *from, size_t from_length,
786
const char *from, uint32_t from_length,
638
788
const char **well_formed_error_pos,
639
789
const char **cannot_convert_error_pos,
640
790
const char **from_end_pos)
644
assert((to_cs == &my_charset_bin) ||
645
(from_cs == &my_charset_bin) ||
646
(to_cs == from_cs) ||
647
my_charset_same(from_cs, to_cs));
649
if (to_length < to_cs->mbminlen || !nchars)
794
if ((to_cs == &my_charset_bin) ||
795
(from_cs == &my_charset_bin) ||
796
(to_cs == from_cs) ||
797
my_charset_same(from_cs, to_cs))
799
if (to_length < to_cs->mbminlen || !nchars)
802
*cannot_convert_error_pos= NULL;
803
*well_formed_error_pos= NULL;
807
if (to_cs == &my_charset_bin)
809
res= cmin(cmin(nchars, to_length), from_length);
810
memmove(to, from, res);
811
*from_end_pos= from + res;
812
*well_formed_error_pos= NULL;
813
*cannot_convert_error_pos= NULL;
817
int well_formed_error;
818
uint32_t from_offset;
820
if ((from_offset= (from_length % to_cs->mbminlen)) &&
821
(from_cs == &my_charset_bin))
824
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
825
INSERT INTO t1 (ucs2_column) VALUES (0x01);
828
uint32_t pad_length= to_cs->mbminlen - from_offset;
829
memset(to, 0, pad_length);
830
memmove(to + pad_length, from, from_offset);
833
from_length-= from_offset;
834
to+= to_cs->mbminlen;
835
to_length-= to_cs->mbminlen;
838
set_if_smaller(from_length, to_length);
839
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
840
nchars, &well_formed_error);
841
memmove(to, from, res);
842
*from_end_pos= from + res;
843
*well_formed_error_pos= well_formed_error ? from + res : NULL;
844
*cannot_convert_error_pos= NULL;
846
res+= to_cs->mbminlen;
853
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
854
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
855
const unsigned char *from_end= (const unsigned char*) from + from_length;
856
unsigned char *to_end= (unsigned char*) to + to_length;
858
*well_formed_error_pos= NULL;
859
*cannot_convert_error_pos= NULL;
861
for ( ; nchars; nchars--)
863
const char *from_prev= from;
864
if ((cnvres= (*mb_wc)(from_cs, &wc, (unsigned char*) from, from_end)) > 0)
866
else if (cnvres == MY_CS_ILSEQ)
868
if (!*well_formed_error_pos)
869
*well_formed_error_pos= from;
873
else if (cnvres > MY_CS_TOOSMALL)
876
A correct multibyte sequence detected
877
But it doesn't have Unicode mapping.
879
if (!*cannot_convert_error_pos)
880
*cannot_convert_error_pos= from;
885
break; // Not enough characters
888
if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
890
else if (cnvres == MY_CS_ILUNI && wc != '?')
892
if (!*cannot_convert_error_pos)
893
*cannot_convert_error_pos= from_prev;
651
903
*from_end_pos= from;
652
*cannot_convert_error_pos= NULL;
653
*well_formed_error_pos= NULL;
657
if (to_cs == &my_charset_bin)
659
res= min(min(nchars, to_length), from_length);
660
memmove(to, from, res);
661
*from_end_pos= from + res;
662
*well_formed_error_pos= NULL;
663
*cannot_convert_error_pos= NULL;
667
int well_formed_error;
670
if ((from_offset= (from_length % to_cs->mbminlen)) &&
671
(from_cs == &my_charset_bin))
674
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
675
INSERT INTO t1 (ucs2_column) VALUES (0x01);
678
size_t pad_length= to_cs->mbminlen - from_offset;
679
memset(to, 0, pad_length);
680
memmove(to + pad_length, from, from_offset);
683
from_length-= from_offset;
684
to+= to_cs->mbminlen;
685
to_length-= to_cs->mbminlen;
688
set_if_smaller(from_length, to_length);
689
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
690
nchars, &well_formed_error);
691
memmove(to, from, res);
692
*from_end_pos= from + res;
693
*well_formed_error_pos= well_formed_error ? from + res : NULL;
694
*cannot_convert_error_pos= NULL;
696
res+= to_cs->mbminlen;
906
return (uint32_t) res;
802
1009
std::swap(str_charset, s.str_charset);
805
void String::q_append(const size_t n)
807
int8store(Ptr + str_length, n);
810
void String::q_append(double d)
812
float8store(Ptr + str_length, d);
815
void String::q_append(double *d)
817
float8store(Ptr + str_length, *d);
820
void String::q_append(const char *data, size_t data_len)
822
memcpy(Ptr + str_length, data, data_len);
823
str_length += data_len;
826
void String::write_at_position(int position, size_t value)
828
int8store(Ptr + position,value);
830
bool check_if_only_end_space(const CHARSET_INFO * const cs, char *str,
833
return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
836
std::ostream& operator<<(std::ostream& output, const String &str)
838
output << "String:(";
839
output << const_cast<String&>(str).c_str();
841
output << str.length();
844
return output; // for multiple << operators.
847
} /* namespace drizzled */
849
bool operator==(const drizzled::String &s1, const drizzled::String &s2)
1013
bool operator==(const String &s1, const String &s2)
851
1015
return stringcmp(&s1,&s2) == 0;
854
bool operator!=(const drizzled::String &s1, const drizzled::String &s2)
1018
bool operator!=(const String &s1, const String &s2)
856
1020
return !(s1 == s2);