12
12
You should have received a copy of the GNU General Public License
13
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
16
/* This file is originally from the mysql distribution. Coded by monty */
20
#include "drizzled/internal/my_sys.h"
21
#include "drizzled/internal/m_string.h"
22
#include "drizzled/charset.h"
23
#include "drizzled/global_charset_info.h"
19
#include <mysys/my_sys.h>
20
#include <mystrings/m_string.h>
25
22
#include <algorithm>
27
#include "drizzled/sql_string.h"
34
// Conversion functions to and from std::string.
36
std::string String_to_std_string(String const& s)
38
return std::string(s.ptr(), s.length());
41
String* set_String_from_std_string(String* s, std::string const& cs)
43
s->set_ascii(cs.c_str(), cs.length());
25
The following extern declarations are ok as these are interface functions
26
required by the string function
29
extern unsigned char* sql_alloc(unsigned size);
30
extern void sql_element_free(void *ptr);
32
#include "sql_string.h"
48
34
/*****************************************************************************
49
35
** String functions
50
36
*****************************************************************************/
57
str_charset(&my_charset_bin)
61
String::String(size_t length_arg)
66
str_charset(&my_charset_bin)
68
(void) real_alloc(length_arg);
71
String::String(const char *str, const CHARSET_INFO * const cs)
72
: Ptr(const_cast<char *>(str)),
73
str_length(static_cast<size_t>(strlen(str))),
80
String::String(const char *str, size_t len, const CHARSET_INFO * const cs)
81
: Ptr(const_cast<char *>(str)),
89
String::String(char *str, size_t len, const CHARSET_INFO * const cs)
98
String::String(const String &str)
100
str_length(str.str_length),
101
Alloced_length(str.Alloced_length),
103
str_charset(str.str_charset)
107
void *String::operator new(size_t size, memory::Root *mem_root)
109
return mem_root->alloc_root(static_cast<size_t>(size));
112
String::~String() { free(); }
114
bool String::real_alloc(size_t arg_length)
38
bool String::real_alloc(uint32_t arg_length)
116
40
arg_length=ALIGN_SIZE(arg_length+1);
118
42
if (Alloced_length < arg_length)
120
if (Alloced_length > 0)
122
45
if (!(Ptr=(char*) malloc(arg_length)))
124
47
Alloced_length=arg_length;
169
92
bool String::set_int(int64_t num, bool unsigned_flag, const CHARSET_INFO * const cs)
171
size_t l=20*cs->mbmaxlen+1;
94
uint32_t l=20*cs->mbmaxlen+1;
172
95
int base= unsigned_flag ? 10 : -10;
176
str_length=(size_t) (cs->cset->int64_t10_to_str)(cs,Ptr,l,base,num);
99
str_length=(uint32_t) (cs->cset->int64_t10_to_str)(cs,Ptr,l,base,num);
181
bool String::set_real(double num,size_t decimals, const CHARSET_INFO * const cs)
104
bool String::set_real(double num,uint32_t decimals, const CHARSET_INFO * const cs)
183
106
char buff[FLOATING_POINT_BUFFER];
107
uint32_t dummy_errors;
188
111
if (decimals >= NOT_FIXED_DEC)
190
len= internal::my_gcvt(num,
191
internal::MY_GCVT_ARG_DOUBLE,
192
sizeof(buff) - 1, buff, NULL);
113
len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
193
114
return copy(buff, len, &my_charset_utf8_general_ci, cs, &dummy_errors);
195
len= internal::my_fcvt(num, decimals, buff, NULL);
196
return copy(buff, (size_t) len, &my_charset_utf8_general_ci, cs,
116
len= my_fcvt(num, decimals, buff, NULL);
117
return copy(buff, (uint32_t) len, &my_charset_utf8_general_ci, cs,
286
208
/* Copy with charset conversion */
288
bool String::copy(const char *str, size_t arg_length,
289
const CHARSET_INFO * const,
290
const CHARSET_INFO * const to_cs, size_t *errors)
210
bool String::copy(const char *str, uint32_t arg_length,
211
const CHARSET_INFO * const from_cs,
212
const CHARSET_INFO * const to_cs, uint32_t *errors)
293
return copy(str, arg_length, to_cs);
215
if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
218
return copy(str, arg_length, to_cs);
220
if ((from_cs == &my_charset_bin) && offset)
223
assert((from_cs == &my_charset_bin) && offset);
224
return false; //copy_aligned(str, arg_length, offset, to_cs);
226
uint32_t new_length= to_cs->mbmaxlen*arg_length;
227
if (alloc(new_length))
229
str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
230
str, arg_length, from_cs, errors);
372
326
with character set recoding
375
bool String::append(const char *s,size_t arg_length, const CHARSET_INFO * const)
329
bool String::append(const char *s,uint32_t arg_length, const CHARSET_INFO * const cs)
377
if (realloc(str_length + arg_length))
379
memcpy(Ptr + str_length, s, arg_length);
380
str_length+= arg_length;
331
uint32_t dummy_offset;
333
if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
335
uint32_t add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
336
uint32_t dummy_errors;
337
if (realloc(str_length + add_length))
339
str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
340
s, arg_length, cs, &dummy_errors);
344
if (realloc(str_length + arg_length))
346
memcpy(Ptr + str_length, s, arg_length);
347
str_length+= arg_length;
386
bool String::append_with_prefill(const char *s,size_t arg_length,
387
size_t full_length, char fill_char)
353
bool String::append_with_prefill(const char *s,uint32_t arg_length,
354
uint32_t full_length, char fill_char)
389
356
int t_length= arg_length > full_length ? arg_length : full_length;
589
555
****************************************************************************/
558
copy a string from one character set to another
563
to_cs Character set of result string
565
from_length Length of from string
566
from_cs From character set
569
'to' must be at least from_length * to_cs->mbmaxlen bytes long
572
number of bytes copied to 'to'
577
copy_and_convert_extended(char *to, uint32_t to_length,
578
const CHARSET_INFO * const to_cs,
579
const char *from, uint32_t from_length,
580
const CHARSET_INFO * const from_cs,
585
const unsigned char *from_end= (const unsigned char*) from+from_length;
587
unsigned char *to_end= (unsigned char*) to+to_length;
588
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
589
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
590
uint32_t error_count= 0;
594
if ((cnvres= (*mb_wc)(from_cs, &wc, (unsigned char*) from,
597
else if (cnvres == MY_CS_ILSEQ)
603
else if (cnvres > MY_CS_TOOSMALL)
606
A correct multibyte sequence detected
607
But it doesn't have Unicode mapping.
614
break; // Not enough characters
617
if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
619
else if (cnvres == MY_CS_ILUNI && wc != '?')
628
*errors= error_count;
629
return (uint32_t) (to - to_start);
634
Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
637
copy_and_convert(char *to, uint32_t to_length, const CHARSET_INFO * const to_cs,
638
const char *from, uint32_t from_length,
639
const CHARSET_INFO * const from_cs, uint32_t *errors)
642
If any of the character sets is not ASCII compatible,
643
immediately switch to slow mb_wc->wc_mb method.
645
if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
646
return copy_and_convert_extended(to, to_length, to_cs,
647
from, from_length, from_cs, errors);
649
uint32_t length= cmin(to_length, from_length), length2= length;
651
#if defined(__i386__)
653
Special loop for i386: it allows referring to a
654
non-aligned memory block as UINT32, which makes
655
it possible to copy four bytes at once. This
656
gives about a 10% performance improvement compared
657
to a byte-by-byte loop.
659
for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
661
if ((*(uint32_t*)from) & 0x80808080)
663
*((uint32_t*) to)= *((const uint32_t*) from);
667
for (; ; *to++= *from++, length--)
674
if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
676
uint32_t copied_length= length2 - length;
677
to_length-= copied_length;
678
from_length-= copied_length;
679
return copied_length + copy_and_convert_extended(to, to_length,
688
return 0; // Make compiler happy
694
Copy string with HEX-encoding of "bad" characters.
696
@details This function copies the string pointed to by "src"
697
to the string pointed to by "dst". Not more than "srclen" bytes
698
are read from "src". Any sequences of bytes representing
699
a not-well-formed substring (according to cs) are hex-encoded,
700
and all well-formed substrings (according to cs) are copied as is.
701
Not more than "dstlen" bytes are written to "dst". The number
702
of bytes written to "dst" is returned.
704
@param cs character set pointer of the destination string
705
@param[out] dst destination string
706
@param dstlen size of dst
707
@param src source string
708
@param srclen length of src
710
@retval result length
714
my_copy_with_hex_escaping(const CHARSET_INFO * const cs,
715
char *dst, size_t dstlen,
716
const char *src, size_t srclen)
718
const char *srcend= src + srclen;
721
for ( ; src < srcend ; )
724
if ((chlen= my_ismbchar(cs, src, srcend)))
727
break; /* purecov: inspected */
728
memcpy(dst, src, chlen);
733
else if (*src & 0x80)
736
break; /* purecov: inspected */
739
*dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
740
*dst++= _dig_vec_upper[((unsigned char) *src) & 15];
747
break; /* purecov: inspected */
593
757
with optional character set conversion,
594
758
with optional left padding (for binary -> UCS2 conversion)
619
783
well_formed_copy_nchars(const CHARSET_INFO * const to_cs,
620
char *to, size_t to_length,
784
char *to, uint32_t to_length,
621
785
const CHARSET_INFO * const from_cs,
622
const char *from, size_t from_length,
786
const char *from, uint32_t from_length,
624
788
const char **well_formed_error_pos,
625
789
const char **cannot_convert_error_pos,
626
790
const char **from_end_pos)
630
assert((to_cs == &my_charset_bin) ||
631
(from_cs == &my_charset_bin) ||
632
(to_cs == from_cs) ||
633
my_charset_same(from_cs, to_cs));
635
if (to_length < to_cs->mbminlen || !nchars)
794
if ((to_cs == &my_charset_bin) ||
795
(from_cs == &my_charset_bin) ||
796
(to_cs == from_cs) ||
797
my_charset_same(from_cs, to_cs))
799
if (to_length < to_cs->mbminlen || !nchars)
802
*cannot_convert_error_pos= NULL;
803
*well_formed_error_pos= NULL;
807
if (to_cs == &my_charset_bin)
809
res= cmin(cmin(nchars, to_length), from_length);
810
memmove(to, from, res);
811
*from_end_pos= from + res;
812
*well_formed_error_pos= NULL;
813
*cannot_convert_error_pos= NULL;
817
int well_formed_error;
818
uint32_t from_offset;
820
if ((from_offset= (from_length % to_cs->mbminlen)) &&
821
(from_cs == &my_charset_bin))
824
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
825
INSERT INTO t1 (ucs2_column) VALUES (0x01);
828
uint32_t pad_length= to_cs->mbminlen - from_offset;
829
memset(to, 0, pad_length);
830
memmove(to + pad_length, from, from_offset);
833
from_length-= from_offset;
834
to+= to_cs->mbminlen;
835
to_length-= to_cs->mbminlen;
838
set_if_smaller(from_length, to_length);
839
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
840
nchars, &well_formed_error);
841
memmove(to, from, res);
842
*from_end_pos= from + res;
843
*well_formed_error_pos= well_formed_error ? from + res : NULL;
844
*cannot_convert_error_pos= NULL;
846
res+= to_cs->mbminlen;
853
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
854
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
855
const unsigned char *from_end= (const unsigned char*) from + from_length;
856
unsigned char *to_end= (unsigned char*) to + to_length;
858
*well_formed_error_pos= NULL;
859
*cannot_convert_error_pos= NULL;
861
for ( ; nchars; nchars--)
863
const char *from_prev= from;
864
if ((cnvres= (*mb_wc)(from_cs, &wc, (unsigned char*) from, from_end)) > 0)
866
else if (cnvres == MY_CS_ILSEQ)
868
if (!*well_formed_error_pos)
869
*well_formed_error_pos= from;
873
else if (cnvres > MY_CS_TOOSMALL)
876
A correct multibyte sequence detected
877
But it doesn't have Unicode mapping.
879
if (!*cannot_convert_error_pos)
880
*cannot_convert_error_pos= from;
885
break; // Not enough characters
888
if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
890
else if (cnvres == MY_CS_ILUNI && wc != '?')
892
if (!*cannot_convert_error_pos)
893
*cannot_convert_error_pos= from_prev;
637
903
*from_end_pos= from;
638
*cannot_convert_error_pos= NULL;
639
*well_formed_error_pos= NULL;
643
if (to_cs == &my_charset_bin)
645
res= min(min(nchars, to_length), from_length);
646
memmove(to, from, res);
647
*from_end_pos= from + res;
648
*well_formed_error_pos= NULL;
649
*cannot_convert_error_pos= NULL;
653
int well_formed_error;
656
if ((from_offset= (from_length % to_cs->mbminlen)) &&
657
(from_cs == &my_charset_bin))
660
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
661
INSERT INTO t1 (ucs2_column) VALUES (0x01);
664
size_t pad_length= to_cs->mbminlen - from_offset;
665
memset(to, 0, pad_length);
666
memmove(to + pad_length, from, from_offset);
669
from_length-= from_offset;
670
to+= to_cs->mbminlen;
671
to_length-= to_cs->mbminlen;
674
set_if_smaller(from_length, to_length);
675
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
676
nchars, &well_formed_error);
677
memmove(to, from, res);
678
*from_end_pos= from + res;
679
*well_formed_error_pos= well_formed_error ? from + res : NULL;
680
*cannot_convert_error_pos= NULL;
682
res+= to_cs->mbminlen;
906
return (uint32_t) res;
788
1009
std::swap(str_charset, s.str_charset);
791
void String::q_append(const size_t n)
793
int8store(Ptr + str_length, n);
796
void String::q_append(double d)
798
float8store(Ptr + str_length, d);
801
void String::q_append(double *d)
803
float8store(Ptr + str_length, *d);
806
void String::q_append(const char *data, size_t data_len)
808
memcpy(Ptr + str_length, data, data_len);
809
str_length += data_len;
812
void String::write_at_position(int position, size_t value)
814
int8store(Ptr + position,value);
816
bool check_if_only_end_space(const CHARSET_INFO * const cs, char *str,
819
return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
822
} /* namespace drizzled */
824
bool operator==(const drizzled::String &s1, const drizzled::String &s2)
1013
bool operator==(const String &s1, const String &s2)
826
1015
return stringcmp(&s1,&s2) == 0;
829
bool operator!=(const drizzled::String &s1, const drizzled::String &s2)
1018
bool operator!=(const String &s1, const String &s2)
831
1020
return !(s1 == s2);