12
12
You should have received a copy of the GNU General Public License
13
13
along with this program; if not, write to the Free Software
14
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
14
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
16
/* This file is originally from the mysql distribution. Coded by monty */
20
#include "drizzled/internal/my_sys.h"
21
#include "drizzled/internal/m_string.h"
22
#include "drizzled/charset.h"
23
#include "drizzled/global_charset_info.h"
19
#include <mysys/my_sys.h>
20
#include <mystrings/m_string.h>
25
22
#include <algorithm>
27
#include "drizzled/sql_string.h"
34
// Conversion functions to and from std::string.
36
std::string String_to_std_string(String const& s)
38
return std::string(s.ptr(), s.length());
41
String* set_String_from_std_string(String* s, std::string const& cs)
43
s->set_ascii(cs.c_str(), cs.length());
25
The following extern declarations are ok as these are interface functions
26
required by the string function
29
extern unsigned char* sql_alloc(unsigned size);
30
extern void sql_element_free(void *ptr);
32
#include "sql_string.h"
48
34
/*****************************************************************************
49
35
** String functions
50
36
*****************************************************************************/
57
str_charset(&my_charset_bin)
61
String::String(size_t length_arg)
66
str_charset(&my_charset_bin)
68
(void) real_alloc(length_arg);
71
String::String(const char *str, const CHARSET_INFO * const cs)
72
: Ptr(const_cast<char *>(str)),
73
str_length(static_cast<size_t>(strlen(str))),
80
String::String(const char *str, size_t len, const CHARSET_INFO * const cs)
81
: Ptr(const_cast<char *>(str)),
89
String::String(char *str, size_t len, const CHARSET_INFO * const cs)
98
String::String(const String &str)
100
str_length(str.str_length),
101
Alloced_length(str.Alloced_length),
103
str_charset(str.str_charset)
107
void *String::operator new(size_t size, memory::Root *mem_root)
109
return mem_root->alloc_root(static_cast<size_t>(size));
112
String::~String() { free(); }
114
bool String::real_alloc(size_t arg_length)
38
bool String::real_alloc(uint32_t arg_length)
116
40
arg_length=ALIGN_SIZE(arg_length+1);
118
42
if (Alloced_length < arg_length)
120
if (Alloced_length > 0)
122
45
if (!(Ptr=(char*) malloc(arg_length)))
124
47
Alloced_length=arg_length;
169
92
bool String::set_int(int64_t num, bool unsigned_flag, const CHARSET_INFO * const cs)
171
size_t l=20*cs->mbmaxlen+1;
94
uint32_t l=20*cs->mbmaxlen+1;
172
95
int base= unsigned_flag ? 10 : -10;
176
str_length=(size_t) (cs->cset->int64_t10_to_str)(cs,Ptr,l,base,num);
99
str_length=(uint32_t) (cs->cset->int64_t10_to_str)(cs,Ptr,l,base,num);
181
bool String::set_real(double num,size_t decimals, const CHARSET_INFO * const cs)
104
bool String::set_real(double num,uint32_t decimals, const CHARSET_INFO * const cs)
183
106
char buff[FLOATING_POINT_BUFFER];
107
uint32_t dummy_errors;
188
111
if (decimals >= NOT_FIXED_DEC)
190
len= internal::my_gcvt(num,
191
internal::MY_GCVT_ARG_DOUBLE,
192
sizeof(buff) - 1, buff, NULL);
113
len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
193
114
return copy(buff, len, &my_charset_utf8_general_ci, cs, &dummy_errors);
195
len= internal::my_fcvt(num, decimals, buff, NULL);
196
return copy(buff, (size_t) len, &my_charset_utf8_general_ci, cs,
116
len= my_fcvt(num, decimals, buff, NULL);
117
return copy(buff, (uint32_t) len, &my_charset_utf8_general_ci, cs,
589
525
****************************************************************************/
528
copy a string from one character set to another
533
to_cs Character set of result string
535
from_length Length of from string
536
from_cs From character set
539
'to' must be at least from_length * to_cs->mbmaxlen bytes long
542
number of bytes copied to 'to'
547
copy_and_convert_extended(char *to, uint32_t to_length,
548
const CHARSET_INFO * const to_cs,
549
const char *from, uint32_t from_length,
550
const CHARSET_INFO * const from_cs,
555
const unsigned char *from_end= (const unsigned char*) from+from_length;
557
unsigned char *to_end= (unsigned char*) to+to_length;
558
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
559
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
560
uint32_t error_count= 0;
564
if ((cnvres= (*mb_wc)(from_cs, &wc, (unsigned char*) from,
567
else if (cnvres == MY_CS_ILSEQ)
573
else if (cnvres > MY_CS_TOOSMALL)
576
A correct multibyte sequence detected
577
But it doesn't have Unicode mapping.
584
break; // Not enough characters
587
if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
589
else if (cnvres == MY_CS_ILUNI && wc != '?')
598
*errors= error_count;
599
return (uint32_t) (to - to_start);
604
Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
607
copy_and_convert(char *to, uint32_t to_length, const CHARSET_INFO * const to_cs,
608
const char *from, uint32_t from_length,
609
const CHARSET_INFO * const from_cs, uint32_t *errors)
612
If any of the character sets is not ASCII compatible,
613
immediately switch to slow mb_wc->wc_mb method.
615
if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
616
return copy_and_convert_extended(to, to_length, to_cs,
617
from, from_length, from_cs, errors);
619
uint32_t length= cmin(to_length, from_length), length2= length;
621
#if defined(__i386__)
623
Special loop for i386, it allows to refer to a
624
non-aligned memory block as UINT32, which makes
625
it possible to copy four bytes at once. This
626
gives about a 10% performance improvement compared
627
to byte-by-byte loop.
629
for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
631
if ((*(uint32_t*)from) & 0x80808080)
633
*((uint32_t*) to)= *((const uint32_t*) from);
637
for (; ; *to++= *from++, length--)
644
if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
646
uint32_t copied_length= length2 - length;
647
to_length-= copied_length;
648
from_length-= copied_length;
649
return copied_length + copy_and_convert_extended(to, to_length,
658
return 0; // Make compiler happy
664
Copy string with HEX-encoding of "bad" characters.
666
@details This function copies the string pointed to by "src"
667
to the string pointed by "dst". Not more than "srclen" bytes
668
are read from "src". Any sequences of bytes representing
669
a not-well-formed substring (according to cs) are hex-encoded,
670
and all well-formed substrings (according to cs) are copied as is.
671
Not more than "dstlen" bytes are written to "dst". The number
672
of bytes written to "dst" is returned.
674
@param cs character set pointer of the destination string
675
@param[out] dst destination string
676
@param dstlen size of dst
677
@param src source string
678
@param srclen length of src
680
@retval result length
684
my_copy_with_hex_escaping(const CHARSET_INFO * const cs,
685
char *dst, size_t dstlen,
686
const char *src, size_t srclen)
688
const char *srcend= src + srclen;
691
for ( ; src < srcend ; )
694
if ((chlen= my_ismbchar(cs, src, srcend)))
697
break; /* purecov: inspected */
698
memcpy(dst, src, chlen);
703
else if (*src & 0x80)
706
break; /* purecov: inspected */
709
*dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
710
*dst++= _dig_vec_upper[((unsigned char) *src) & 15];
717
break; /* purecov: inspected */
593
727
with optional character set conversion,
594
728
with optional left padding (for binary -> UCS2 conversion)
619
753
well_formed_copy_nchars(const CHARSET_INFO * const to_cs,
620
char *to, size_t to_length,
754
char *to, uint32_t to_length,
621
755
const CHARSET_INFO * const from_cs,
622
const char *from, size_t from_length,
756
const char *from, uint32_t from_length,
624
758
const char **well_formed_error_pos,
625
759
const char **cannot_convert_error_pos,
626
760
const char **from_end_pos)
630
assert((to_cs == &my_charset_bin) ||
631
(from_cs == &my_charset_bin) ||
632
(to_cs == from_cs) ||
633
my_charset_same(from_cs, to_cs));
635
if (to_length < to_cs->mbminlen || !nchars)
764
if ((to_cs == &my_charset_bin) ||
765
(from_cs == &my_charset_bin) ||
766
(to_cs == from_cs) ||
767
my_charset_same(from_cs, to_cs))
769
if (to_length < to_cs->mbminlen || !nchars)
772
*cannot_convert_error_pos= NULL;
773
*well_formed_error_pos= NULL;
777
if (to_cs == &my_charset_bin)
779
res= cmin(cmin(nchars, to_length), from_length);
780
memmove(to, from, res);
781
*from_end_pos= from + res;
782
*well_formed_error_pos= NULL;
783
*cannot_convert_error_pos= NULL;
787
int well_formed_error;
788
uint32_t from_offset;
790
if ((from_offset= (from_length % to_cs->mbminlen)) &&
791
(from_cs == &my_charset_bin))
794
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
795
INSERT INTO t1 (ucs2_column) VALUES (0x01);
798
uint32_t pad_length= to_cs->mbminlen - from_offset;
799
memset(to, 0, pad_length);
800
memmove(to + pad_length, from, from_offset);
803
from_length-= from_offset;
804
to+= to_cs->mbminlen;
805
to_length-= to_cs->mbminlen;
808
set_if_smaller(from_length, to_length);
809
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
810
nchars, &well_formed_error);
811
memmove(to, from, res);
812
*from_end_pos= from + res;
813
*well_formed_error_pos= well_formed_error ? from + res : NULL;
814
*cannot_convert_error_pos= NULL;
816
res+= to_cs->mbminlen;
823
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
824
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
825
const unsigned char *from_end= (const unsigned char*) from + from_length;
826
unsigned char *to_end= (unsigned char*) to + to_length;
828
*well_formed_error_pos= NULL;
829
*cannot_convert_error_pos= NULL;
831
for ( ; nchars; nchars--)
833
const char *from_prev= from;
834
if ((cnvres= (*mb_wc)(from_cs, &wc, (unsigned char*) from, from_end)) > 0)
836
else if (cnvres == MY_CS_ILSEQ)
838
if (!*well_formed_error_pos)
839
*well_formed_error_pos= from;
843
else if (cnvres > MY_CS_TOOSMALL)
846
A correct multibyte sequence detected
847
But it doesn't have Unicode mapping.
849
if (!*cannot_convert_error_pos)
850
*cannot_convert_error_pos= from;
855
break; // Not enough characters
858
if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
860
else if (cnvres == MY_CS_ILUNI && wc != '?')
862
if (!*cannot_convert_error_pos)
863
*cannot_convert_error_pos= from_prev;
637
873
*from_end_pos= from;
638
*cannot_convert_error_pos= NULL;
639
*well_formed_error_pos= NULL;
643
if (to_cs == &my_charset_bin)
645
res= min(min(nchars, to_length), from_length);
646
memmove(to, from, res);
647
*from_end_pos= from + res;
648
*well_formed_error_pos= NULL;
649
*cannot_convert_error_pos= NULL;
653
int well_formed_error;
656
if ((from_offset= (from_length % to_cs->mbminlen)) &&
657
(from_cs == &my_charset_bin))
660
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
661
INSERT INTO t1 (ucs2_column) VALUES (0x01);
664
size_t pad_length= to_cs->mbminlen - from_offset;
665
memset(to, 0, pad_length);
666
memmove(to + pad_length, from, from_offset);
669
from_length-= from_offset;
670
to+= to_cs->mbminlen;
671
to_length-= to_cs->mbminlen;
674
set_if_smaller(from_length, to_length);
675
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
676
nchars, &well_formed_error);
677
memmove(to, from, res);
678
*from_end_pos= from + res;
679
*well_formed_error_pos= well_formed_error ? from + res : NULL;
680
*cannot_convert_error_pos= NULL;
682
res+= to_cs->mbminlen;
876
return (uint32_t) res;
788
979
std::swap(str_charset, s.str_charset);
791
void String::q_append(const size_t n)
793
int8store(Ptr + str_length, n);
796
void String::q_append(double d)
798
float8store(Ptr + str_length, d);
801
void String::q_append(double *d)
803
float8store(Ptr + str_length, *d);
806
void String::q_append(const char *data, size_t data_len)
808
memcpy(Ptr + str_length, data, data_len);
809
str_length += data_len;
812
void String::write_at_position(int position, size_t value)
814
int8store(Ptr + position,value);
816
bool check_if_only_end_space(const CHARSET_INFO * const cs, char *str,
819
return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
822
} /* namespace drizzled */
824
bool operator==(const drizzled::String &s1, const drizzled::String &s2)
983
bool operator==(const String &s1, const String &s2)
826
985
return stringcmp(&s1,&s2) == 0;
829
bool operator!=(const drizzled::String &s1, const drizzled::String &s2)
988
bool operator!=(const String &s1, const String &s2)
831
990
return !(s1 == s2);