16
16
/* This file is originally from the mysql distribution. Coded by monty */
19
#include <mysys/my_sys.h>
20
#include <mystrings/m_string.h>
23
The following extern declarations are ok as these are interface functions
24
required by the string function
27
extern uchar* sql_alloc(unsigned size);
28
extern void sql_element_free(void *ptr);
30
#include "sql_string.h"
20
#include "drizzled/internal/my_sys.h"
21
#include "drizzled/internal/m_string.h"
22
#include "drizzled/charset.h"
23
#include "drizzled/global_charset_info.h"
27
#include "drizzled/sql_string.h"
34
// Conversion functions to and from std::string.
36
std::string String_to_std_string(String const& s)
38
return std::string(s.ptr(), s.length());
41
String* set_String_from_std_string(String* s, std::string const& cs)
43
s->set_ascii(cs.c_str(), cs.length());
32
48
/*****************************************************************************
33
49
** String functions
34
50
*****************************************************************************/
57
str_charset(&my_charset_bin)
61
String::String(uint32_t length_arg)
66
str_charset(&my_charset_bin)
68
(void) real_alloc(length_arg);
71
String::String(const char *str, const CHARSET_INFO * const cs)
72
: Ptr(const_cast<char *>(str)),
73
str_length(static_cast<uint32_t>(strlen(str))),
80
String::String(const char *str, uint32_t len, const CHARSET_INFO * const cs)
81
: Ptr(const_cast<char *>(str)),
89
String::String(char *str, uint32_t len, const CHARSET_INFO * const cs)
98
String::String(const String &str)
100
str_length(str.str_length),
101
Alloced_length(str.Alloced_length),
103
str_charset(str.str_charset)
107
void *String::operator new(size_t size, memory::Root *mem_root)
109
return alloc_root(mem_root, static_cast<uint32_t>(size));
112
String::~String() { free(); }
36
114
bool String::real_alloc(uint32_t arg_length)
38
116
arg_length=ALIGN_SIZE(arg_length+1);
102
bool String::set_real(double num,uint decimals, const CHARSET_INFO * const cs)
180
bool String::set_real(double num,uint32_t decimals, const CHARSET_INFO * const cs)
104
182
char buff[FLOATING_POINT_BUFFER];
183
uint32_t dummy_errors;
109
187
if (decimals >= NOT_FIXED_DEC)
111
len= my_gcvt(num, MY_GCVT_ARG_DOUBLE, sizeof(buff) - 1, buff, NULL);
112
return copy(buff, len, &my_charset_latin1, cs, &dummy_errors);
189
len= internal::my_gcvt(num,
190
internal::MY_GCVT_ARG_DOUBLE,
191
sizeof(buff) - 1, buff, NULL);
192
return copy(buff, len, &my_charset_utf8_general_ci, cs, &dummy_errors);
114
len= my_fcvt(num, decimals, buff, NULL);
115
return copy(buff, (uint32_t) len, &my_charset_latin1, cs,
194
len= internal::my_fcvt(num, decimals, buff, NULL);
195
return copy(buff, (uint32_t) len, &my_charset_utf8_general_ci, cs,
193
Copy a multi-byte character set string, adding leading zeros.
199
arg_length Length of string. This should NOT be divisible by
201
offset arg_length % cs->mb_minlength
202
cs Character set for 'str'
205
For real multi-byte, ASCII-incompatible character sets,
206
like UCS-2, add leading zeros if we have an incomplete character.
209
will automatically be converted into
217
bool String::copy_aligned(const char *str,uint32_t arg_length, uint32_t offset,
218
const CHARSET_INFO * const cs)
220
/* How many bytes are in incomplete character */
221
offset= cs->mbmaxlen - offset; /* How many zeros we should prepend */
222
assert(offset && offset != cs->mbmaxlen);
224
uint32_t aligned_length= arg_length + offset;
225
if (alloc(aligned_length))
229
Note, this is only safe for big-endian UCS-2.
230
If we ever add little-endian UCS-2, this code
231
will be more complicated. But it's OK for now.
233
memset(Ptr, 0, offset);
234
memcpy(Ptr + offset, str, arg_length);
235
Ptr[aligned_length]=0;
236
/* str_length is always >= 0 as arg_length is != 0 */
237
str_length= aligned_length;
243
274
bool String::set_or_copy_aligned(const char *str,uint32_t arg_length,
244
275
const CHARSET_INFO * const cs)
246
277
/* How many bytes are in incomplete character */
247
uint32_t offset= (arg_length % cs->mbminlen);
249
if (!offset) /* All characters are complete, just copy */
251
set(str, arg_length, cs);
254
return copy_aligned(str, arg_length, offset, cs);
278
uint32_t offset= (arg_length % cs->mbminlen);
280
assert(!offset); /* All characters are complete, just copy */
282
set(str, arg_length, cs);
257
286
/* Copy with charset conversion */
259
288
bool String::copy(const char *str, uint32_t arg_length,
260
const CHARSET_INFO * const from_cs,
261
const CHARSET_INFO * const to_cs, uint *errors)
289
const CHARSET_INFO * const,
290
const CHARSET_INFO * const to_cs, uint32_t *errors)
264
if (!needs_conversion(arg_length, from_cs, to_cs, &offset))
267
return copy(str, arg_length, to_cs);
269
if ((from_cs == &my_charset_bin) && offset)
272
return copy_aligned(str, arg_length, offset, to_cs);
274
uint32_t new_length= to_cs->mbmaxlen*arg_length;
275
if (alloc(new_length))
277
str_length=copy_and_convert((char*) Ptr, new_length, to_cs,
278
str, arg_length, from_cs, errors);
293
return copy(str, arg_length, to_cs);
285
298
Set a string to the value of a latin1-string, keeping the original charset
289
302
str String of a simple charset (latin1)
391
372
with character set recoding
394
bool String::append(const char *s,uint32_t arg_length, const CHARSET_INFO * const cs)
396
uint32_t dummy_offset;
398
if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
400
uint32_t add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
402
if (realloc(str_length + add_length))
404
str_length+= copy_and_convert(Ptr+str_length, add_length, str_charset,
405
s, arg_length, cs, &dummy_errors);
409
if (realloc(str_length + arg_length))
411
memcpy(Ptr + str_length, s, arg_length);
412
str_length+= arg_length;
418
bool String::append(IO_CACHE* file, uint32_t arg_length)
420
if (realloc(str_length+arg_length))
422
if (my_b_read(file, (uchar*) Ptr + str_length, arg_length))
427
str_length+=arg_length;
375
bool String::append(const char *s,uint32_t arg_length, const CHARSET_INFO * const)
377
if (realloc(str_length + arg_length))
379
memcpy(Ptr + str_length, s, arg_length);
380
str_length+= arg_length;
431
386
bool String::append_with_prefill(const char *s,uint32_t arg_length,
432
387
uint32_t full_length, char fill_char)
559
// added by Holyfoot for "geometry" needs
560
int String::reserve(uint32_t space_needed, uint32_t grow_by)
562
if (Alloced_length < str_length + space_needed)
564
if (realloc(Alloced_length + max(space_needed, grow_by) - 1))
570
void String::qs_append(const char *str, uint32_t len)
572
memcpy(Ptr + str_length, str, len + 1);
576
void String::qs_append(double d)
578
char *buff = Ptr + str_length;
579
str_length+= my_gcvt(d, MY_GCVT_ARG_DOUBLE, FLOATING_POINT_BUFFER - 1, buff, NULL);
582
void String::qs_append(double *d)
585
float8get(ld, (char*) d);
589
void String::qs_append(int i)
591
char *buff= Ptr + str_length;
592
char *end= int10_to_str(i, buff, -10);
593
str_length+= (int) (end-buff);
596
void String::qs_append(uint i)
598
char *buff= Ptr + str_length;
599
char *end= int10_to_str(i, buff, 10);
600
str_length+= (int) (end-buff);
604
517
Compare strings according to collation, without end space.
676
589
****************************************************************************/
679
copy a string from one character set to another
684
to_cs Character set of result string
686
from_length Length of from string
687
from_cs From character set
690
'to' must be at least from_length * to_cs->mbmaxlen bytes long
693
length of bytes copied to 'to'
698
copy_and_convert_extended(char *to, uint32_t to_length,
699
const CHARSET_INFO * const to_cs,
700
const char *from, uint32_t from_length,
701
const CHARSET_INFO * const from_cs,
706
const uchar *from_end= (const uchar*) from+from_length;
708
uchar *to_end= (uchar*) to+to_length;
709
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
710
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
715
if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from,
718
else if (cnvres == MY_CS_ILSEQ)
724
else if (cnvres > MY_CS_TOOSMALL)
727
A correct multibyte sequence detected
728
But it doesn't have Unicode mapping.
735
break; // Not enough characters
738
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
740
else if (cnvres == MY_CS_ILUNI && wc != '?')
749
*errors= error_count;
750
return (uint32_t) (to - to_start);
755
Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
758
copy_and_convert(char *to, uint32_t to_length, const CHARSET_INFO * const to_cs,
759
const char *from, uint32_t from_length,
760
const CHARSET_INFO * const from_cs, uint *errors)
763
If any of the character sets is not ASCII compatible,
764
immediately switch to slow mb_wc->wc_mb method.
766
if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
767
return copy_and_convert_extended(to, to_length, to_cs,
768
from, from_length, from_cs, errors);
770
uint32_t length= min(to_length, from_length), length2= length;
772
#if defined(__i386__)
774
Special loop for i386: it allows referring to a
775
non-aligned memory block as UINT32, which makes
776
it possible to copy four bytes at once. This
777
gives about a 10% performance improvement compared
778
to byte-by-byte loop.
780
for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
782
if ((*(uint32_t*)from) & 0x80808080)
784
*((uint32_t*) to)= *((const uint32_t*) from);
788
for (; ; *to++= *from++, length--)
795
if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
797
uint32_t copied_length= length2 - length;
798
to_length-= copied_length;
799
from_length-= copied_length;
800
return copied_length + copy_and_convert_extended(to, to_length,
808
assert(false); // Should never get to here
809
return 0; // Make compiler happy
814
Copy string with HEX-encoding of "bad" characters.
816
@details This function copies the string pointed to by "src"
817
to the string pointed to by "dst". Not more than "srclen" bytes
818
are read from "src". Any sequences of bytes representing
819
a not-well-formed substring (according to cs) are hex-encoded,
820
and all well-formed substrings (according to cs) are copied as is.
821
Not more than "dstlen" bytes are written to "dst". The number
822
of bytes written to "dst" is returned.
824
@param cs character set pointer of the destination string
825
@param[out] dst destination string
826
@param dstlen size of dst
827
@param src source string
828
@param srclen length of src
830
@retval result length
834
my_copy_with_hex_escaping(const CHARSET_INFO * const cs,
835
char *dst, size_t dstlen,
836
const char *src, size_t srclen)
838
const char *srcend= src + srclen;
841
for ( ; src < srcend ; )
844
if ((chlen= my_ismbchar(cs, src, srcend)))
847
break; /* purecov: inspected */
848
memcpy(dst, src, chlen);
853
else if (*src & 0x80)
856
break; /* purecov: inspected */
859
*dst++= _dig_vec_upper[((unsigned char) *src) >> 4];
860
*dst++= _dig_vec_upper[((unsigned char) *src) & 15];
867
break; /* purecov: inspected */
877
593
with optional character set conversion,
878
594
with optional left padding (for binary -> UCS2 conversion)
881
597
well_formed_copy_nchars()
882
598
to Store result here
903
619
well_formed_copy_nchars(const CHARSET_INFO * const to_cs,
904
char *to, uint to_length,
620
char *to, uint32_t to_length,
905
621
const CHARSET_INFO * const from_cs,
906
const char *from, uint from_length,
622
const char *from, uint32_t from_length,
908
624
const char **well_formed_error_pos,
909
625
const char **cannot_convert_error_pos,
910
626
const char **from_end_pos)
914
if ((to_cs == &my_charset_bin) ||
915
(from_cs == &my_charset_bin) ||
916
(to_cs == from_cs) ||
917
my_charset_same(from_cs, to_cs))
919
if (to_length < to_cs->mbminlen || !nchars)
922
*cannot_convert_error_pos= NULL;
923
*well_formed_error_pos= NULL;
927
if (to_cs == &my_charset_bin)
929
res= min(min(nchars, to_length), from_length);
930
memmove(to, from, res);
931
*from_end_pos= from + res;
932
*well_formed_error_pos= NULL;
933
*cannot_convert_error_pos= NULL;
937
int well_formed_error;
940
if ((from_offset= (from_length % to_cs->mbminlen)) &&
941
(from_cs == &my_charset_bin))
944
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
945
INSERT INTO t1 (ucs2_column) VALUES (0x01);
948
uint pad_length= to_cs->mbminlen - from_offset;
949
memset(to, 0, pad_length);
950
memmove(to + pad_length, from, from_offset);
953
from_length-= from_offset;
954
to+= to_cs->mbminlen;
955
to_length-= to_cs->mbminlen;
958
set_if_smaller(from_length, to_length);
959
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
960
nchars, &well_formed_error);
961
memmove(to, from, res);
962
*from_end_pos= from + res;
963
*well_formed_error_pos= well_formed_error ? from + res : NULL;
964
*cannot_convert_error_pos= NULL;
966
res+= to_cs->mbminlen;
973
my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
974
my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
975
const uchar *from_end= (const uchar*) from + from_length;
976
uchar *to_end= (uchar*) to + to_length;
978
*well_formed_error_pos= NULL;
979
*cannot_convert_error_pos= NULL;
981
for ( ; nchars; nchars--)
983
const char *from_prev= from;
984
if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
986
else if (cnvres == MY_CS_ILSEQ)
988
if (!*well_formed_error_pos)
989
*well_formed_error_pos= from;
993
else if (cnvres > MY_CS_TOOSMALL)
996
A correct multibyte sequence detected
997
But it doesn't have Unicode mapping.
999
if (!*cannot_convert_error_pos)
1000
*cannot_convert_error_pos= from;
1005
break; // Not enough characters
1008
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
1010
else if (cnvres == MY_CS_ILUNI && wc != '?')
1012
if (!*cannot_convert_error_pos)
1013
*cannot_convert_error_pos= from_prev;
630
assert((to_cs == &my_charset_bin) ||
631
(from_cs == &my_charset_bin) ||
632
(to_cs == from_cs) ||
633
my_charset_same(from_cs, to_cs));
635
if (to_length < to_cs->mbminlen || !nchars)
1023
637
*from_end_pos= from;
1026
return (uint32_t) res;
638
*cannot_convert_error_pos= NULL;
639
*well_formed_error_pos= NULL;
643
if (to_cs == &my_charset_bin)
645
res= min(min(nchars, to_length), from_length);
646
memmove(to, from, res);
647
*from_end_pos= from + res;
648
*well_formed_error_pos= NULL;
649
*cannot_convert_error_pos= NULL;
653
int well_formed_error;
654
uint32_t from_offset;
656
if ((from_offset= (from_length % to_cs->mbminlen)) &&
657
(from_cs == &my_charset_bin))
660
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
661
INSERT INTO t1 (ucs2_column) VALUES (0x01);
664
uint32_t pad_length= to_cs->mbminlen - from_offset;
665
memset(to, 0, pad_length);
666
memmove(to + pad_length, from, from_offset);
669
from_length-= from_offset;
670
to+= to_cs->mbminlen;
671
to_length-= to_cs->mbminlen;
674
set_if_smaller(from_length, to_length);
675
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
676
nchars, &well_formed_error);
677
memmove(to, from, res);
678
*from_end_pos= from + res;
679
*well_formed_error_pos= well_formed_error ? from + res : NULL;
680
*cannot_convert_error_pos= NULL;
682
res+= to_cs->mbminlen;
724
Quote the given identifier.
725
If the given identifier is empty, it will be quoted.
729
name the identifier to be appended
730
name_length length of the appending identifier
733
/* Factor the extern out */
734
extern const CHARSET_INFO *system_charset_info, *files_charset_info;
736
void String::append_identifier(const char *name, uint32_t in_length)
738
const char *name_end;
743
The identifier must be quoted as it includes a quote character or
747
reserve(in_length*2 + 2);
748
quote_char= (char) q;
749
append("e_char, 1, system_charset_info);
751
for (name_end= name+in_length ; name < name_end ; name+= in_length)
753
unsigned char chr= (unsigned char) *name;
754
in_length= my_mbcharlen(system_charset_info, chr);
756
my_mbcharlen can return 0 on a wrong multibyte
757
sequence. It is possible when upgrading from 4.0,
758
and identifier contains some accented characters.
759
The manual says it does not work. So we'll just
760
change length to 1 so as not to hang in an endless loop.
764
if (in_length == 1 && chr == (unsigned char) quote_char)
765
append("e_char, 1, system_charset_info);
766
append(name, in_length, system_charset_info);
768
append("e_char, 1, system_charset_info);
1066
773
Exchange state of this object and argument.
1075
782
void String::swap(String &s)
1077
swap_variables(char *, Ptr, s.Ptr);
1078
swap_variables(uint32_t, str_length, s.str_length);
1079
swap_variables(uint32_t, Alloced_length, s.Alloced_length);
1080
swap_variables(bool, alloced, s.alloced);
1081
swap_variables(const CHARSET_INFO *, str_charset, s.str_charset);
784
std::swap(Ptr, s.Ptr);
785
std::swap(str_length, s.str_length);
786
std::swap(Alloced_length, s.Alloced_length);
787
std::swap(alloced, s.alloced);
788
std::swap(str_charset, s.str_charset);
791
void String::q_append(const uint32_t n)
793
int4store(Ptr + str_length, n);
796
void String::q_append(double d)
798
float8store(Ptr + str_length, d);
801
void String::q_append(double *d)
803
float8store(Ptr + str_length, *d);
806
void String::q_append(const char *data, uint32_t data_len)
808
memcpy(Ptr + str_length, data, data_len);
809
str_length += data_len;
812
void String::write_at_position(int position, uint32_t value)
814
int4store(Ptr + position,value);
816
bool check_if_only_end_space(const CHARSET_INFO * const cs, char *str,
819
return str+ cs->cset->scan(cs, str, end, MY_SEQ_SPACES) == end;
822
} /* namespace drizzled */
824
bool operator==(const drizzled::String &s1, const drizzled::String &s2)
826
return stringcmp(&s1,&s2) == 0;
829
bool operator!=(const drizzled::String &s1, const drizzled::String &s2)