779.3.48
by Monty Taylor
Split out U8_ macros into their own header and added the IBM copyright notice to them. |
1 |
/* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
|
2 |
vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
|
|
3 |
||
1567.3.23
by Monty Taylor
Cleaned up comments. |
4 |
Copyright (C) 2010 Monty Taylor
|
5 |
||
6 |
This program is free software; you can redistribute it and/or modify
|
|
7 |
it under the terms of the GNU General Public License as published by
|
|
8 |
the Free Software Foundation; version 2 of the License.
|
|
9 |
|
|
10 |
This program is distributed in the hope that it will be useful,
|
|
11 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13 |
GNU General Public License for more details.
|
|
14 |
||
15 |
You should have received a copy of the GNU General Public License
|
|
16 |
along with this program; if not, write to the Free Software
|
|
17 |
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
18 |
||
1999.6.1
by kalebral at gmail
update Copyright strings to a more common format to help with creating the master debian copyright file |
19 |
Copyright (C) 1995-2006 International Business Machines Corporation and others
|
1567.3.23
by Monty Taylor
Cleaned up comments. |
20 |
|
21 |
All rights reserved.
|
|
22 |
||
23 |
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
24 |
of this software and associated documentation files (the "Software"),
|
|
25 |
to deal in the Software without restriction, including without limitation
|
|
26 |
the rights to use, copy, modify, merge, publish, distribute, and/or sell
|
|
27 |
copies of the Software, and to permit persons
|
|
28 |
to whom the Software is furnished to do so, provided that the above
|
|
29 |
copyright notice(s) and this permission notice appear in all copies
|
|
30 |
of the Software and that both the above copyright notice(s) and this
|
|
31 |
permission notice appear in supporting documentation.
|
|
32 |
||
33 |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
34 |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
35 |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
|
36 |
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
|
|
37 |
LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR
|
|
38 |
ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
|
39 |
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
40 |
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
41 |
||
42 |
Except as contained in this notice, the name of a copyright holder shall not
|
|
43 |
be used in advertising or otherwise to promote the sale, use or other dealings
|
|
44 |
in this Software without prior written authorization of the copyright holder.
|
|
779.3.48
by Monty Taylor
Split out U8_ macros into their own header and added the IBM copyright notice to them. |
45 |
*/
|
46 |
||
2234
by Brian Aker
Mass removal of ifdef/endif in favor of pragma once. |
47 |
#pragma once
|
1567.3.20
by Monty Taylor
Removed UTF-8 lib that we don't use. |
48 |
|
1567.3.21
by Monty Taylor
Moved function from drizzle client to the utf8 header where it belongs. |
49 |
#include <string> |
50 |
||
2173.2.1
by Monty Taylor
Fixes incorrect usage of include |
51 |
#include <drizzled/utf8/checked.h> |
52 |
#include <drizzled/utf8/unchecked.h> |
|
1823.4.2
by Monty Taylor
Cleaned things up a little bit namespace wise |
53 |
|
1567.3.20
by Monty Taylor
Removed UTF-8 lib that we don't use. |
54 |
namespace drizzled |
55 |
{
|
|
56 |
namespace utf8 |
|
57 |
{
|
|
779.3.48
by Monty Taylor
Split out U8_ macros into their own header and added the IBM copyright notice to them. |
58 |
|
59 |
/**
 * The maximum number of UTF-8 code units (bytes) needed to encode a single
 * Unicode code point (U+0000..U+10ffff).
 */
static const int MAX_LENGTH= 4;
|
63 |
||
64 |
/**
 * Does this code unit (byte) encode a code point by itself, i.e. is it a
 * US-ASCII value in the range 0..0x7f?
 *
 * The argument is narrowed to uint8_t first, so only its low eight bits are
 * examined.
 *
 * @param c 8-bit code unit (byte)
 * @return true when the high bit of the byte is clear, false otherwise
 */
template <class T>
bool is_single(T c)
{
  const uint8_t byte= static_cast<uint8_t>(c);
  return (byte & 0x80) == 0;
}
|
|
779.3.48
by Monty Taylor
Split out U8_ macros into their own header and added the IBM copyright notice to them. |
74 |
|
75 |
/**
 * How many code units (bytes) are used for the UTF-8 encoding of this
 * Unicode code point?
 *
 * @param c 32-bit code point
 * @return 1..4, or 0 if c is a surrogate (0xd800..0xdfff) or lies above
 *         0x10ffff and is therefore not a Unicode code point
 */
static inline int codepoint_length(uint32_t c)
{
  if (c <= 0x7f)
    return 1;
  if (c <= 0x7ff)
    return 2;
  if (c <= 0xd7ff)
    return 3;
  /* Everything up to 0xd7ff is handled above, so "<= 0xdfff" here selects
     exactly the surrogate range; values above 0x10ffff are out of range. */
  if (c <= 0xdfff || c > 0x10ffff)
    return 0;
  return (c <= 0xffff) ? 3 : 4;
}
|
779.3.48
by Monty Taylor
Split out U8_ macros into their own header and added the IBM copyright notice to them. |
89 |
|
1567.3.23
by Monty Taylor
Cleaned up comments. |
90 |
/**
 * How many bytes are used for the UTF-8 encoding of the code point of which
 * this is the first byte?
 *
 * Only the bit pattern of the leading byte is inspected (the argument is
 * narrowed to uint8_t first); no further validation is done, so lead bytes
 * that can only begin overlong or out-of-range sequences (e.g. 0xc0, 0xf5)
 * still report the length their bit pattern implies.
 *
 * @param c 8-bit leading byte
 * @return 1..4, or 0 if c does not match any UTF-8 lead-byte pattern
 *         (a continuation byte 0x80..0xbf, or 0xf8..0xff)
 */
template <class T>
int sequence_length(T c)
{
  const uint8_t lead= static_cast<uint8_t>(c);

  if (lead < 0x80)
    return 1;                 /* 0xxxxxxx */
  if ((lead >> 5) == 0x6)
    return 2;                 /* 110xxxxx */
  if ((lead >> 4) == 0xe)
    return 3;                 /* 1110xxxx */
  if ((lead >> 3) == 0x1e)
    return 4;                 /* 11110xxx */
  return 0;
}
|
|
1567.3.18
by Monty Taylor
Fixed utf-8 space handling in client output. |
104 |
|
1567.3.21
by Monty Taylor
Moved function from drizzle client to the utf8 header where it belongs. |
105 |
|
1567.3.23
by Monty Taylor
Cleaned up comments. |
106 |
/**
|
107 |
* How many logical characters does the given UTF-8 string occupy? Useful when
|
|
108 |
* needing to calculate char length rather than byte length of a string
|
|
109 |
* @param in_string string to measure
|
|
110 |
* @return length in characters of given string
|
|
111 |
*/
|
|
1567.3.21
by Monty Taylor
Moved function from drizzle client to the utf8 header where it belongs. |
112 |
static inline uint32_t char_length(const std::string &in_string) |
113 |
{
|
|
114 |
uint32_t length= 0; |
|
1839.1.1
by Andrew Hutchings
Output of unhex('ff') caused an endless loop due to 0-byte length accounting. |
115 |
int seq_length= 0; |
1567.3.21
by Monty Taylor
Moved function from drizzle client to the utf8 header where it belongs. |
116 |
std::string::const_iterator iter= in_string.begin(); |
117 |
while (iter < in_string.end()) |
|
118 |
{
|
|
119 |
length++; |
|
1839.1.1
by Andrew Hutchings
Output of unhex('ff') caused an endless loop due to 0-byte length accounting. |
120 |
seq_length= sequence_length(*iter); |
121 |
iter += (seq_length > 0) ? seq_length : 1; |
|
1567.3.21
by Monty Taylor
Moved function from drizzle client to the utf8 header where it belongs. |
122 |
}
|
123 |
return length; |
|
124 |
}
|
|
125 |
||
1567.3.23
by Monty Taylor
Cleaned up comments. |
126 |
/**
|
127 |
* How many logical characters does the given UTF-8 string occupy? Useful when
|
|
128 |
* needing to calculate char length rather than byte length of a string
|
|
129 |
* @param in_string string to measure
|
|
130 |
* @return length in characters of given string
|
|
131 |
*/
|
|
1567.3.21
by Monty Taylor
Moved function from drizzle client to the utf8 header where it belongs. |
132 |
static inline uint32_t char_length(const char *in_string) |
133 |
{
|
|
134 |
const std::string process_string(in_string); |
|
135 |
return char_length(process_string); |
|
136 |
}
|
|
137 |
||
138 |
||
1567.3.20
by Monty Taylor
Removed UTF-8 lib that we don't use. |
139 |
} /* namespace utf8 */ |
140 |
} /* namespace drizzled */ |
|
141 |