~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
#include <my_global.h>
17
#include "m_string.h"
18
#include "m_ctype.h"
19
20
/*
  Character classification flags for latin1.

  The table holds one leading 0 followed by 256 flag bytes (257 entries).
  NOTE(review): the leading element suggests lookups are offset by one
  (flags for byte c stored at index c + 1), and the row labels below assume
  that; the flag bit meanings are defined outside this file - confirm both
  against the ctype macros.
*/
static uchar ctype_latin1[] = {
              0,
  /* 0x00 */ 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
  /* 0x10 */ 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  /* 0x20 */ 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  /* 0x30 */132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
  /* 0x40 */ 16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1,
  /* 0x50 */  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,
  /* 0x60 */ 16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,
  /* 0x70 */  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,
  /* 0x80 */ 16,  0, 16,  2, 16, 16, 16, 16, 16, 16,  1, 16,  1,  0,  1,  0,
  /* 0x90 */  0, 16, 16, 16, 16, 16, 16, 16, 16, 16,  2, 16,  2,  0,  2,  1,
  /* 0xA0 */ 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  /* 0xB0 */ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  /* 0xC0 */  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
  /* 0xD0 */  1,  1,  1,  1,  1,  1,  1, 16,  1,  1,  1,  1,  1,  1,  1,  2,
  /* 0xE0 */  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
  /* 0xF0 */  2,  2,  2,  2,  2,  2,  2, 16,  2,  2,  2,  2,  2,  2,  2,  2
};
39
40
/*
  Lower-case mapping, indexed by byte value.

  'A'..'Z' (65..90) map to 'a'..'z' (97..122) and 0xC0..0xDE map to
  0xE0..0xFE, except 0xD7 (215) which maps to itself.  Every other byte,
  including the whole 0x80..0x9F range and 0xDF (223), maps to itself.
*/
static uchar to_lower_latin1[] = {
  /* 0x00 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  /* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  /* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  /* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  /* 0x40 */ 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  /* 0x50 */112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
  /* 0x60 */ 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  /* 0x70 */112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  /* 0x80 */128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  /* 0x90 */144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  /* 0xA0 */160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  /* 0xB0 */176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  /* 0xC0 */224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  /* 0xD0 */240,241,242,243,244,245,246,215,248,249,250,251,252,253,254,223,
  /* 0xE0 */224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  /* 0xF0 */240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};
58
59
/*
  Upper-case mapping, indexed by byte value.

  'a'..'z' (97..122) map to 'A'..'Z' (65..90) and 0xE0..0xFE map to
  0xC0..0xDE, except 0xF7 (247) which maps to itself.  Every other byte,
  including 0xDF (223) and 0xFF (255), maps to itself.
*/
static uchar to_upper_latin1[] = {
  /* 0x00 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  /* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  /* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  /* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  /* 0x40 */ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  /* 0x50 */ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
  /* 0x60 */ 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  /* 0x70 */ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  /* 0x80 */128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  /* 0x90 */144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  /* 0xA0 */160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  /* 0xB0 */176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  /* 0xC0 */192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  /* 0xD0 */208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  /* 0xE0 */192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  /* 0xF0 */208,209,210,211,212,213,214,247,216,217,218,219,220,221,222,255
};
77
78
/*
  Sort weights used by my_charset_latin1 (latin1_swedish_ci), indexed by
  byte value.

  'a'..'z' share the weights of 'A'..'Z'.  In 0xC0..0xFF most accented
  letters take the weight of a base letter (65 'A', 67 'C', 69 'E', ...),
  while a few take the weights 91, 92 or 93, i.e. positions just after 'Z'
  (90).  Bytes below 0xC0 other than a-z weigh as themselves.
*/
static uchar sort_order_latin1[] = {
  /* 0x00 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  /* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  /* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  /* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  /* 0x40 */ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  /* 0x50 */ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
  /* 0x60 */ 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  /* 0x70 */ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  /* 0x80 */128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  /* 0x90 */144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  /* 0xA0 */160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  /* 0xB0 */176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  /* 0xC0 */ 65, 65, 65, 65, 92, 91, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  /* 0xD0 */ 68, 78, 79, 79, 79, 79, 93,215,216, 85, 85, 85, 89, 89,222,223,
  /* 0xE0 */ 65, 65, 65, 65, 92, 91, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  /* 0xF0 */ 68, 78, 79, 79, 79, 79, 93,247,216, 85, 85, 85, 89, 89,222,255
};
96
97
/*
98
  WL#1494 notes:
99
  
100
  We'll use cp1252 instead of iso-8859-1.
101
  cp1252 contains printable characters in the range 0x80-0x9F.
102
  In ISO 8859-1, these code points have no associated printable
103
  characters. Therefore, by converting from CP1252 to ISO 8859-1,
104
  one would lose the euro (for instance). Since most people are
105
  unaware of the difference, and since we don't really want a
106
  "Windows ANSI" to differ from a "Unix ANSI", we will:
107
   
108
   - continue to pretend the latin1 character set is ISO 8859-1
109
   - actually allow the storage of euro etc. so it's actually cp1252
110
111
  Also we'll map these five undefined cp1252 characters:
112
    0x81, 0x8D, 0x8F, 0x90, 0x9D
113
  into corresponding control characters:
114
     U+0081, U+008D, U+008F, U+0090, U+009D.
115
  like ISO-8859-1 does. Otherwise, loading "mysqldump"
116
  output doesn't reproduce these undefined characters.
117
*/
118
119
/*
  Byte -> Unicode code point table for latin1 as stored by this file.

  0x00..0x7F and 0xA0..0xFF map to the code point with the same value.
  0x80..0x9F carry the cp1252 assignments (e.g. 0x80 -> U+20AC), with
  0x81, 0x8D, 0x8F, 0x90 and 0x9D mapped to the control code points of the
  same value.
*/
unsigned short cs_to_uni[256]={
  /* 0x00 */ 0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
  /* 0x08 */ 0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
  /* 0x10 */ 0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
  /* 0x18 */ 0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
  /* 0x20 */ 0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
  /* 0x28 */ 0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
  /* 0x30 */ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
  /* 0x38 */ 0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
  /* 0x40 */ 0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
  /* 0x48 */ 0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
  /* 0x50 */ 0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
  /* 0x58 */ 0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
  /* 0x60 */ 0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
  /* 0x68 */ 0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
  /* 0x70 */ 0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
  /* 0x78 */ 0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
  /* 0x80 */ 0x20AC,0x0081,0x201A,0x0192,0x201E,0x2026,0x2020,0x2021,
  /* 0x88 */ 0x02C6,0x2030,0x0160,0x2039,0x0152,0x008D,0x017D,0x008F,
  /* 0x90 */ 0x0090,0x2018,0x2019,0x201C,0x201D,0x2022,0x2013,0x2014,
  /* 0x98 */ 0x02DC,0x2122,0x0161,0x203A,0x0153,0x009D,0x017E,0x0178,
  /* 0xA0 */ 0x00A0,0x00A1,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7,
  /* 0xA8 */ 0x00A8,0x00A9,0x00AA,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF,
  /* 0xB0 */ 0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7,
  /* 0xB8 */ 0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE,0x00BF,
  /* 0xC0 */ 0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
  /* 0xC8 */ 0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
  /* 0xD0 */ 0x00D0,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7,
  /* 0xD8 */ 0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF,
  /* 0xE0 */ 0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
  /* 0xE8 */ 0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
  /* 0xF0 */ 0x00F0,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x00F7,
  /* 0xF8 */ 0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0x00FF
};
153
/*
  Reverse-mapping page for U+0000..U+00FF, indexed by the low byte of the
  code point; uni_to_cs[0x00] points here.

  The entry is the latin1 byte, which equals the code point everywhere
  except 0x80..0x9F: there only U+0081, U+008D, U+008F, U+0090 and U+009D
  have an entry, and the rest are 0x00.
*/
uchar pl00[256]={
  /* 0x00 */ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
  /* 0x08 */ 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  /* 0x10 */ 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
  /* 0x18 */ 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  /* 0x20 */ 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
  /* 0x28 */ 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  /* 0x30 */ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
  /* 0x38 */ 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  /* 0x40 */ 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  /* 0x48 */ 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  /* 0x50 */ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  /* 0x58 */ 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
  /* 0x60 */ 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
  /* 0x68 */ 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
  /* 0x70 */ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
  /* 0x78 */ 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
  /* 0x80 */ 0x00,0x81,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x88 */ 0x00,0x00,0x00,0x00,0x00,0x8D,0x00,0x8F,
  /* 0x90 */ 0x90,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x98 */ 0x00,0x00,0x00,0x00,0x00,0x9D,0x00,0x00,
  /* 0xA0 */ 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
  /* 0xA8 */ 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  /* 0xB0 */ 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
  /* 0xB8 */ 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
  /* 0xC0 */ 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
  /* 0xC8 */ 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
  /* 0xD0 */ 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
  /* 0xD8 */ 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
  /* 0xE0 */ 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
  /* 0xE8 */ 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
  /* 0xF0 */ 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
  /* 0xF8 */ 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
};
187
/*
  Reverse-mapping page for U+0100..U+01FF, indexed by the low byte of the
  code point; uni_to_cs[0x01] points here.

  Non-zero entries: U+0152 -> 0x8C, U+0153 -> 0x9C, U+0160 -> 0x8A,
  U+0161 -> 0x9A, U+0178 -> 0x9F, U+017D -> 0x8E, U+017E -> 0x9E,
  U+0192 -> 0x83.  All other entries are 0x00.
*/
uchar pl01[256]={
  /* 0x00 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x08 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x10 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x18 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x20 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x28 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x30 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x38 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x40 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x48 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x50 */ 0x00,0x00,0x8C,0x9C,0x00,0x00,0x00,0x00,
  /* 0x58 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x60 */ 0x8A,0x9A,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x68 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x70 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x78 */ 0x9F,0x00,0x00,0x00,0x00,0x8E,0x9E,0x00,
  /* 0x80 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x88 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x90 */ 0x00,0x00,0x83,0x00,0x00,0x00,0x00,0x00,
  /* 0x98 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xA0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xA8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xB0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xB8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xC0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xC8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xD0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xD8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xE0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xE8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xF0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xF8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
};
221
/*
  Reverse-mapping page for U+0200..U+02FF, indexed by the low byte of the
  code point; uni_to_cs[0x02] points here.

  Non-zero entries: U+02C6 -> 0x88, U+02DC -> 0x98.  All other entries
  are 0x00.
*/
uchar pl02[256]={
  /* 0x00 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x08 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x10 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x18 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x20 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x28 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x30 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x38 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x40 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x48 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x50 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x58 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x60 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x68 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x70 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x78 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x80 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x88 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x90 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x98 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xA0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xA8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xB0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xB8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xC0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x88,0x00,
  /* 0xC8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xD0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xD8 */ 0x00,0x00,0x00,0x00,0x98,0x00,0x00,0x00,
  /* 0xE0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xE8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xF0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xF8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
};
255
/*
  Reverse-mapping page for U+2000..U+20FF, indexed by the low byte of the
  code point; uni_to_cs[0x20] points here.

  Non-zero entries: U+2013 -> 0x96, U+2014 -> 0x97, U+2018 -> 0x91,
  U+2019 -> 0x92, U+201A -> 0x82, U+201C -> 0x93, U+201D -> 0x94,
  U+201E -> 0x84, U+2020 -> 0x86, U+2021 -> 0x87, U+2022 -> 0x95,
  U+2026 -> 0x85, U+2030 -> 0x89, U+2039 -> 0x8B, U+203A -> 0x9B,
  U+20AC -> 0x80.  All other entries are 0x00.
*/
uchar pl20[256]={
  /* 0x00 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x08 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x10 */ 0x00,0x00,0x00,0x96,0x97,0x00,0x00,0x00,
  /* 0x18 */ 0x91,0x92,0x82,0x00,0x93,0x94,0x84,0x00,
  /* 0x20 */ 0x86,0x87,0x95,0x00,0x00,0x00,0x85,0x00,
  /* 0x28 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x30 */ 0x89,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x38 */ 0x00,0x8B,0x9B,0x00,0x00,0x00,0x00,0x00,
  /* 0x40 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x48 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x50 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x58 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x60 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x68 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x70 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x78 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x80 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x88 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x90 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x98 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xA0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xA8 */ 0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,
  /* 0xB0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xB8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xC0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xC8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xD0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xD8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xE0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xE8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xF0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xF8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
};
289
/*
  Reverse-mapping page for U+2100..U+21FF, indexed by the low byte of the
  code point; uni_to_cs[0x21] points here.

  The only non-zero entry is U+2122 -> 0x99.
*/
uchar pl21[256]={
  /* 0x00 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x08 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x10 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x18 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x20 */ 0x00,0x00,0x99,0x00,0x00,0x00,0x00,0x00,
  /* 0x28 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x30 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x38 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x40 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x48 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x50 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x58 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x60 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x68 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x70 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x78 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x80 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x88 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x90 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0x98 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xA0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xA8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xB0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xB8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xC0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xC8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xD0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xD8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xE0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xE8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xF0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  /* 0xF8 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
};
323
uchar *uni_to_cs[256]={
324
pl00,pl01,pl02,NULL,NULL,NULL,NULL,NULL,
325
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
326
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
327
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
328
pl20,pl21,NULL,NULL,NULL,NULL,NULL,NULL,
329
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
330
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
331
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
332
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
333
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
334
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
335
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
336
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
337
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
338
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
339
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
340
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
341
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
342
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
343
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
344
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
345
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
346
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
347
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
348
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
349
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
350
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
351
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
352
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
353
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
354
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
355
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL
356
};
357
358
static
359
int my_mb_wc_latin1(CHARSET_INFO *cs  __attribute__((unused)),
360
		    my_wc_t *wc,
361
		    const uchar *str,
362
		    const uchar *end __attribute__((unused)))
363
{
364
  if (str >= end)
365
    return MY_CS_TOOSMALL;
366
  
367
  *wc=cs_to_uni[*str];
368
  return (!wc[0] && str[0]) ? -1 : 1;
369
}
370
371
static
372
int my_wc_mb_latin1(CHARSET_INFO *cs  __attribute__((unused)),
373
		    my_wc_t wc,
374
		    uchar *str,
375
		    uchar *end __attribute__((unused)))
376
{
377
  uchar *pl;
378
  
379
  if (str >= end)
380
    return MY_CS_TOOSMALL;
381
  
382
  pl= uni_to_cs[(wc>>8) & 0xFF];
383
  str[0]= pl ? pl[wc & 0xFF] : '\0';
384
  return (!str[0] && wc) ? MY_CS_ILUNI : 1;
385
}
386
387
static MY_CHARSET_HANDLER my_charset_handler=
388
{
389
    NULL,			/* init */
390
    NULL,
391
    my_mbcharlen_8bit,
392
    my_numchars_8bit,
393
    my_charpos_8bit,
394
    my_well_formed_len_8bit,
395
    my_lengthsp_8bit,
396
    my_numcells_8bit,
397
    my_mb_wc_latin1,
398
    my_wc_mb_latin1,
399
    my_mb_ctype_8bit,
400
    my_caseup_str_8bit,
401
    my_casedn_str_8bit,
402
    my_caseup_8bit,
403
    my_casedn_8bit,
404
    my_snprintf_8bit,
405
    my_long10_to_str_8bit,
406
    my_longlong10_to_str_8bit,
407
    my_fill_8bit,
408
    my_strntol_8bit,
409
    my_strntoul_8bit,
410
    my_strntoll_8bit,
411
    my_strntoull_8bit,
412
    my_strntod_8bit,
413
    my_strtoll10_8bit,
414
    my_strntoull10rnd_8bit,
415
    my_scan_8bit
416
};
417
418
419
CHARSET_INFO my_charset_latin1=
420
{
421
    8,0,0,				/* number    */
422
    MY_CS_COMPILED | MY_CS_PRIMARY,	/* state     */
423
    "latin1",				/* cs name    */
424
    "latin1_swedish_ci",		/* name      */
425
    "",					/* comment   */
426
    NULL,				/* tailoring */
427
    ctype_latin1,
428
    to_lower_latin1,
429
    to_upper_latin1,
430
    sort_order_latin1,
431
    NULL,		/* contractions */
432
    NULL,		/* sort_order_big*/
433
    cs_to_uni,		/* tab_to_uni   */
434
    NULL,		/* tab_from_uni */
435
    my_unicase_default, /* caseinfo     */
436
    NULL,		/* state_map    */
437
    NULL,		/* ident_map    */
438
    1,			/* strxfrm_multiply */
439
    1,                  /* caseup_multiply  */
440
    1,                  /* casedn_multiply  */
441
    1,			/* mbminlen   */
442
    1,			/* mbmaxlen  */
443
    0,			/* min_sort_char */
444
    255,		/* max_sort_char */
445
    ' ',                /* pad char      */
446
    0,                  /* escape_with_backslash_is_dangerous */
447
    1,                  /* levels_for_compare */
448
    1,                  /* levels_for_order   */
449
    &my_charset_handler,
450
    &my_collation_8bit_simple_ci_handler
451
};
452
453
454
455
456
/*
457
 * This file is the latin1 character set with German sorting
458
 *
459
 * The modern sort order is used, where:
460
 *
461
 * 'ä'  ->  "ae"
462
 * 'ö'  ->  "oe"
463
 * 'ü'  ->  "ue"
464
 * 'ß'  ->  "ss"
465
 */
466
467
468
/*
469
 * This is a simple latin1 mapping table, which maps all accented
470
 * characters to their non-accented equivalents.  Note: in this
471
 * table, 'á' is mapped to 'A', 'ÿ' is mapped to 'Y', etc. - all
472
 * accented characters except the following are treated the same way.
473
 * Ü, ü, Ö, ö, Ä, ä
474
 */
475
476
/*
  Sort-order table installed in my_charset_latin1_german2_ci, indexed by
  byte value.

  'a'..'z' share the weights of 'A'..'Z'.  In 0xC0..0xFF most accented
  letters take the weight of a base letter, but the umlauts keep weights
  of their own: 0xC4/0xE4 -> 196, 0xD6/0xF6 -> 214, 0xDC/0xFC -> 220.
  0xDF (223) also weighs as itself.
*/
static uchar sort_order_latin1_de[] = {
  /* 0x00 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  /* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  /* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  /* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  /* 0x40 */ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  /* 0x50 */ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
  /* 0x60 */ 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  /* 0x70 */ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  /* 0x80 */128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  /* 0x90 */144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  /* 0xA0 */160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  /* 0xB0 */176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  /* 0xC0 */ 65, 65, 65, 65,196, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  /* 0xD0 */ 68, 78, 79, 79, 79, 79,214,215,216, 85, 85, 85,220, 89,222,223,
  /* 0xE0 */ 65, 65, 65, 65,196, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  /* 0xF0 */ 68, 78, 79, 79, 79, 79,214,247,216, 85, 85, 85,220, 89,222, 89
};
494
495
496
/*
497
  same as sort_order_latin1_de, but maps ALL accented chars to unaccented ones
498
*/
499
500
/*
  First sort weight of every byte for the German (german2) comparison
  routines in this file, indexed by byte value.

  Unlike sort_order_latin1_de, the umlauts are folded onto their base
  letters here (0xC4/0xE4 -> 65 'A', 0xD6/0xF6 -> 79 'O',
  0xDC/0xFC -> 85 'U') and 0xDF maps to 83 'S'.  The second weight of
  those characters comes from combo2map[].
*/
uchar combo1map[]={
  /* 0x00 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
  /* 0x10 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  /* 0x20 */ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  /* 0x30 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  /* 0x40 */ 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  /* 0x50 */ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
  /* 0x60 */ 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  /* 0x70 */ 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  /* 0x80 */128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  /* 0x90 */144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  /* 0xA0 */160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  /* 0xB0 */176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  /* 0xC0 */ 65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  /* 0xD0 */ 68, 78, 79, 79, 79, 79, 79,215,216, 85, 85, 85, 85, 89,222, 83,
  /* 0xE0 */ 65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  /* 0xF0 */ 68, 78, 79, 79, 79, 79, 79,247,216, 85, 85, 85, 85, 89,222, 89
};
518
519
/*
  Second sort weight of every byte for the German (german2) comparison
  routines in this file, indexed by byte value; 0 means "no second weight".

  Non-zero entries: 196, 214, 220, 228, 246, 252 (the umlauts) -> 69 'E',
  and 223 -> 83 'S'.  Together with combo1map[] this expands the umlauts
  to AE/OE/UE and 0xDF to SS.

  Rows hold 26 values each; the comment gives the index of the first value.
  NOTE(review): the initializer appears to list 257 values, one more than a
  byte index can reach - confirm before relying on sizeof(combo2map).
*/
uchar combo2map[]={
  /*   0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  26 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  52 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /*  78 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 104 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 130 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 156 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 182 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 208 */ 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,69, 0, 0,83, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,
  /* 234 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,69, 0, 0, 0, 0
};
531
532
533
/*
534
  Some notes about the following comparison rules:
535
  By definition, my_strnncoll_latin1_de must work exactly as if it had called
536
  my_strnxfrm_latin1_de() on both strings and compared the result strings.
537
538
  This means that:
539
  Ä must also match ÁE and Aè, because my_strnxfrm_latin1_de() will convert
540
  both to AE.
541
542
  The other option would be to not do any accent removal in
543
  sort_order_latin1_de[] at all
544
*/
545
546
547
static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
548
				  const uchar *a, size_t a_length,
549
				  const uchar *b, size_t b_length,
550
                                  my_bool b_is_prefix)
551
{
552
  const uchar *a_end= a + a_length;
553
  const uchar *b_end= b + b_length;
554
  uchar a_char, a_extend= 0, b_char, b_extend= 0;
555
556
  while ((a < a_end || a_extend) && (b < b_end || b_extend))
557
  {
558
    if (a_extend)
559
    {
560
      a_char=a_extend; a_extend=0;
561
    }
562
    else
563
    {
564
      a_extend=combo2map[*a];
565
      a_char=combo1map[*a++];
566
    }
567
    if (b_extend)
568
    {
569
      b_char=b_extend; b_extend=0;
570
    }
571
    else
572
    {
573
      b_extend=combo2map[*b];
574
      b_char=combo1map[*b++];
575
    }
576
    if (a_char != b_char)
577
      return (int) a_char - (int) b_char;
578
  }
579
  /*
580
    A simple test of string lengths won't work -- we test to see
581
    which string ran out first
582
  */
583
  return ((a < a_end || a_extend) ? (b_is_prefix ? 0 : 1) :
584
	  (b < b_end || b_extend) ? -1 : 0);
585
}
586
587
588
static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
589
				    const uchar *a, size_t a_length,
590
				    const uchar *b, size_t b_length,
591
                                    my_bool diff_if_only_endspace_difference)
592
{
593
  const uchar *a_end= a + a_length, *b_end= b + b_length;
594
  uchar a_char, a_extend= 0, b_char, b_extend= 0;
595
  int res;
596
597
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
598
  diff_if_only_endspace_difference= 0;
599
#endif
600
601
  while ((a < a_end || a_extend) && (b < b_end || b_extend))
602
  {
603
    if (a_extend)
604
    {
605
      a_char=a_extend;
606
      a_extend= 0;
607
    }
608
    else
609
    {
610
      a_extend= combo2map[*a];
611
      a_char=   combo1map[*a++];
612
    }
613
    if (b_extend)
614
    {
615
      b_char= b_extend;
616
      b_extend= 0;
617
    }
618
    else
619
    {
620
      b_extend= combo2map[*b];
621
      b_char=   combo1map[*b++];
622
    }
623
    if (a_char != b_char)
624
      return (int) a_char - (int) b_char;
625
  }
626
  /* Check if double character last */
627
  if (a_extend)
628
    return 1;
629
  if (b_extend)
630
    return -1;
631
632
  res= 0;
633
  if (a != a_end || b != b_end)
634
  {
635
    int swap= 1;
636
    if (diff_if_only_endspace_difference)
637
      res= 1;                                   /* Assume 'a' is bigger */
638
    /*
639
      Check the next not space character of the longer key. If it's < ' ',
640
      then it's smaller than the other key.
641
    */
642
    if (a == a_end)
643
    {
644
      /* put shorter key in a */
645
      a_end= b_end;
646
      a= b;
647
      swap= -1;					/* swap sign of result */
648
      res= -res;
649
    }
650
    for ( ; a < a_end ; a++)
651
    {
652
      if (*a != ' ')
653
	return (*a < ' ') ? -swap : swap;
654
    }
655
  }
656
  return res;
657
}
658
659
660
static size_t
661
my_strnxfrm_latin1_de(CHARSET_INFO *cs,
662
                      uchar *dst, size_t dstlen, uint nweights,
663
                      const uchar* src, size_t srclen, uint flags)
664
{
665
  uchar *de= dst + dstlen;
666
  const uchar *se= src + srclen;
667
  uchar *d0= dst;
668
  for (; src < se && dst < de && nweights; src++, nweights--)
669
  {
670
    uchar chr= combo1map[*src];
671
    *dst++= chr;
672
    if ((chr= combo2map[*src]) && dst < de && nweights > 1)
673
    {
674
      *dst++= chr;
675
      nweights--;
676
    }
677
  }
678
  return my_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
679
}
680
681
53.2.13 by Monty Taylor
Various static declares.
682
static void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
1 by brian
clean slate
683
			    const uchar *key, size_t len,
684
			    ulong *nr1, ulong *nr2)
685
{
686
  const uchar *end;
687
  /*
688
    Remove end space. We have to do this to be able to compare
689
    'AE' and 'Ä' as identical
690
  */
691
  end= skip_trailing_space(key, len);
692
693
  for (; key < end ; key++)
694
  {
695
    uint X= (uint) combo1map[(uint) *key];
696
    nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
697
    nr2[0]+=3;
698
    if ((X= combo2map[*key]))
699
    {
700
      nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
701
      nr2[0]+=3;
702
    }
703
  }
704
}
705
706
707
static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
708
{
709
  NULL,			/* init */
710
  my_strnncoll_latin1_de,
711
  my_strnncollsp_latin1_de,
712
  my_strnxfrm_latin1_de,
713
  my_strnxfrmlen_simple,
714
  my_like_range_simple,
715
  my_wildcmp_8bit,
716
  my_strcasecmp_8bit,
717
  my_instr_simple,
718
  my_hash_sort_latin1_de,
719
  my_propagate_complex
720
};
721
722
723
CHARSET_INFO my_charset_latin1_german2_ci=
724
{
725
  31,0,0,				/* number    */
726
  MY_CS_COMPILED|MY_CS_STRNXFRM,	/* state     */
727
  "latin1",				/* cs name    */
728
  "latin1_german2_ci",			/* name      */
729
  "",					/* comment   */
730
  NULL,					/* tailoring */
731
  ctype_latin1,
732
  to_lower_latin1,
733
  to_upper_latin1,
734
  sort_order_latin1_de,
735
  NULL,					/* contractions */
736
  NULL,					/* sort_order_big*/
737
  cs_to_uni,				/* tab_to_uni   */
738
  NULL,					/* tab_from_uni */
739
  my_unicase_default,                   /* caseinfo     */
740
  NULL,					/* state_map    */
741
  NULL,					/* ident_map    */
742
  2,					/* strxfrm_multiply */
743
  1,                                    /* caseup_multiply  */
744
  1,                                    /* casedn_multiply  */
745
  1,					/* mbminlen   */
746
  1,					/* mbmaxlen  */
747
  0,					/* min_sort_char */
748
  247,					/* max_sort_char */
749
  ' ',                                  /* pad char      */
750
  0,                                    /* escape_with_backslash_is_dangerous */
751
  1,                                    /* levels_for_compare */
752
  1,                                    /* levels_for_order   */
753
  &my_charset_handler,
754
  &my_collation_german2_ci_handler
755
};
756
757
758
CHARSET_INFO my_charset_latin1_bin=
759
{
760
  47,0,0,				/* number    */
761
  MY_CS_COMPILED|MY_CS_BINSORT,		/* state     */
762
  "latin1",				/* cs name    */
763
  "latin1_bin",				/* name      */
764
  "",					/* comment   */
765
  NULL,					/* tailoring */
766
  ctype_latin1,
767
  to_lower_latin1,
768
  to_upper_latin1,
769
  NULL,					/* sort_order   */
770
  NULL,					/* contractions */
771
  NULL,					/* sort_order_big*/
772
  cs_to_uni,				/* tab_to_uni   */
773
  NULL,					/* tab_from_uni */
774
  my_unicase_default,                   /* caseinfo     */
775
  NULL,					/* state_map    */
776
  NULL,					/* ident_map    */
777
  1,					/* strxfrm_multiply */
778
  1,                                    /* caseup_multiply  */
779
  1,                                    /* casedn_multiply  */
780
  1,					/* mbminlen   */
781
  1,					/* mbmaxlen  */
782
  0,					/* min_sort_char */
783
  255,					/* max_sort_char */
784
  ' ',                                  /* pad char      */
785
  0,                                    /* escape_with_backslash_is_dangerous */
786
  1,                                    /* levels_for_compare */
787
  1,                                    /* levels_for_order   */
788
  &my_charset_handler,
789
  &my_collation_8bit_bin_handler
790
};
791