1
by brian
clean slate |
1 |
#!/bin/sh |
2 |
#<pre> |
|
3 |
( |
|
4 |
# Emit the DDL that resets the lang table; the statements are written to
# stdout and consumed by the mysql client at the end of the enclosing pipeline.
# IF EXISTS keeps the very first run (no table yet) from raising an error.
echo "DROP TABLE IF EXISTS lang;"
# One row per language: its name and the non-ASCII letters it needs.
echo "CREATE TABLE lang (lang varchar(128), letters text character set utf8);"
|
6 |
( |
|
7 |
grep -v "^#" << END |
|
8 |
#
|
|
9 |
Greenlandic ÁÂÃÊÍÎÔÚÛáâãêíîôúûĨĩĸŨũ |
|
10 |
#Use of these letters was abolished in a spelling reform in 1973: |
|
11 |
#Greenlandic ÅÆØåæø |
|
12 |
#Characters not found in the UCS: |
|
13 |
# K LATIN CAPITAL LETTER KRA |
|
14 |
#############################################################
|
|
15 |
#Basque ÑÜñüŔŕ |
|
16 |
#Characters not found in the UCS: |
|
17 |
# D LATIN CAPITAL LETTER D WITH MACRON |
|
18 |
# d LATIN SMALL LETTER D WITH MACRON |
|
19 |
# L LATIN CAPITAL LETTER L WITH MACRON |
|
20 |
# l LATIN SMALL LETTER L WITH MACRON |
|
21 |
# T LATIN CAPITAL LETTER T WITH MACRON |
|
22 |
# t LATIN SMALL LETTER T WITH MACRON |
|
23 |
#############################################################
|
|
24 |
#Maltese #ÀÁÂÈÉÊÌÍÎÒÓÔÙÚÛ#àáâèéêìíîòÓôùúû#ĊċĠġĦħŻżʼ |
|
25 |
#BosnianCyr ЂЈЉЊЋЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШабвгдежзиклмнопрстуфхцчшђјљњћџ |
|
26 |
#Scots A |
|
27 |
#Scots1 ƷȜȝʒ |
|
28 |
###########################################
|
|
29 |
#### Hiragana 3040-309F |
|
30 |
Hiragana ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをん |
|
31 |
Hiragana1 ゔ゙゚ |
|
32 |
Hiragana2 ゛゜ゝゞ |
|
33 |
#### Katakana 30A0-30FF |
|
34 |
Katakana ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶ |
|
35 |
Katakana1 ヷヸヹヺ |
|
36 |
Katakana2 ・ーヽヾ |
|
37 |
############################################
|
|
38 |
Albanian ÂÇËâçë |
|
39 |
Bosnian ĆćČčĐ𩹮ž |
|
40 |
Breton ÂÊÑÙÜâêñùü |
|
41 |
Catalan ÀÇÈÉÍÏÒÓÚÜàçèéíïòóúü |
|
42 |
#Catalan1 ·Ŀŀ |
|
43 |
Croatian ĆćČčĐ𩹮ž |
|
44 |
CroatianLig DZDzdzDŽDždžLJLjljNJNjnj |
|
45 |
Czech ÁÉÍÓÚÝáéíóúýČčĎďĚěŇňŘřŠšŤťŮůŽž |
|
46 |
Danish ÁÄÅÆÉÓÖØÜáäåæéóöøü |
|
47 |
Dutch ÀÂÄÆÇÈÉÊËÎÏÑÒÓÔÖÙÚÛÜàâäæçèéêëîïñòóôöùúûü |
|
48 |
Esperanto ĈĉĜĝĤĥĴĵŜŝŬŭ |
|
49 |
Estonian ÄÕÖÜäõöüŠšŽž |
|
50 |
Faroese ÅÆÐÓÖØÚÝåæðóöøúý |
|
51 |
Finnish ÄÅÖÜäåöü |
|
52 |
#Finnish1 ŠšŽž |
|
53 |
French(limited) ÀÂÆÇÈÉÊËÎÏÑÔÙÛàâæçèéêëîïñôùûÿ |
|
54 |
French ŒœŸ |
|
55 |
German ÄÖÜßäöü |
|
56 |
Hungarian ÁÉÍÓÖÚÜáéíóöúüŐőŰű |
|
57 |
Icelandic ÁÆÉÍÐÓÖÚÝÞáæéíðóöúýþ |
|
58 |
Italian ÀÈÉÌÍÏÒÓÙÚàèéìíïòóùú |
|
59 |
#Latin A |
|
60 |
Latvian ĀāČčĒēĢģĪīĶķĻļŅņŠšŪūŽž |
|
61 |
Lithuanian ĄąČčĖėĘęĮįŠšŪūŲųŽž |
|
62 |
Norwegian ÅÆØåæø |
|
63 |
Polish ÓóĄąĆćĘꣳŃńŚśŹźŻż |
|
64 |
Portuguese ÀÁÂÃÇÉÊÍÓÔÕÚÜàáâãçéêíóôõúü |
|
65 |
#http://en.wikipedia.org/wiki/Special_Romanian_Unicode_characters |
|
66 |
Romanian ÂÎâîĂăȘșȚț |
|
67 |
Romanian(ErrorST) ÂÎâîĂ㪺Ţţ |
|
68 |
Slovak ÁÄÉÍÓÔÚÝáäéíóôúýČčĎďĹ弾ŇňŔ੹ŤťŽž |
|
69 |
Slovene Č芚Žž |
|
70 |
Sorbian-Lower ĆćČčĚ죳ŃńŘřŚśŠšŹźŽž |
|
71 |
Sorbian-Upper ÓóĆćČčĚ죳ŃńŘřŠšŽž |
|
72 |
Spanish ÁÉÍÑÓÚÜáéíñóúü |
|
73 |
Swedish ÄÅÖäåö |
|
74 |
Turkish ÂÇÖÛÜâçöûüĞğİı |
|
75 |
Welsh ÀÁÂÄÈÉÊËÌÍÎÏÒÓÔÖÙÚÛÜÝàáâäèéêëìíîïòóôöùúûüýÿŴŵŶŷŸẀẁẂẃẄẅỲỳ |
|
76 |
##################################
|
|
77 |
Belarusian ЁІЎАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяёіў |
|
78 |
Bulgarian АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя |
|
79 |
Bulgarian1 ЀҭѐѝѢѣѪѫ |
|
80 |
Macedonian ЃЅЈЉЊЌЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШабвгдежзиклмнопрстуфхцчшѓѕјљњќџ |
|
81 |
Russian ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё |
|
82 |
RussianOLD ІіѢѣѲѳѴѵ |
|
83 |
Serbian ЂЈЉЊЋЏАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШабвгдежзиклмнопрстуфхцчшђјљњћџ |
|
84 |
Ukrainian ЄІЇАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЭЮЯабвгдежзийклмнопрстуфхцчшщьэюяєіїҐґ |
|
85 |
##################################
|
|
86 |
Armenian ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՑՒՓՔՕՖ՛՜՝՞՟աբգդեֆ։ |
|
87 |
#Armenian1 ՚֊ |
|
88 |
#Characters not found in the UCS: |
|
89 |
# ARMENIAN ETERNITY SIGN |
|
90 |
#
|
|
91 |
GeorgianOld ႠႡႢႣႤႥႦႧႨႩႪႫႬႭႮႯႰႱႲႳႴႵႶႷႸႹႺႻႼႽႾႿჀჁჂჃჄჅ |
|
92 |
Georgian აბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ |
|
93 |
GeorgianArc ჱჲჳჴჵჶ |
|
94 |
GeorgianPunc ჻ |
|
95 |
#
|
|
96 |
GreekExt1 ΄΅Ά·ΈΉΊ»Ό½ΎΏΐ |
|
97 |
Greek ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρστυφχψω |
|
98 |
GreekExt2 ΪΫάέήίΰϊϋόύώ |
|
99 |
GreekExt4 ς |
|
100 |
#
|
|
101 |
Hebrew אבגדהוזחטיךכלםמןנסעףפץצקרשת |
|
102 |
##################################
|
|
103 |
#Abaza |
|
104 |
#Abkhaz |
|
105 |
#Adyghe |
|
106 |
#Agul * |
|
107 |
#(Aisor) |
|
108 |
#Akhvakh * |
|
109 |
#(Älvdalska)
|
110 |
#(Andi) * |
|
111 |
#(Aragonese) |
|
112 |
#Archi * |
|
113 |
#Arumanian |
|
114 |
#(Arvanite) |
|
115 |
#Asturian |
|
116 |
#Avar |
|
117 |
#Azerbaijani |
|
118 |
#(Bagulal) * |
|
119 |
#Balkar |
|
120 |
#Bashkir |
|
121 |
#Basque ! |
|
122 |
#Bats * |
|
123 |
#Bezhta * |
|
124 |
#(Botlikh) * |
|
125 |
#Budukh * |
|
126 |
#(Chamalal) |
|
127 |
#Chechen |
|
128 |
#Chuvash |
|
129 |
#Cornish ! |
|
130 |
#(Corsican) |
|
131 |
#Dargwa |
|
132 |
#Erzya |
|
133 |
#(Franco-Provençal)
|
134 |
#(Frisian, East) |
|
135 |
#(Frisian, North) |
|
136 |
#Frisian, West |
|
137 |
#Friulian |
|
138 |
#Gagauz |
|
139 |
#Gaelic, Irish ! |
|
140 |
#Gaelic, Manx ! |
|
141 |
#Gaelic, Scottish ! |
|
142 |
#Galician ! |
|
143 |
#(German, Low) ! |
|
144 |
#(German, Swiss) ! |
|
145 |
#Godoberi * |
|
146 |
#(Hinukh) * |
|
147 |
#(Hunzib) * |
|
148 |
#Ingrian |
|
149 |
#Ingush |
|
150 |
#Istro-Romanian |
|
151 |
#(Judeo-Georgian) |
|
152 |
#(Judeo-Kurdish) |
|
153 |
#(Judeo-Tati) |
|
154 |
#Kabardian |
|
155 |
#Kalmyk |
|
156 |
#Karachay |
|
157 |
#(Karaim) |
|
158 |
#(Karata) * |
|
159 |
#Karelian |
|
160 |
#Kashubian |
|
161 |
#Kazakh |
|
162 |
#Khinalug |
|
163 |
#(Khvarshi) * |
|
164 |
#(Kirmanji) |
|
165 |
#Komi |
|
166 |
#Komi-Permyak |
|
167 |
#(Kryts) |
|
168 |
#Kumyk |
|
169 |
#(Kurdish) |
|
170 |
#(Ladin) |
|
171 |
#(Ladino) |
|
172 |
#Lak |
|
173 |
#Laz |
|
174 |
#Lezgian |
|
175 |
#Livonian |
|
176 |
#(Ludian) |
|
177 |
#Luxemburgish ! |
|
178 |
#Mari, Hill |
|
179 |
#Mari, Meadow |
|
180 |
#Megleno-Romanian |
|
181 |
#(Mingrelian) |
|
182 |
#Moksha |
|
183 |
#Moldavian |
|
184 |
#Nenets, Tundra |
|
185 |
#Nogai |
|
186 |
#Occitan |
|
187 |
#Old Church Slavonic |
|
188 |
#(Olonets) |
|
189 |
#Ossetian |
|
190 |
#(Romani) |
|
191 |
#Romansch |
|
192 |
#(Rusyn) |
|
193 |
#Rutul |
|
194 |
#Sami, Inari |
|
195 |
#Sami, Kildin |
|
196 |
#Sami, Lule |
|
197 |
#Sami, Northern |
|
198 |
#Sami, Skolt |
|
199 |
#Sami, Southern |
|
200 |
#(Sami, Ter) * |
|
201 |
#(Sami, Ume) * |
|
202 |
#(Sardinian) * |
|
203 |
#Scots ! |
|
204 |
#Svan |
|
205 |
#Tabasaran |
|
206 |
#(Talysh) |
|
207 |
#Tatar, Crimean |
|
208 |
#Tatar, Kazan |
|
209 |
#Tati |
|
210 |
#(Tindi) * |
|
211 |
#(Tsakonian) * |
|
212 |
#Tsakhur * |
|
213 |
#(Tsez) * |
|
214 |
#(Turkish, Crimean) |
|
215 |
#Ubykh * |
|
216 |
#Udi |
|
217 |
#Udmurt |
|
218 |
#(Våmhusmål)
|
219 |
#Vepsian |
|
220 |
#Votic |
|
221 |
#(Walloon) |
|
222 |
#(Yiddish) |
|
223 |
################################
|
|
224 |
# 4 Gaelic-new-orthography |
|
225 |
# 4 Frisian |
|
226 |
# 3 Rhaeto-Romanic |
|
227 |
# 2 S&AACUTEmi-with-restrictions |
|
228 |
# 1 Rhjaeto-Romanic |
|
229 |
# 1 Gaelic-old-and-new-orthographies |
|
230 |
END
|
|
231 |
) | |
|
232 |
||
233 |
# Convert every "<language> <letters>" line arriving on stdin into one
# INSERT statement for the lang table, written to stdout.
#
# <letters> is accepted in two forms:
#   * a run of &#xNNNN; character references - the markup is stripped so only
#     the hex digits remain (this is what the former `replace` call did; sed
#     is used because the MySQL `replace` utility is no longer shipped);
#   * literal UTF-8 text - transcoded to big-endian UCS-2 and hex-dumped.
# Either way the result is the payload of a _ucs2 X'...' hex literal.
while read -r lang letters
do
    case $letters in
    *'&#x'*)
        hex=$(printf '%s\n' "$letters" | sed -e 's/&#x//g' -e 's/;//g')
        ;;
    *)
        # od -v disables duplicate-line folding; tr joins the dump into one
        # unbroken run of hex digits.
        hex=$(printf '%s' "$letters" | iconv -f UTF-8 -t UCS-2BE | od -An -v -tx1 | tr -d '[:space:]')
        ;;
    esac
    # The data is passed as printf arguments, never as part of the format, so
    # a stray % or backslash in the input cannot corrupt the statement.
    printf "INSERT INTO lang VALUES ('%s',_ucs2 X'%s');\n" "$lang" "$hex"
done
|
|
238 |
) | mysql -f test |
|
239 |
||
240 |
#mysql test << END |
|
241 |
#SELECT * FROM lang WHERE CONVERT(letters USING latin1) NOT LIKE _binary'%?%'; |
|
242 |
#SELECT * FROM lang WHERE CONVERT(letters USING latin2) NOT LIKE _binary'%?%'; |
|
243 |
#END |
|
244 |
||
245 |
||
246 |
||
247 |
# MySQL character sets to check against the letters stored in the lang table.
list="big5 dec8 cp850 hp8 koi8r latin1 latin2 swe7 ascii ujis sjis hebrew euckr koi8u gb2312 greek cp1250 gbk latin5 armscii8 cp866 keybcs2 macce macroman cp852 latin7 cp1251 cp1256 cp1257 geostd8"

# For each character set print a separator, the set's name, and then the
# languages whose letters contain no '?' once converted to that set - the
# query takes a '?' in the converted value as the sign of a letter that did
# not survive the conversion.  -sss (silent, repeated) trims the client's
# output decoration.
for charset in $list; do
    printf '%s\n' "-----------------"
    printf '%s:\n' "$charset"
    mysql -sss test <<SQL
SELECT lang FROM lang WHERE CONVERT(letters USING $charset) NOT LIKE _binary'%?%' ORDER BY lang;
SQL
done
|
|
257 |