2
Copyright (c) 2010, Yahoo! Inc. All rights reserved.
3
Code licensed under the BSD License:
4
http://developer.yahoo.com/yui/license.html
8
YUI.add('text-accentfold', function(Y) {
18
* Provides a basic accent folding implementation that converts common accented
19
* letters (like "á") to their non-accented forms (like "a").
22
* @submodule text-accentfold
27
* Provides a basic accent folding implementation that converts common accented
28
* letters (like "á") to their non-accented forms (like "a").
32
* This implementation is not comprehensive, and should only be used as a last
33
* resort when accent folding can't be done on the server. A comprehensive
34
* accent folding implementation would require much more character data to be
35
* sent to the browser, resulting in a significant performance penalty. This
36
* implementation strives for a compromise between usefulness and performance.
40
* Accent folding is a destructive operation that can't be reversed, and may
41
* change or destroy the actual meaning of the text depending on the language.
42
* It should not be used on strings that will later be displayed to a user,
43
* unless this is done with the understanding that linguistic meaning may be
44
* lost and that you may in fact confuse or insult the user by doing so.
48
* When used for matching, accent folding is likely to produce erroneous matches
49
* for languages in which characters with diacritics are considered different
50
* from their base characters, or where correct folding would map to other
51
* character sequences than just stripped characters. For example, in German
52
* "ü" is a character that's clearly different from "u" and should match "ue"
53
* instead. The word "betrügen" means "to defraud", while "betrugen" is the past
54
* tense of "to behave". The name "Müller" is expected to match "Mueller", but
55
* not "Muller". On the other hand, accent folding falls short for languages
56
* where different base characters are expected to match. In Japanese, for
57
* example, hiragana and katakana characters with the same pronunciation ("あ"
58
* and "ア") are commonly treated as equivalent for lookups, but accent folding
59
* treats them as different.
62
* @class Text.AccentFold
68
FoldData = Text.Data.AccentFold,
71
// -- Public Static Methods ------------------------------------------------
74
* Returns <code>true</code> if the specified string contains one or more
75
* characters that can be folded, <code>false</code> otherwise.
78
* @param {String} string String to test.
82
canFold: function (string) {
85
for (letter in FoldData) {
86
if (FoldData.hasOwnProperty(letter) &&
87
string.search(FoldData[letter]) !== -1) {
96
* Compares the accent-folded versions of two strings and returns
97
* <code>true</code> if they're the same, <code>false</code> otherwise. If
98
* a custom comparison function is supplied, the accent-folded strings will
99
* be passed to that function for comparison.
102
* @param {String} a First string to compare.
103
* @param {String} b Second string to compare.
104
* @param {Function} func (optional) Custom comparison function. Should
105
* return a truthy or falsy value.
106
* @return {Boolean} Results of the comparison.
109
compare: function (a, b, func) {
110
var aFolded = AccentFold.fold(a),
111
bFolded = AccentFold.fold(b);
113
return func ? !!func(aFolded, bFolded) : aFolded === bFolded;
118
* Returns a copy of <em>haystack</em> containing only the strings for which
119
* the supplied function returns <code>true</code>.
123
* While comparisons will be made using accent-folded strings, the returned
124
* array of matches will contain the original strings that were passed in.
128
* @param {Array} haystack Array of strings to filter.
129
* @param {Function} func Comparison function. Will receive an accent-folded
130
* haystack string as an argument, and should return a truthy or falsy
132
* @return {Array} Filtered copy of <em>haystack</em>.
135
filter: function (haystack, func) {
136
return YArray.filter(haystack, function (item) {
137
return func(AccentFold.fold(item));
142
* Accent-folds the specified string or array of strings and returns a copy
143
* in which common accented letters have been converted to their closest
144
* non-accented, lowercase forms.
147
* @param {String|Array} input String or array of strings to be folded.
148
* @return {String|Array} Folded string or array of strings.
151
fold: function (input) {
152
if (Y.Lang.isArray(input)) {
153
return YArray.map(input, AccentFold.fold);
156
input = input.toLowerCase();
158
Y.Object.each(FoldData, function (regex, letter) {
159
input = input.replace(regex, letter);
166
Text.AccentFold = AccentFold;
169
}, '3.3.0' ,{requires:['array-extras', 'text-data-accentfold']});
170
YUI.add('text-data-accentfold', function(Y) {
172
// The following tool was very helpful in creating these mappings:
173
// http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:toNFKD%3D/^a/:]&abb=on
175
Y.namespace('Text.Data').AccentFold = {
186
a: /[ªà-åāăąǎǟǡǻȁȃȧᵃḁẚạảấầẩẫậắằẳẵặⓐa]/gi,
190
e: /[è-ëēĕėęěȅȇȩᵉḕḗḙḛḝẹẻẽếềểễệₑℯⓔe]/gi,
192
g: /[ĝğġģǧǵᵍḡℊⓖg]/gi,
193
h: /[ĥȟʰḣḥḧḩḫẖℎⓗh]/gi,
194
i: /[ì-ïĩīĭįijǐȉȋᵢḭḯỉịⁱℹⅰⓘi]/gi,
197
l: /[ĺļľŀljˡḷḹḻḽℓⅼⓛl]/gi,
199
n: /[ñńņňǹṅṇṉṋⁿⓝn]/gi,
200
o: /[ºò-öōŏőơǒǫǭȍȏȫȭȯȱᵒṍṏṑṓọỏốồổỗộớờởỡợₒℴⓞo]/gi,
203
r: /[ŕŗřȑȓʳᵣṙṛṝṟⓡr]/gi,
204
s: /[śŝşšſșˢṡṣṥṧṩẛⓢs]/gi,
205
t: /[ţťțᵗṫṭṯṱẗⓣt]/gi,
206
u: /[ù-üũūŭůűųưǔǖǘǚǜȕȗᵘᵤṳṵṷṹṻụủứừửữựⓤu]/gi,
210
y: /[ýÿŷȳʸẏẙỳỵỷỹⓨy]/gi,
216
YUI.add('text-data-wordbreak', function(Y) {
218
Y.namespace('Text.Data').WordBreak = {
219
// The UnicodeSet utility is helpful for enumerating the specific code
220
// points covered by each of these regular expressions:
221
// http://unicode.org/cldr/utility/list-unicodeset.jsp
223
// The code sets from which these regexes were derived can be generated
224
// by the UnicodeSet utility using the links here:
225
// http://unicode.org/cldr/utility/properties.jsp?a=Word_Break#Word_Break
227
aletter : '[A-Za-zªµºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬˮͰ-ʹͶͷͺ-ͽΆΈ-ΊΌΎ-ΡΣ-ϵϷ-ҁҊ-ԧԱ-Ֆՙա-ևא-תװ-׳ؠ-يٮٯٱ-ۓەۥۦۮۯۺ-ۼۿܐܒ-ܯݍ-ޥޱߊ-ߪߴߵߺࠀ-ࠕࠚࠤࠨࡀ-ࡘऄ-हऽॐक़-ॡॱ-ॷॹ-ॿঅ-ঌএঐও-নপ-রলশ-হঽৎড়ঢ়য়-ৡৰৱਅ-ਊਏਐਓ-ਨਪ-ਰਲਲ਼ਵਸ਼ਸਹਖ਼-ੜਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલળવ-હઽૐૠૡଅ-ଌଏଐଓ-ନପ-ରଲଳଵ-ହଽଡ଼ଢ଼ୟ-ୡୱஃஅ-ஊஎ-ஐஒ-கஙசஜஞடணதந-பம-ஹௐఅ-ఌఎ-ఐఒ-నప-ళవ-హఽౘౙౠౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽೞೠೡೱೲഅ-ഌഎ-ഐഒ-ഺഽൎൠൡൺ-ൿඅ-ඖක-නඳ-රලව-ෆༀཀ-ཇཉ-ཬྈ-ྌႠ-Ⴥა-ჺჼᄀ-ቈቊ-ቍቐ-ቖቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏼᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᛮ-ᛰᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰᠠ-ᡷᢀ-ᢨᢪᢰ-ᣵᤀ-ᤜᨀ-ᨖᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮᮯᯀ-ᯥᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱⁿₐ-ₜℂℇℊ-ℓℕℙ-ℝℤΩℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎⅠ-ↈⒶ-ⓩⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⴀ-ⴥⴰ-ⵥⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ々〻〼ㄅ-ㄭㄱ-ㆎㆠ-ㆺꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪꘫꙀ-ꙮꙿ-ꚗꚠ-ꛯꜗ-ꜟꜢ-ꞈꞋ-ꞎꞐꞑꞠ-ꞩꟺ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏꨀ-ꨨꩀ-ꩂꩄ-ꩋꬁ-ꬆꬉ-ꬎꬑ-ꬖꬠ-ꬦꬨ-ꬮꯀ-ꯢ가-힣ힰ-ퟆퟋ-ퟻff-stﬓ-ﬗיִײַ-ﬨשׁ-זּטּ-לּמּנּסּףּפּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zᅠ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ]',
228
midnumlet : "['\\.‘’․﹒'.]",
229
midletter : '[:··״‧︓﹕:]',
230
midnum : '[,;;։،؍٬߸⁄︐︔﹐﹔,;]',
231
numeric : '[0-9٠-٩٫۰-۹߀-߉०-९০-৯੦-੯૦-૯୦-୯௦-௯౦-౯೦-೯൦-൯๐-๙໐-໙༠-༩၀-၉႐-႙០-៩᠐-᠙᥆-᥏᧐-᧙᪀-᪉᪐-᪙᭐-᭙᮰-᮹᱀-᱉᱐-᱙꘠-꘩꣐-꣙꤀-꤉꧐-꧙꩐-꩙꯰-꯹]',
234
newline : '[\u000B\u000C\u0085\u2028\u2029]',
235
extend : '[\u0300-\u036F\u0483-\u0489\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u0900-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u0A01-\u0A03\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A70\u0A71\u0A75\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AE2\u0AE3\u0B01-\u0B03\u0B3C\u0B3E-\u0B44\u0B47\u0B48\u0B4B-\u0B4D\u0B56\u0B57\u0B62\u0B63\u0B82\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C01-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C62\u0C63\u0C82\u0C83\u0CBC\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CE2\u0CE3\u0D02\u0D03\u0D3E-\u0D44\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D62\u0D63\u0D82\u0D83\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DF2\u0DF3\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB\u0EBC\u0EC8-\u0ECD\u0F18\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u102B-\u103E\u1056-\u1059\u105E-\u1060\u1062-\u1064\u1067-\u106D\u1071-\u1074\u1082-\u108D\u108F\u109A-\u109D\u135D-\u135F\u1712-\u1714\u1732-\u1734\u1752\u1753\u1772\u1773\u17B6-\u17D3\u17DD\u180B-\u180D\u18A9\u1920-\u192B\u1930-\u193B\u19B0-\u19C0\u19C8\u19C9\u1A17-\u1A1B\u1A55-\u1A5E\u1A60-\u1A7C\u1A7F\u1B00-\u1B04\u1B34-\u1B44\u1B6B-\u1B73\u1B80-\u1B82\u1BA1-\u1BAA\u1BE6-\u1BF3\u1C24-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE8\u1CED\u1CF2\u1DC0-\u1DE6\u1DFC-\u1DFF\u200C\u200D\u20D0-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302F\u3099\u309A\uA66F-\uA672\uA67C\uA67D\uA6F0\uA6F1\uA802\uA806\uA80B\uA823-\uA827\uA880\uA881\uA8B4-\uA8C4\uA8E0-\uA8F1\uA926-\uA92D\uA947-\uA953\uA980-\uA983\uA9B3-\uA9C0\uAA29-\uAA36\uAA43\uAA4C\uAA4D\uAA7B\uAAB0\uAAB2-\uAAB4\uAAB7\uAAB8\uAABE\uAABF\uAAC1\uABE3-\uABEA\uABEC\uABED\uFB1E\uFE00-\uFE0F\uFE
20-\uFE26\uFF9E\uFF9F]',
236
format : '[\u00AD\u0600-\u0603\u06DD\u070F\u17B4\u17B5\u200E\u200F\u202A-\u202E\u2060-\u2064\u206A-\u206F\uFEFF\uFFF9-\uFFFB]',
237
katakana : '[〱-〵゛゜゠-ヺー-ヿㇰ-ㇿ㋐-㋾㌀-㍗ヲ-ン]',
238
extendnumlet: '[_‿⁀⁔︳︴﹍-﹏_]',
239
punctuation : '[!-#%-*,-\\/:;?@\\[-\\]_{}¡«·»¿;·՚-՟։֊־׀׃׆׳״؉؊،؍؛؞؟٪-٭۔܀-܍߷-߹࠰-࠾࡞।॥॰෴๏๚๛༄-༒༺-༽྅࿐-࿔࿙࿚၊-၏჻፡-፨᐀᙭᙮᚛᚜᛫-᛭᜵᜶។-៖៘-៚᠀-᠊᥄᥅᨞᨟᪠-᪦᪨-᪭᭚-᭠᯼-᯿᰻-᰿᱾᱿᳓‐-‧‰-⁃⁅-⁑⁓-⁞⁽⁾₍₎〈〉❨-❵⟅⟆⟦-⟯⦃-⦘⧘-⧛⧼⧽⳹-⳼⳾⳿⵰⸀-⸮⸰⸱、-〃〈-】〔-〟〰〽゠・꓾꓿꘍-꘏꙳꙾꛲-꛷꡴-꡷꣎꣏꣸-꣺꤮꤯꥟꧁-꧍꧞꧟꩜-꩟꫞꫟꯫﴾﴿︐-︙︰-﹒﹔-﹡﹣﹨﹪﹫!-#%-*,-/:;?@[-]_{}⦅-・]'
244
YUI.add('text-wordbreak', function(Y) {
247
* Provides utility methods for splitting strings on word breaks and determining
248
* whether a character index represents a word boundary.
251
* @submodule text-wordbreak
256
* Provides utility methods for splitting strings on word breaks and determining
257
* whether a character index represents a word boundary, using the generic word
258
* breaking algorithm defined in the Unicode Text Segmentation guidelines
259
* (<a href="http://unicode.org/reports/tr29/#Word_Boundaries">Unicode Standard
264
* This algorithm provides a reasonable default for many languages. However, it
265
* does not cover language or context specific requirements, and it does not
266
* provide meaningful results at all for languages that don't use spaces between
267
* words, such as Chinese, Japanese, Thai, Lao, Khmer, and others. Server-based
268
* word breaking services usually provide significantly better results with
269
* better performance.
272
* @class Text.WordBreak
277
WBData = Text.Data.WordBreak,
279
// Constants representing code point classifications.
294
// RegExp objects generated from code point data. Each regex matches a single
295
// character against a set of Unicode code points. The index of each item in
296
// this array must match its corresponding code point constant value defined
299
new RegExp(WBData.aletter),
300
new RegExp(WBData.midnumlet),
301
new RegExp(WBData.midletter),
302
new RegExp(WBData.midnum),
303
new RegExp(WBData.numeric),
304
new RegExp(WBData.cr),
305
new RegExp(WBData.lf),
306
new RegExp(WBData.newline),
307
new RegExp(WBData.extend),
308
new RegExp(WBData.format),
309
new RegExp(WBData.katakana),
310
new RegExp(WBData.extendnumlet)
314
PUNCTUATION = new RegExp('^' + WBData.punctuation + '$'),
318
// -- Public Static Methods ------------------------------------------------
321
* Splits the specified string into an array of individual words.
324
* @param {String} string String to split.
325
* @param {Object} options (optional) Options object containing zero or more
326
* of the following properties:
329
* <dt>ignoreCase (Boolean)</dt>
331
* If <code>true</code>, the string will be converted to lowercase
332
* before being split. Default is <code>false</code>.
335
* <dt>includePunctuation (Boolean)</dt>
337
* If <code>true</code>, the returned array will include punctuation
338
* characters. Default is <code>false</code>.
341
* <dt>includeWhitespace (Boolean)</dt>
343
* If <code>true</code>, the returned array will include whitespace
344
* characters. Default is <code>false</code>.
347
* @return {Array} Array of words.
350
getWords: function (string, options) {
352
map = WordBreak._classify(string),
364
if (options.ignoreCase) {
365
string = string.toLowerCase();
368
includePunctuation = options.includePunctuation;
369
includeWhitespace = options.includeWhitespace;
371
// Loop through each character in the classification map and determine
372
// whether it precedes a word boundary, building an array of distinct
374
for (; i < len; ++i) {
375
chr = string.charAt(i);
377
// Append this character to the current word.
380
// If there's a word boundary between the current character and the
381
// next character, append the current word to the words array and
382
// start building a new word.
383
if (WordBreak._isWordBoundary(map, i)) {
384
word = word.join(EMPTY_STRING);
387
(includeWhitespace || !WHITESPACE.test(word)) &&
388
(includePunctuation || !PUNCTUATION.test(word))) {
400
* Returns an array containing only unique words from the specified string.
401
* For example, the string <code>'foo bar baz foo'</code> would result in
402
* the array <code>['foo', 'bar', 'baz']</code>.
404
* @method getUniqueWords
405
* @param {String} string String to split.
406
* @param {Object} options (optional) Options (see <code>getWords()</code>
408
* @return {Array} Array of unique words.
411
getUniqueWords: function (string, options) {
412
return Y.Array.unique(WordBreak.getWords(string, options));
417
* Returns <code>true</code> if there is a word boundary between the
418
* specified character index and the next character index (or the end of the
423
* Note that there are always word breaks at the beginning and end of a
424
* string, so <code>isWordBoundary('', 0)</code> and
425
* <code>isWordBoundary('a', 0)</code> will both return <code>true</code>.
428
* @method isWordBoundary
429
* @param {String} string String to test.
430
* @param {Number} index Character index to test within the string.
431
* @return {Boolean} <code>true</code> for a word boundary,
432
* <code>false</code> otherwise.
435
isWordBoundary: function (string, index) {
436
return WordBreak._isWordBoundary(WordBreak._classify(string), index);
439
// -- Protected Static Methods ---------------------------------------------
442
* Returns a character classification map for the specified string.
445
* @param {String} string String to classify.
446
* @return {Array} Classification map.
450
_classify: function (string) {
456
stringLength = string.length,
457
setsLength = SETS.length,
460
for (; i < stringLength; ++i) {
461
chr = string.charAt(i);
464
for (j = 0; j < setsLength; ++j) {
467
if (set && set.test(chr)) {
481
* Returns <code>true</code> if there is a word boundary between the
482
* specified character index and the next character index (or the end of the
487
* Note that there are always word breaks at the beginning and end of a
488
* string, so <code>_isWordBoundary('', 0)</code> and
489
* <code>_isWordBoundary('a', 0)</code> will both return <code>true</code>.
492
* @method _isWordBoundary
493
* @param {Array} map Character classification map generated by
494
* <code>_classify</code>.
495
* @param {Number} index Character index to test.
500
_isWordBoundary: function (map, index) {
503
nextType = map[index + 1],
506
if (index < 0 || (index > map.length - 1 && index !== 0)) {
510
// WB5. Don't break between most letters.
511
if (type === ALETTER && nextType === ALETTER) {
515
nextNextType = map[index + 2];
517
// WB6. Don't break letters across certain punctuation.
518
if (type === ALETTER &&
519
(nextType === MIDLETTER || nextType === MIDNUMLET) &&
520
nextNextType === ALETTER) {
524
prevType = map[index - 1];
526
// WB7. Don't break letters across certain punctuation.
527
if ((type === MIDLETTER || type === MIDNUMLET) &&
528
nextType === ALETTER &&
529
prevType === ALETTER) {
533
// WB8/WB9/WB10. Don't break inside sequences of digits or digits
534
// adjacent to letters.
535
if ((type === NUMERIC || type === ALETTER) &&
536
(nextType === NUMERIC || nextType === ALETTER)) {
540
// WB11. Don't break inside numeric sequences like "3.2" or
542
if ((type === MIDNUM || type === MIDNUMLET) &&
543
nextType === NUMERIC &&
544
prevType === NUMERIC) {
548
// WB12. Don't break inside numeric sequences like "3.2" or
550
if (type === NUMERIC &&
551
(nextType === MIDNUM || nextType === MIDNUMLET) &&
552
nextNextType === NUMERIC) {
556
// WB4. Ignore format and extend characters.
557
if (type === EXTEND || type === FORMAT ||
558
prevType === EXTEND || prevType === FORMAT ||
559
nextType === EXTEND || nextType === FORMAT) {
563
// WB3. Don't break inside CRLF.
564
if (type === CR && nextType === LF) {
568
// WB3a. Break after newlines (including CR and LF).
569
if (type === NEWLINE || type === CR || type === LF) {
573
// WB3b. Break before newlines (including CR and LF).
574
if (nextType === NEWLINE || nextType === CR || nextType === LF) {
578
// WB13. Don't break between Katakana characters.
579
if (type === KATAKANA && nextType === KATAKANA) {
583
// WB13a. Don't break from extenders.
584
if (nextType === EXTENDNUMLET &&
585
(type === ALETTER || type === NUMERIC || type === KATAKANA ||
586
type === EXTENDNUMLET)) {
590
// WB13b. Don't break from extenders.
591
if (type === EXTENDNUMLET &&
592
(nextType === ALETTER || nextType === NUMERIC ||
593
nextType === KATAKANA)) {
597
// Break after any character not covered by the rules above.
602
Text.WordBreak = WordBreak;
605
}, '3.3.0' ,{requires:['array-extras', 'text-data-wordbreak']});
608
YUI.add('text', function(Y){}, '3.3.0' ,{use:['text-accentfold', 'text-wordbreak']});