1
by brian
clean slate |
1 |
/* Copyright (C) 2000-2005 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */

/* Written by Sergei A. Golubchik, who has a shared copyright to this code */
|
|
17 |
||
18 |
#include "ftdefs.h" |
|
19 |
||
20 |
ulong ft_min_word_len=4; |
|
21 |
ulong ft_max_word_len=HA_FT_MAXCHARLEN; |
|
22 |
ulong ft_query_expansion_limit=5; |
|
23 |
char ft_boolean_syntax[]="+ -><()~*:\"\"&|"; |
|
24 |
||
25 |
const HA_KEYSEG ft_keysegs[FT_SEGS]={ |
|
26 |
{
|
|
27 |
0, /* charset */ |
|
28 |
HA_FT_WLEN, /* start */ |
|
29 |
0, /* null_pos */ |
|
30 |
0, /* Bit pos */ |
|
31 |
HA_VAR_LENGTH_PART | HA_PACK_KEY, /* flag */ |
|
32 |
HA_FT_MAXBYTELEN, /* length */ |
|
33 |
HA_KEYTYPE_VARTEXT2, /* type */ |
|
34 |
63, /* language (will be overwritten) */ |
|
35 |
0, /* null_bit */ |
|
36 |
2, 0, 0 /* bit_start, bit_end, bit_length */ |
|
37 |
},
|
|
38 |
{
|
|
39 |
/*
|
|
40 |
Note, this (and the last HA_KEYTYPE_END) segment should NOT
|
|
41 |
be packed in any way, otherwise w_search() won't be able to
|
|
42 |
update key entry 'in vivo'
|
|
43 |
*/
|
|
44 |
0, 0, 0, 0, HA_NO_SORT, HA_FT_WLEN, HA_FT_WTYPE, 63, 0, 0, 0, 0 |
|
45 |
}
|
|
46 |
};
|
|
47 |
||
48 |
const struct _ft_vft _ft_vft_nlq = { |
|
49 |
ft_nlq_read_next, ft_nlq_find_relevance, ft_nlq_close_search, |
|
50 |
ft_nlq_get_relevance, ft_nlq_reinit_search |
|
51 |
};
|
|
52 |
const struct _ft_vft _ft_vft_boolean = { |
|
53 |
ft_boolean_read_next, ft_boolean_find_relevance, ft_boolean_close_search, |
|
54 |
ft_boolean_get_relevance, ft_boolean_reinit_search |
|
55 |
};
|
|
56 |
||
57 |
||
58 |
FT_INFO *ft_init_search(uint flags, void *info, uint keynr, |
|
59 |
uchar *query, uint query_len, CHARSET_INFO *cs, |
|
60 |
uchar *record) |
|
61 |
{
|
|
62 |
FT_INFO *res; |
|
63 |
if (flags & FT_BOOL) |
|
64 |
res= ft_init_boolean_search((MI_INFO *)info, keynr, query, query_len,cs); |
|
65 |
else
|
|
66 |
res= ft_init_nlq_search((MI_INFO *)info, keynr, query, query_len, flags, |
|
67 |
record); |
|
68 |
return res; |
|
69 |
}
|
|
70 |
||
71 |
/* Stopword file name; starts out as a null pointer. */
const char *ft_stopword_file= NULL;

/*
  Built-in stopword list, terminated by a NULL entry.

  The words are compiled in only when COMPILE_STOPWORDS_IN is defined;
  otherwise the array holds just the terminating NULL.
*/
const char *ft_precompiled_stopwords[]= {

#ifdef COMPILE_STOPWORDS_IN

/* This particular stopword list was taken from SMART distribution
   ftp://ftp.cs.cornell.edu/pub/smart/smart.11.0.tar.Z
   it was slightly modified to my taste, though
 */

  "a's", "able", "about", "above", "according", "accordingly", "across",
  "actually", "after", "afterwards", "again", "against", "ain't", "all",
  "allow", "allows", "almost", "alone", "along", "already", "also",
  "although", "always", "am", "among", "amongst", "an", "and", "another",
  "any", "anybody", "anyhow", "anyone", "anything", "anyway", "anyways",
  "anywhere", "apart", "appear", "appreciate", "appropriate", "are",
  "aren't", "around", "as", "aside", "ask", "asking", "associated", "at",
  "available", "away", "awfully",

  "be", "became", "because", "become", "becomes", "becoming", "been",
  "before", "beforehand", "behind", "being", "believe", "below", "beside",
  "besides", "best", "better", "between", "beyond", "both", "brief", "but",
  "by",

  "c'mon", "c's", "came", "can", "can't", "cannot", "cant", "cause",
  "causes", "certain", "certainly", "changes", "clearly", "co", "com",
  "come", "comes", "concerning", "consequently", "consider", "considering",
  "contain", "containing", "contains", "corresponding", "could", "couldn't",
  "course", "currently",

  "definitely", "described", "despite", "did", "didn't", "different", "do",
  "does", "doesn't", "doing", "don't", "done", "down", "downwards",
  "during",

  "each", "edu", "eg", "eight", "either", "else", "elsewhere", "enough",
  "entirely", "especially", "et", "etc", "even", "ever", "every",
  "everybody", "everyone", "everything", "everywhere", "ex", "exactly",
  "example", "except",

  "far", "few", "fifth", "first", "five", "followed", "following",
  "follows", "for", "former", "formerly", "forth", "four", "from",
  "further", "furthermore",

  "get", "gets", "getting", "given", "gives", "go", "goes", "going", "gone",
  "got", "gotten", "greetings",

  "had", "hadn't", "happens", "hardly", "has", "hasn't", "have", "haven't",
  "having", "he", "he's", "hello", "help", "hence", "her", "here", "here's",
  "hereafter", "hereby", "herein", "hereupon", "hers", "herself", "hi",
  "him", "himself", "his", "hither", "hopefully", "how", "howbeit",
  "however",

  "i'd", "i'll", "i'm", "i've", "ie", "if", "ignored", "immediate", "in",
  "inasmuch", "inc", "indeed", "indicate", "indicated", "indicates",
  "inner", "insofar", "instead", "into", "inward", "is", "isn't", "it",
  "it'd", "it'll", "it's", "its", "itself",

  "just",

  "keep", "keeps", "kept", "know", "knows", "known",

  "last", "lately", "later", "latter", "latterly", "least", "less", "lest",
  "let", "let's", "like", "liked", "likely", "little", "look", "looking",
  "looks", "ltd",

  "mainly", "many", "may", "maybe", "me", "mean", "meanwhile", "merely",
  "might", "more", "moreover", "most", "mostly", "much", "must", "my",
  "myself",

  "name", "namely", "nd", "near", "nearly", "necessary", "need", "needs",
  "neither", "never", "nevertheless", "new", "next", "nine", "no", "nobody",
  "non", "none", "noone", "nor", "normally", "not", "nothing", "novel",
  "now", "nowhere",

  "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "on",
  "once", "one", "ones", "only", "onto", "or", "other", "others",
  "otherwise", "ought", "our", "ours", "ourselves", "out", "outside",
  "over", "overall", "own",

  "particular", "particularly", "per", "perhaps", "placed", "please",
  "plus", "possible", "presumably", "probably", "provides",

  "que", "quite", "qv",

  "rather", "rd", "re", "really", "reasonably", "regarding", "regardless",
  "regards", "relatively", "respectively", "right",

  "said", "same", "saw", "say", "saying", "says", "second", "secondly",
  "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "self",
  "selves", "sensible", "sent", "serious", "seriously", "seven", "several",
  "shall", "she", "should", "shouldn't", "since", "six", "so", "some",
  "somebody", "somehow", "someone", "something", "sometime", "sometimes",
  "somewhat", "somewhere", "soon", "sorry", "specified", "specify",
  "specifying", "still", "sub", "such", "sup", "sure",

  "t's", "take", "taken", "tell", "tends", "th", "than", "thank", "thanks",
  "thanx", "that", "that's", "thats", "the", "their", "theirs", "them",
  "themselves", "then", "thence", "there", "there's", "thereafter",
  "thereby", "therefore", "therein", "theres", "thereupon", "these", "they",
  "they'd", "they'll", "they're", "they've", "think", "third", "this",
  "thorough", "thoroughly", "those", "though", "three", "through",
  "throughout", "thru", "thus", "to", "together", "too", "took", "toward",
  "towards", "tried", "tries", "truly", "try", "trying", "twice", "two",

  "un", "under", "unfortunately", "unless", "unlikely", "until", "unto",
  "up", "upon", "us", "use", "used", "useful", "uses", "using", "usually",

  "value", "various", "very", "via", "viz", "vs",

  "want", "wants", "was", "wasn't", "way", "we", "we'd", "we'll", "we're",
  "we've", "welcome", "well", "went", "were", "weren't", "what", "what's",
  "whatever", "when", "whence", "whenever", "where", "where's",
  "whereafter", "whereas", "whereby", "wherein", "whereupon", "wherever",
  "whether", "which", "while", "whither", "who", "who's", "whoever",
  "whole", "whom", "whose", "why", "will", "willing", "wish", "with",
  "within", "without", "won't", "wonder", "would", "wouldn't",

  "yes", "yet", "you", "you'd", "you'll", "you're", "you've", "your",
  "yours", "yourself", "yourselves",

  "zero",
#endif

  NULL };
|
627 |
||
628 |
/*
  Parse callback of the default full-text parser.

  Passes the whole document (param->doc, param->length) to the
  param->mysql_parse callback and returns that callback's result unchanged.
*/
static int ft_default_parser_parse(MYSQL_FTPARSER_PARAM *param)
{
  return param->mysql_parse(param, param->doc, param->length);
}
|
|
632 |
||
633 |
struct st_mysql_ftparser ft_default_parser= |
|
634 |
{
|
|
635 |
MYSQL_FTPARSER_INTERFACE_VERSION, ft_default_parser_parse, 0, 0 |
|
636 |
};
|
|
637 |