1
by brian
clean slate |
1 |
/* Copyright (C) 2001-2006 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
|
|
15 |
||
16 |
/* Written by Sergei A. Golubchik, who has a shared copyright to this code
   added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
|
|
18 |
||
19 |
#include "ftdefs.h" |
|
20 |
#include <my_getopt.h> |
|
21 |
||
22 |
static void usage(); |
|
23 |
static void complain(int val); |
|
24 |
static my_bool get_one_option(int, const struct my_option *, char *); |
|
25 |
||
26 |
static int count=0, stats=0, dump=0, lstats=0; |
|
27 |
static my_bool verbose; |
|
28 |
static char *query=NULL; |
|
29 |
static uint lengths[256]; |
|
30 |
||
31 |
#define MAX_LEN (HA_FT_MAXBYTELEN+10)
|
|
32 |
#define HOW_OFTEN_TO_WRITE 10000
|
|
33 |
||
34 |
static struct my_option my_long_options[] = |
|
35 |
{
|
|
36 |
{"help", 'h', "Display help and exit.", |
|
37 |
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, |
|
38 |
{"help", '?', "Synonym for -h.", |
|
39 |
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, |
|
40 |
{"count", 'c', "Calculate per-word stats (counts and global weights).", |
|
41 |
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, |
|
42 |
{"dump", 'd', "Dump index (incl. data offsets and word weights).", |
|
43 |
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, |
|
44 |
{"length", 'l', "Report length distribution.", |
|
45 |
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, |
|
46 |
{"stats", 's', "Report global stats.", |
|
47 |
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, |
|
48 |
{"verbose", 'v', "Be verbose.", |
|
49 |
(uchar**) &verbose, (uchar**) &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, |
|
50 |
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} |
|
51 |
};
|
|
52 |
||
53 |
||
54 |
int main(int argc,char *argv[]) |
|
55 |
{
|
|
56 |
int error=0, subkeys; |
|
57 |
uint keylen, keylen2=0, inx, doc_cnt=0; |
|
58 |
float weight= 1.0; |
|
59 |
double gws, min_gws=0, avg_gws=0; |
|
60 |
MI_INFO *info; |
|
61 |
char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN]; |
|
62 |
ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0; |
|
63 |
struct { MI_INFO *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */ |
|
64 |
||
65 |
MY_INIT(argv[0]); |
|
66 |
if ((error= handle_options(&argc, &argv, my_long_options, get_one_option))) |
|
67 |
exit(error); |
|
68 |
if (count || dump) |
|
69 |
verbose=0; |
|
70 |
if (!count && !dump && !lstats && !query) |
|
71 |
stats=1; |
|
72 |
||
73 |
if (verbose) |
|
74 |
setbuf(stdout,NULL); |
|
75 |
||
76 |
if (argc < 2) |
|
77 |
usage(); |
|
78 |
||
79 |
{
|
|
80 |
char *end; |
|
81 |
inx= (uint) strtoll(argv[1], &end, 10); |
|
82 |
if (*end) |
|
83 |
usage(); |
|
84 |
}
|
|
85 |
||
86 |
init_key_cache(dflt_key_cache,MI_KEY_BLOCK_LENGTH,USE_BUFFER_INIT, 0, 0); |
|
87 |
||
88 |
if (!(info=mi_open(argv[0], O_RDONLY, |
|
89 |
HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER))) |
|
90 |
{
|
|
91 |
error=my_errno; |
|
92 |
goto err; |
|
93 |
}
|
|
94 |
||
95 |
*buf2=0; |
|
96 |
aio->info=info; |
|
97 |
||
98 |
if ((inx >= info->s->base.keys) || |
|
99 |
!(info->s->keyinfo[inx].flag & HA_FULLTEXT)) |
|
100 |
{
|
|
101 |
printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->filename); |
|
102 |
goto err; |
|
103 |
}
|
|
104 |
||
105 |
mi_lock_database(info, F_EXTRA_LCK); |
|
106 |
||
107 |
info->lastpos= HA_OFFSET_ERROR; |
|
108 |
info->update|= HA_STATE_PREV_FOUND; |
|
109 |
||
110 |
while (!(error=mi_rnext(info,NULL,inx))) |
|
111 |
{
|
|
112 |
keylen=*(info->lastkey); |
|
113 |
||
114 |
subkeys=ft_sintXkorr(info->lastkey+keylen+1); |
|
115 |
if (subkeys >= 0) |
|
116 |
weight=*(float*)&subkeys; |
|
117 |
||
118 |
#ifdef HAVE_SNPRINTF
|
|
119 |
snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1); |
|
120 |
#else
|
|
121 |
sprintf(buf,"%.*s",(int) keylen,info->lastkey+1); |
|
122 |
#endif
|
|
123 |
my_casedn_str(default_charset_info,buf); |
|
124 |
total++; |
|
125 |
lengths[keylen]++; |
|
126 |
||
127 |
if (count || stats) |
|
128 |
{
|
|
129 |
if (strcmp(buf, buf2)) |
|
130 |
{
|
|
131 |
if (*buf2) |
|
132 |
{
|
|
133 |
uniq++; |
|
134 |
avg_gws+=gws=GWS_IN_USE; |
|
135 |
if (count) |
|
136 |
printf("%9u %20.7f %s\n",doc_cnt,gws,buf2); |
|
137 |
if (maxlen<keylen2) |
|
138 |
{
|
|
139 |
maxlen=keylen2; |
|
140 |
strmov(buf_maxlen, buf2); |
|
141 |
}
|
|
142 |
if (max_doc_cnt < doc_cnt) |
|
143 |
{
|
|
144 |
max_doc_cnt=doc_cnt; |
|
145 |
strmov(buf_min_gws, buf2); |
|
146 |
min_gws=gws; |
|
147 |
}
|
|
148 |
}
|
|
149 |
strmov(buf2, buf); |
|
150 |
keylen2=keylen; |
|
151 |
doc_cnt=0; |
|
152 |
}
|
|
153 |
doc_cnt+= (subkeys >= 0 ? 1 : -subkeys); |
|
154 |
}
|
|
155 |
if (dump) |
|
156 |
{
|
|
157 |
if (subkeys>=0) |
|
158 |
printf("%9lx %20.7f %s\n", (long) info->lastpos,weight,buf); |
|
159 |
else
|
|
160 |
printf("%9lx => %17d %s\n",(long) info->lastpos,-subkeys,buf); |
|
161 |
}
|
|
162 |
if (verbose && (total%HOW_OFTEN_TO_WRITE)==0) |
|
163 |
printf("%10ld\r",total); |
|
164 |
}
|
|
165 |
mi_lock_database(info, F_UNLCK); |
|
166 |
||
167 |
if (count || stats) |
|
168 |
{
|
|
169 |
if (*buf2) |
|
170 |
{
|
|
171 |
uniq++; |
|
172 |
avg_gws+=gws=GWS_IN_USE; |
|
173 |
if (count) |
|
174 |
printf("%9u %20.7f %s\n",doc_cnt,gws,buf2); |
|
175 |
if (maxlen<keylen2) |
|
176 |
{
|
|
177 |
maxlen=keylen2; |
|
178 |
strmov(buf_maxlen, buf2); |
|
179 |
}
|
|
180 |
if (max_doc_cnt < doc_cnt) |
|
181 |
{
|
|
182 |
max_doc_cnt=doc_cnt; |
|
183 |
strmov(buf_min_gws, buf2); |
|
184 |
min_gws=gws; |
|
185 |
}
|
|
186 |
}
|
|
187 |
}
|
|
188 |
||
189 |
if (stats) |
|
190 |
{
|
|
191 |
count=0; |
|
192 |
for (inx=0;inx<256;inx++) |
|
193 |
{
|
|
194 |
count+=lengths[inx]; |
|
195 |
if ((ulong) count >= total/2) |
|
196 |
break; |
|
197 |
}
|
|
198 |
printf("Total rows: %lu\nTotal words: %lu\n" |
|
199 |
"Unique words: %lu\nLongest word: %lu chars (%s)\n" |
|
200 |
"Median length: %u\n" |
|
201 |
"Average global weight: %f\n" |
|
202 |
"Most common word: %lu times, weight: %f (%s)\n", |
|
203 |
(long) info->state->records, total, uniq, maxlen, buf_maxlen, |
|
204 |
inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws); |
|
205 |
}
|
|
206 |
if (lstats) |
|
207 |
{
|
|
208 |
count=0; |
|
209 |
for (inx=0; inx<256; inx++) |
|
210 |
{
|
|
211 |
count+=lengths[inx]; |
|
212 |
if (count && lengths[inx]) |
|
213 |
printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx, |
|
214 |
(ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count, |
|
215 |
100.0*count/total); |
|
216 |
}
|
|
217 |
}
|
|
218 |
||
219 |
err: |
|
220 |
if (error && error != HA_ERR_END_OF_FILE) |
|
221 |
printf("got error %d\n",my_errno); |
|
222 |
if (info) |
|
223 |
mi_close(info); |
|
224 |
return 0; |
|
225 |
}
|
|
226 |
||
227 |
||
228 |
static my_bool |
|
229 |
get_one_option(int optid, const struct my_option *opt __attribute__((unused)), |
|
230 |
char *argument __attribute__((unused))) |
|
231 |
{
|
|
232 |
switch(optid) { |
|
233 |
case 'd': |
|
234 |
dump=1; |
|
235 |
complain(count || query); |
|
236 |
break; |
|
237 |
case 's': |
|
238 |
stats=1; |
|
239 |
complain(query!=0); |
|
240 |
break; |
|
241 |
case 'c': |
|
242 |
count= 1; |
|
243 |
complain(dump || query); |
|
244 |
break; |
|
245 |
case 'l': |
|
246 |
lstats=1; |
|
247 |
complain(query!=0); |
|
248 |
break; |
|
249 |
case '?': |
|
250 |
case 'h': |
|
251 |
usage(); |
|
252 |
}
|
|
253 |
return 0; |
|
254 |
}
|
|
255 |
||
256 |
#include <help_start.h> |
|
257 |
||
258 |
static void usage() |
|
259 |
{
|
|
260 |
printf("Use: myisam_ftdump <table_name> <index_num>\n"); |
|
261 |
my_print_help(my_long_options); |
|
262 |
my_print_variables(my_long_options); |
|
263 |
NETWARE_SET_SCREEN_MODE(1); |
|
264 |
exit(1); |
|
265 |
}
|
|
266 |
||
267 |
#include <help_end.h> |
|
268 |
||
269 |
/*
  Poor man's assert for option combinations: when val is non-zero, print
  an error message and terminate the process with exit status 1;
  otherwise return without doing anything.
*/
static void complain(int val)
{
  if (!val)
    return;

  printf("You cannot use these options together!\n");
  exit(1);
}
|
|
277 |
||
278 |
#include "mi_extrafunc.h" |