1
/* Copyright (C) 2001-2006 MySQL AB

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
/* Written by Sergei A. Golubchik, who has a shared copyright to this code
added support for long options (my_getopt) 22.5.2002 by Jani Tolonen */
20
#include <my_getopt.h>
23
/* Forward declaration; complain() is defined further down in this file. */
static void complain(int val);
24
/* Per-option callback handed to handle_options() in main(). */
static my_bool get_one_option(int, const struct my_option *, char *);
26
/*
  Mode flags tested by main() and get_one_option().
  NOTE(review): presumably set from the -c/-s/-d/-l options; the assignments
  are not visible in this part of the file. count is additionally compared
  against total/2 in main()'s first loop over lengths[].
*/
static int count=0, stats=0, dump=0, lstats=0;
27
/* Bound to the --verbose option (GET_BOOL) through my_long_options. */
static my_bool verbose;
28
/*
  Only tested for being non-NULL in the visible code.
  NOTE(review): no assignment to it is visible here - confirm whether it is
  still used.
*/
static char *query=NULL;
29
/*
  256 counters printed by the final loop in main().
  NOTE(review): presumably indexed by word length in bytes; the statement
  that increments it is not visible here.
*/
static uint lengths[256];
31
/* Size of the word buffers in main() and the bound passed to snprintf(). */
#define MAX_LEN (HA_FT_MAXBYTELEN+10)
32
/* With --verbose, progress is printed whenever total is a multiple of this. */
#define HOW_OFTEN_TO_WRITE 10000
34
/*
  Command-line option table passed to handle_options(), my_print_help() and
  my_print_variables().

  Each entry starts with the long name, the short option character and the
  help text. Only --verbose stores its value directly (GET_BOOL into the
  verbose variable); every other entry is GET_NO_ARG with no variable bound.

  NOTE(review): the GET_NO_ARG options are presumably acted upon in
  get_one_option(); its case labels are not visible here. The opening and
  closing braces of the initializer are also missing from this view, and the
  bare numeric lines are extraction residue.
*/
static struct my_option my_long_options[] =
36
/* "help" is listed twice so that both -h and -? are accepted. */
{"help", 'h', "Display help and exit.",
37
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
38
{"help", '?', "Synonym for -h.",
39
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
40
{"count", 'c', "Calculate per-word stats (counts and global weights).",
41
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
42
{"dump", 'd', "Dump index (incl. data offsets and word weights).",
43
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
44
{"length", 'l', "Report length distribution.",
45
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
46
{"stats", 's', "Report global stats.",
47
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
48
{"verbose", 'v', "Be verbose.",
49
(uchar**) &verbose, (uchar**) &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
50
/* All-zero entry; presumably the end-of-table marker for my_getopt. */
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
54
/*
  Entry point.

  Parses the command line with handle_options(), opens the table named by
  argv[0] read-only with mi_open(), checks that index argv[1] is a FULLTEXT
  key, then walks that index with mi_rnext() and prints per-entry dump lines,
  per-word counts, a summary and a length distribution.

  NOTE(review): this view of the function is fragmentary. Braces, several
  declarations (error, info, end, subkeys, weight), the conditions guarding
  most statements and the return statement are not visible, and the bare
  numeric lines are extraction residue. The comments below describe only
  what the visible statements do.
*/
int main(int argc,char *argv[])
57
/* keylen/buf describe the current index entry, keylen2/buf2 the previous word. */
uint keylen, keylen2=0, inx, doc_cnt=0;
59
double gws, min_gws=0, avg_gws=0;
61
char buf[MAX_LEN], buf2[MAX_LEN], buf_maxlen[MAX_LEN], buf_min_gws[MAX_LEN];
62
ulong total=0, maxlen=0, uniq=0, max_doc_cnt=0;
63
struct { MI_INFO *info; } aio0, *aio=&aio0; /* for GWS_IN_USE */
66
/* A non-zero result is kept in error; what is done with it is not visible. */
if ((error= handle_options(&argc, &argv, my_long_options, get_one_option)))
70
/* No mode was selected; NOTE(review): the action taken is not visible here. */
if (!count && !dump && !lstats && !query)
81
/*
  Index number, parsed in base 10 from the second remaining argument.
  NOTE(review): the strtoll() result is narrowed to uint with no visible
  range check, and no visible code inspects end - confirm.
*/
inx= (uint) strtoll(argv[1], &end, 10);
86
init_key_cache(dflt_key_cache,MI_KEY_BLOCK_LENGTH,USE_BUFFER_INIT, 0, 0);
88
/* Open the table read-only; a NULL result is the failure case. */
if (!(info=mi_open(argv[0], O_RDONLY,
89
HA_OPEN_ABORT_IF_LOCKED|HA_OPEN_FROM_SQL_LAYER)))
98
/*
  Reject an index number that is out of range or not flagged HA_FULLTEXT.
  NOTE(review): inx is uint but is printed with "%d".
*/
if ((inx >= info->s->base.keys) ||
99
!(info->s->keyinfo[inx].flag & HA_FULLTEXT))
101
printf("Key %d in table %s is not a FULLTEXT key\n", inx, info->filename);
105
mi_lock_database(info, F_EXTRA_LCK);
107
/*
  Reset the position and set HA_STATE_PREV_FOUND before the first mi_rnext().
  NOTE(review): presumably this makes mi_rnext() start at the first key of
  the index - confirm against the MyISAM sources.
*/
info->lastpos= HA_OFFSET_ERROR;
108
info->update|= HA_STATE_PREV_FOUND;
110
/* Visit index entries until mi_rnext() returns non-zero; error is examined after the loop. */
while (!(error=mi_rnext(info,NULL,inx)))
112
/* The first byte of lastkey is the length of the word that follows it. */
keylen=*(info->lastkey);
114
/* The value stored right after the word is read with ft_sintXkorr(). */
subkeys=ft_sintXkorr(info->lastkey+keylen+1);
116
/*
  The same bits reinterpreted as a float.
  NOTE(review): reading subkeys through a float pointer breaks the C
  strict-aliasing rule; a memcpy() into weight would be the portable form.
*/
weight=*(float*)&subkeys;
119
/*
  Copy the word into buf as a NUL-terminated string.
  NOTE(review): the snprintf()/sprintf() pair is presumably two arms of a
  preprocessor conditional that is not visible here. The sprintf() form is
  unbounded and relies on keylen being smaller than MAX_LEN.
*/
snprintf(buf,MAX_LEN,"%.*s",(int) keylen,info->lastkey+1);
121
sprintf(buf,"%.*s",(int) keylen,info->lastkey+1);
123
/* NOTE(review): presumably lower-cases buf in place using the default charset. */
my_casedn_str(default_charset_info,buf);
129
/* The word differs from the previous one held in buf2. */
if (strcmp(buf, buf2))
134
/* gws comes from the GWS_IN_USE macro and is accumulated for the average printed later. */
avg_gws+=gws=GWS_IN_USE;
136
printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
140
/* buf_maxlen is reported later as the longest word. */
strmov(buf_maxlen, buf2);
142
/* max_doc_cnt, min_gws and buf_min_gws are reported later as the most common word. */
if (max_doc_cnt < doc_cnt)
145
strmov(buf_min_gws, buf2);
153
/* A non-negative subkeys adds 1; a negative one adds its magnitude. */
doc_cnt+= (subkeys >= 0 ? 1 : -subkeys);
158
/*
  Dump lines: position with weight, or position with -subkeys.
  NOTE(review): presumably selected by dump and the sign of subkeys; the
  conditions are not visible here.
*/
printf("%9lx %20.7f %s\n", (long) info->lastpos,weight,buf);
160
printf("%9lx => %17d %s\n",(long) info->lastpos,-subkeys,buf);
162
/*
  Progress indicator, rewritten in place by the trailing '\r'.
  NOTE(review): total is ulong but is printed with "%10ld".
*/
if (verbose && (total%HOW_OFTEN_TO_WRITE)==0)
163
printf("%10ld\r",total);
165
mi_lock_database(info, F_UNLCK);
172
/*
  Same per-word statements as inside the loop, repeated after it.
  NOTE(review): presumably this flushes the last word, which has no
  following word to trigger the in-loop version - confirm.
*/
avg_gws+=gws=GWS_IN_USE;
174
printf("%9u %20.7f %s\n",doc_cnt,gws,buf2);
178
strmov(buf_maxlen, buf2);
180
if (max_doc_cnt < doc_cnt)
183
strmov(buf_min_gws, buf2);
192
/*
  Scan over lengths[]; inx is afterwards printed as "Median length".
  NOTE(review): count is reused here as a running total; the statement that
  adds to it and the loop exit are not visible.
*/
for (inx=0;inx<256;inx++)
195
if ((ulong) count >= total/2)
198
/*
  Summary report.
  NOTE(review): the record count is cast to long but printed with "%lu".
  avg_gws/uniq has no visible guard against uniq being 0.
*/
printf("Total rows: %lu\nTotal words: %lu\n"
199
"Unique words: %lu\nLongest word: %lu chars (%s)\n"
200
"Median length: %u\n"
201
"Average global weight: %f\n"
202
"Most common word: %lu times, weight: %f (%s)\n",
203
(long) info->state->records, total, uniq, maxlen, buf_maxlen,
204
inx, avg_gws/uniq, max_doc_cnt, min_gws, buf_min_gws);
209
/*
  Length distribution: one line per slot that passes the test below.
  NOTE(review): the remaining printf() arguments are cut off in this view.
*/
for (inx=0; inx<256; inx++)
212
if (count && lengths[inx])
213
printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx,
214
(ulong) lengths[inx],100.0*lengths[inx]/total,(ulong) count,
220
/*
  Any loop result other than end-of-file is reported.
  NOTE(review): the value printed is my_errno, not error.
*/
if (error && error != HA_ERR_END_OF_FILE)
221
printf("got error %d\n",my_errno);
229
/*
  Option callback passed to handle_options() from main().

  optid is the only parameter not marked unused. The visible statements call
  complain() with an expression that is non-zero when another mode flag is
  already set.

  NOTE(review): the return-type line, the switch and its case labels, the
  flag assignments and the return value are not visible in this view, so
  which option triggers which check cannot be stated from here. The bare
  numeric lines are extraction residue.
*/
get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
230
char *argument __attribute__((unused)))
235
/* Non-zero argument when count or query is already set. */
complain(count || query);
243
/* Non-zero argument when dump or query is already set. */
complain(dump || query);
256
/*
  Help output: prints the usage line, then the option help and the option
  variables generated from my_long_options.

  NOTE(review): the header of the enclosing function is not visible in this
  view, and the bare numeric lines are extraction residue. What
  help_start.h / help_end.h expand to is not shown here.
*/
#include <help_start.h>
260
printf("Use: myisam_ftdump <table_name> <index_num>\n");
261
my_print_help(my_long_options);
262
my_print_variables(my_long_options);
263
/* NOTE(review): presumably a NetWare-specific macro - confirm it is a no-op elsewhere. */
NETWARE_SET_SCREEN_MODE(1);
267
#include <help_end.h>
269
/*
  Reports an invalid combination of options. get_one_option() passes a
  non-zero val when a conflicting mode flag is already set.

  NOTE(review): the test on val, the braces and any exit call are not
  visible in this view; only the message itself is shown. The bare numeric
  line is extraction residue.
*/
static void complain(int val) /* Kinda assert :-) */
273
printf("You cannot use these options together!\n");
278
#include "mi_extrafunc.h"