/* Copyright (C) 2000-2006 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* Pack MyISAM file */

#ifndef USE_MY_FUNC
#define USE_MY_FUNC                     /* We need at least my_malloc */
#endif

#include "myisamdef.h"
#include <queues.h>
#include <my_tree.h>
#include "mysys_err.h"
#ifndef __GNU_LIBRARY__
#define __GNU_LIBRARY__                 /* Skip warnings in getopt.h */
#endif
#include <my_getopt.h>
#include <assert.h>

#if SIZEOF_LONG_LONG > 4
#define BITS_SAVED 64
#else
#define BITS_SAVED 32
#endif

#define IS_OFFSET ((uint) 32768)        /* Bit if offset or char in tree */
#define HEAD_LENGTH        32
#define ALLOWED_JOIN_DIFF  256          /* Diff allowed to join trees */

#define DATA_TMP_EXT  ".TMD"
#define OLD_EXT       ".OLD"
#define WRITE_COUNT   MY_HOW_OFTEN_TO_WRITE

struct st_file_buffer {
  File file;
  uchar *buffer,*pos,*end;
  my_off_t pos_in_file;
  int bits;
  uint64_t bitbucket;
};
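/*
  st_file_buffer is the shared output buffer driven by init_file_buffer(),
  write_bits(), flush_bits() and end_file_buffer() declared further down.
  Their bodies are not part of this excerpt, but the fields suggest the
  usual bit-packing scheme: 'bits' tracks how many bits are still free in
  'bitbucket', and a full BITS_SAVED-bit word is spilled to 'pos'/'buffer'
  before more code bits are accepted.  A minimal sketch of that idea
  (illustrative only, assuming 'bits' starts at BITS_SAVED and counts free
  bits; the real write_bits() is defined later in this file):

    if ((file_buffer.bits-= (int) nbits) >= 0)
      file_buffer.bitbucket|= value << file_buffer.bits;
    else
      ... spill the full bitbucket to file_buffer.pos and restart ...
*/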

struct st_huff_tree;
struct st_huff_element;

typedef struct st_huff_counts {
  uint field_length,max_zero_fill;
  uint pack_type;
  uint max_end_space,max_pre_space,length_bits,min_space;
  ulong max_length;
  enum en_fieldtype field_type;
  struct st_huff_tree *tree;            /* Tree for field */
  my_off_t counts[256];
  my_off_t end_space[8];
  my_off_t pre_space[8];
  my_off_t tot_end_space,tot_pre_space,zero_fields,empty_fields,bytes_packed;
  TREE int_tree;        /* Tree for detecting distinct column values. */
  uchar *tree_buff;     /* Column values, 'field_length' each. */
  uchar *tree_pos;      /* Points to end of column values in 'tree_buff'. */
} HUFF_COUNTS;

typedef struct st_huff_element HUFF_ELEMENT;

/*
  WARNING: It is crucial for the optimizations in calc_packed_length()
  that 'count' is the first element of 'HUFF_ELEMENT'.
*/
struct st_huff_element {
  my_off_t count;
  union un_element {
    struct st_nod {
      HUFF_ELEMENT *left,*right;
    } nod;
    struct st_leaf {
      HUFF_ELEMENT *null;
      uint element_nr;                  /* Number of element */
    } leaf;
  } a;
};


typedef struct st_huff_tree {
  HUFF_ELEMENT *root,*element_buffer;
  HUFF_COUNTS *counts;
  uint tree_number;
  uint elements;
  my_off_t bytes_packed;
  uint tree_pack_length;
  uint min_chr,max_chr,char_bits,offset_bits,max_offset,height;
  uint64_t *code;
  uchar *code_len;
} HUFF_TREE;


typedef struct st_isam_mrg {
  MI_INFO **file,**current,**end;
  uint free_file;
  uint count;
  uint min_pack_length;                 /* These are used by packed data */
  uint max_pack_length;
  uint ref_length;
  uint max_blob_length;
  my_off_t records;
  /* true if at least one source file has at least one disabled index */
  my_bool src_file_has_indexes_disabled;
} PACK_MRG_INFO;
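/*
  PACK_MRG_INFO wraps one or more open MI_INFO handles so that the rest of
  the code (get_statistic(), compress_isam_file(), mrg_rrnd()) can read a
  single table and a set of --join'ed tables through the same interface:
  main() fills in a one-element merge for the normal case, open_isam_files()
  fills it for a join.
*/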


extern int main(int argc,char * *argv);
static void get_options(int *argc,char ***argv);
static MI_INFO *open_isam_file(char *name,int mode);
static my_bool open_isam_files(PACK_MRG_INFO *mrg,char **names,uint count);
static int compress(PACK_MRG_INFO *file,char *join_name);
static HUFF_COUNTS *init_huff_count(MI_INFO *info,my_off_t records);
static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees,
                                           uint trees,
                                           HUFF_COUNTS *huff_counts,
                                           uint fields);
static int compare_tree(void* cmp_arg __attribute__((unused)),
                        const uchar *s,const uchar *t);
static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts);
static void check_counts(HUFF_COUNTS *huff_counts,uint trees,
                         my_off_t records);
static int test_space_compress(HUFF_COUNTS *huff_counts,my_off_t records,
                               uint max_space_length,my_off_t *space_counts,
                               my_off_t tot_space_count,
                               enum en_fieldtype field_type);
static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts,uint trees);
static int make_huff_tree(HUFF_TREE *tree,HUFF_COUNTS *huff_counts);
static int compare_huff_elements(void *not_used, uchar *a,uchar *b);
static int save_counts_in_queue(uchar *key,element_count count,
                                HUFF_TREE *tree);
static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts,uint flag);
static uint join_same_trees(HUFF_COUNTS *huff_counts,uint trees);
static int make_huff_decode_table(HUFF_TREE *huff_tree,uint trees);
static void make_traverse_code_tree(HUFF_TREE *huff_tree,
                                    HUFF_ELEMENT *element,uint size,
                                    uint64_t code);
static int write_header(PACK_MRG_INFO *isam_file, uint header_length,uint trees,
                        my_off_t tot_elements,my_off_t filelength);
static void write_field_info(HUFF_COUNTS *counts, uint fields,uint trees);
static my_off_t write_huff_tree(HUFF_TREE *huff_tree,uint trees);
static uint *make_offset_code_tree(HUFF_TREE *huff_tree,
                                   HUFF_ELEMENT *element,
                                   uint *offset);
static uint max_bit(uint value);
static int compress_isam_file(PACK_MRG_INFO *file,HUFF_COUNTS *huff_counts);
static char *make_new_name(char *new_name,char *old_name);
static char *make_old_name(char *new_name,char *old_name);
static void init_file_buffer(File file,bool read_buffer);
static int flush_buffer(ulong neaded_length);
static void end_file_buffer(void);
static void write_bits(uint64_t value, uint bits);
static void flush_bits(void);
static int save_state(MI_INFO *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length,
                      ha_checksum crc);
static int save_state_mrg(File file,PACK_MRG_INFO *isam_file,my_off_t new_length,
                          ha_checksum crc);
static int mrg_close(PACK_MRG_INFO *mrg);
static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf);
static void mrg_reset(PACK_MRG_INFO *mrg);

static int error_on_write=0,test_only=0,verbose=0,silent=0,
           write_loop=0,force_pack=0, isamchk_neaded=0;
static int tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
static my_bool backup, opt_wait;
/*
  tree_buff_length is somewhat arbitrary. The bigger it is the better
  the chance to win in terms of compression factor. On the other hand,
  this table becomes part of the compressed file header. And its length
  is coded with 16 bits in the header. Hence the limit is 2**16 - 1.
*/
static uint tree_buff_length= 65536 - MALLOC_OVERHEAD;
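/*
  In other words: a 16-bit length field can describe at most 65535 bytes,
  so the buffer is sized just below 64KiB; subtracting MALLOC_OVERHEAD
  presumably keeps the allocation, including malloc's own bookkeeping,
  within that budget.
*/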
static char tmp_dir[FN_REFLEN]={0},*join_table;
static my_off_t intervall_length;
static ha_checksum glob_crc;
static struct st_file_buffer file_buffer;
static QUEUE queue;
static HUFF_COUNTS *global_count;
static char zero_string[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
static const char *load_default_groups[]= { "myisampack",0 };

/* The main program */

int main(int argc, char **argv)
{
  int error,ok;
  PACK_MRG_INFO merge;
  char **default_argv;
  MY_INIT(argv[0]);

  load_defaults("my",load_default_groups,&argc,&argv);
  default_argv= argv;
  get_options(&argc,&argv);

  error=ok=isamchk_neaded=0;
  if (join_table)
  {                                             /* Join files into one */
    if (open_isam_files(&merge,argv,(uint) argc) ||
        compress(&merge,join_table))
      error=1;
  }
  else while (argc--)
  {
    MI_INFO *isam_file;
    if (!(isam_file=open_isam_file(*argv++,O_RDWR)))
      error=1;
    else
    {
      merge.file= &isam_file;
      merge.current=0;
      merge.free_file=0;
      merge.count=1;
      if (compress(&merge,0))
        error=1;
      else
        ok=1;
    }
  }
  if (ok && isamchk_neaded && !silent)
    puts("Remember to run myisamchk -rq on compressed tables");
  VOID(fflush(stdout));
  VOID(fflush(stderr));
  free_defaults(default_argv);
  my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
  exit(error ? 2 : 0);
#ifndef _lint
  return 0;                                     /* No compiler warning */
#endif
}

enum options_mp {OPT_CHARSETS_DIR_MP=256, OPT_AUTO_CLOSE};

static struct my_option my_long_options[] =
{
  {"backup", 'b', "Make a backup of the table as table_name.OLD.",
   (char**) &backup, (char**) &backup, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"character-sets-dir", OPT_CHARSETS_DIR_MP,
   "Directory where character sets are.", (char**) &charsets_dir,
   (char**) &charsets_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
  {"debug", '#', "Output debug log. Often this is 'd:t:o,filename'.",
   0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
  {"force", 'f',
   "Force packing of table even if it gets bigger or if tempfile exists.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"join", 'j',
   "Join all given tables into 'new_table_name'. All tables MUST have identical layouts.",
   (char**) &join_table, (char**) &join_table, 0, GET_STR, REQUIRED_ARG, 0, 0, 0,
   0, 0, 0},
  {"help", '?', "Display this help and exit.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"silent", 's', "Be more silent.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"tmpdir", 'T', "Use temporary directory to store temporary table.",
   0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
  {"test", 't', "Don't pack table, only test packing it.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"verbose", 'v', "Write info about progress and packing result. Use many -v for more verbosity!",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"version", 'V', "Output version information and exit.",
   0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
  {"wait", 'w', "Wait and retry if table is in use.", (char**) &opt_wait,
   (char**) &opt_wait, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
  { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};

static void print_version(void)
{
  VOID(printf("%s Ver 1.23 for %s on %s\n",
              my_progname, SYSTEM_TYPE, MACHINE_TYPE));
}


static void usage(void)
{
  print_version();
  puts("Copyright (C) 2002 MySQL AB");
  puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,");
  puts("and you are welcome to modify and redistribute it under the GPL license\n");

  puts("Pack a MyISAM-table to take much less space.");
  puts("Keys are not updated, you must run myisamchk -rq on the datafile");
  puts("afterwards to update the keys.");
  puts("You should give the .MYI file as the filename argument.");

  VOID(printf("\nUsage: %s [OPTIONS] filename...\n", my_progname));
  my_print_help(my_long_options);
  print_defaults("my", load_default_groups);
  my_print_variables(my_long_options);
}
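/*
  Typical invocations (table names and paths are illustrative only):

    myisampack /path/to/db/big_table.MYI
    myisampack --backup --verbose /path/to/db/big_table.MYI
    myisampack -j /path/to/db/all_logs log_2005.MYI log_2006.MYI

  followed in each case by "myisamchk -rq" on the packed table to rebuild
  the indexes, as the help text above notes.
*/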

static bool
get_one_option(int optid, const struct my_option *opt __attribute__((unused)),
               char *argument)
{
  uint length;

  switch(optid) {
#ifdef __NETWARE__
  case OPT_AUTO_CLOSE:
    setscreenmode(SCR_AUTOCLOSE_ON_EXIT);
    break;
#endif
  case 'f':
    force_pack= 1;
    tmpfile_createflag= O_RDWR | O_TRUNC;
    break;
  case 's':
    write_loop= verbose= 0;
    silent= 1;
    break;
  case 't':
    test_only= 1;
    /* Avoid resetting 'verbose' if it was already set > 1. */
    if (! verbose)
      verbose= 1;
    break;
  case 'T':
    length= (uint) (strmov(tmp_dir, argument) - tmp_dir);
    if (length != dirname_length(tmp_dir))
    {
      tmp_dir[length]=FN_LIBCHAR;
      tmp_dir[length+1]=0;
    }
    break;
  case 'v':
    verbose++; /* Allow for selecting the level of verbosity. */
    silent= 0;
    break;
  case 'V':
    print_version();
    exit(0);
  case 'I':
  case '?':
    usage();
    exit(0);
  }
  return 0;
}

/* reads options */
/* Initiates DEBUG - but no debugging here ! */

static void get_options(int *argc,char ***argv)
{
  int ho_error;

  my_progname= argv[0][0];
  if (isatty(fileno(stdout)))
    write_loop=1;

  if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option)))
    exit(ho_error);

  if (!*argc)
  {
    usage();
    exit(1);
  }
  if (join_table)
  {
    backup=0;                                   /* Not needed */
    tmp_dir[0]=0;
  }
  return;
}


static MI_INFO *open_isam_file(char *name,int mode)
{
  MI_INFO *isam_file;
  MYISAM_SHARE *share;

  if (!(isam_file=mi_open(name,mode,
                          (opt_wait ? HA_OPEN_WAIT_IF_LOCKED :
                           HA_OPEN_ABORT_IF_LOCKED))))
  {
    VOID(fprintf(stderr, "%s gave error %d on open\n", name, my_errno));
    return(0);
  }
  share=isam_file->s;
  if (share->options & HA_OPTION_COMPRESS_RECORD && !join_table)
  {
    if (!force_pack)
    {
      VOID(fprintf(stderr, "%s is already compressed\n", name));
      VOID(mi_close(isam_file));
      return(0);
    }
    if (verbose)
      puts("Recompressing already compressed table");
    share->options&= ~HA_OPTION_READ_ONLY_DATA; /* We are modifying it */
  }
  if (! force_pack && share->state.state.records != 0 &&
      (share->state.state.records <= 1 ||
       share->state.state.data_file_length < 1024))
  {
    VOID(fprintf(stderr, "%s is too small to compress\n", name));
    VOID(mi_close(isam_file));
    return(0);
  }
  VOID(mi_lock_database(isam_file,F_WRLCK));
  return(isam_file);
}


static my_bool open_isam_files(PACK_MRG_INFO *mrg, char **names, uint count)
{
  uint i,j;
  mrg->count=0;
  mrg->current=0;
  mrg->file=(MI_INFO**) my_malloc(sizeof(MI_INFO*)*count,MYF(MY_FAE));
  mrg->free_file=1;
  mrg->src_file_has_indexes_disabled= 0;
  for (i=0; i < count ; i++)
  {
    if (!(mrg->file[i]=open_isam_file(names[i],O_RDONLY)))
      goto error;

    mrg->src_file_has_indexes_disabled|=
      ! mi_is_all_keys_active(mrg->file[i]->s->state.key_map,
                              mrg->file[i]->s->base.keys);
  }
  /* Check that files are identical */
  for (j=0 ; j < count-1 ; j++)
  {
    MI_COLUMNDEF *m1,*m2,*end;
    if (mrg->file[j]->s->base.reclength != mrg->file[j+1]->s->base.reclength ||
        mrg->file[j]->s->base.fields != mrg->file[j+1]->s->base.fields)
      goto diff_file;
    m1=mrg->file[j]->s->rec;
    end=m1+mrg->file[j]->s->base.fields;
    m2=mrg->file[j+1]->s->rec;
    for ( ; m1 != end ; m1++,m2++)
    {
      if (m1->type != m2->type || m1->length != m2->length)
        goto diff_file;
    }
  }
  mrg->count=count;
  return 0;

 diff_file:
  VOID(fprintf(stderr, "%s: Tables '%s' and '%s' are not identical\n",
               my_progname, names[j], names[j+1]));
 error:
  while (i--)
    mi_close(mrg->file[i]);
  my_free((uchar*) mrg->file,MYF(0));
  return 1;
}


static int compress(PACK_MRG_INFO *mrg,char *result_table)
{
  int error;
  File new_file,join_isam_file;
  MI_INFO *isam_file;
  MYISAM_SHARE *share;
  char org_name[FN_REFLEN],new_name[FN_REFLEN],temp_name[FN_REFLEN];
  uint i,header_length,fields,trees,used_trees;
  my_off_t old_length,new_length,tot_elements;
  HUFF_COUNTS *huff_counts;
  HUFF_TREE *huff_trees;

  isam_file=mrg->file[0];                       /* Take this as an example */
  share=isam_file->s;
  new_file=join_isam_file= -1;
  trees=fields=0;
  huff_trees=0;
  huff_counts=0;

  /* Create temporary or join file */

  if (backup)
    VOID(fn_format(org_name,isam_file->filename,"",MI_NAME_DEXT,2));
  else
    VOID(fn_format(org_name,isam_file->filename,"",MI_NAME_DEXT,2+4+16));
  if (!test_only && result_table)
  {
    /* Make a new indexfile based on first file in list */
    uint length;
    uchar *buff;
    strmov(org_name,result_table);              /* Fix error messages */
    VOID(fn_format(new_name,result_table,"",MI_NAME_IEXT,2));
    if ((join_isam_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME)))
        < 0)
      goto err;
    length=(uint) share->base.keystart;
    if (!(buff= (uchar*) my_malloc(length,MYF(MY_WME))))
      goto err;
    if (my_pread(share->kfile,buff,length,0L,MYF(MY_WME | MY_NABP)) ||
        my_write(join_isam_file,buff,length,
                 MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
    {
      my_free(buff,MYF(0));
      goto err;
    }
    my_free(buff,MYF(0));
    VOID(fn_format(new_name,result_table,"",MI_NAME_DEXT,2));
  }
  else if (!tmp_dir[0])
    VOID(make_new_name(new_name,org_name));
  else
    VOID(fn_format(new_name,org_name,tmp_dir,DATA_TMP_EXT,1+2+4));
  if (!test_only &&
      (new_file=my_create(new_name,0,tmpfile_createflag,MYF(MY_WME))) < 0)
    goto err;

  /* Start calculating statistics */

  mrg->records=0;
  for (i=0 ; i < mrg->count ; i++)
    mrg->records+=mrg->file[i]->s->state.state.records;

  if (write_loop || verbose)
  {
    VOID(printf("Compressing %s: (%lu records)\n",
                result_table ? new_name : org_name, (ulong) mrg->records));
  }
  trees=fields=share->base.fields;
  huff_counts=init_huff_count(isam_file,mrg->records);
  QUICK_SAFEMALLOC;

  /*
    Read the whole data file(s) for statistics.
  */
  if (write_loop || verbose)
    VOID(printf("- Calculating statistics\n"));
  if (get_statistic(mrg,huff_counts))
    goto err;
  NORMAL_SAFEMALLOC;
  old_length=0;
  for (i=0; i < mrg->count ; i++)
    old_length+= (mrg->file[i]->s->state.state.data_file_length -
                  mrg->file[i]->s->state.state.empty);

  /*
    Create a global priority queue in preparation for making
    temporary Huffman trees.
  */
  if (init_queue(&queue,256,0,0,compare_huff_elements,0))
    goto err;

  /*
    Check each column if we should use pre-space-compress, end-space-
    compress, empty-field-compress or zero-field-compress.
  */
  check_counts(huff_counts,fields,mrg->records);

  /*
    Build a Huffman tree for each column.
  */
  huff_trees=make_huff_trees(huff_counts,trees);

  /*
    If the packed lengths of combined columns are less than the sum of
    the non-combined columns, then create common Huffman trees for them.
    We do this only for byte compressed columns, not for distinct values
    compressed columns.
  */
  if ((int) (used_trees=join_same_trees(huff_counts,trees)) < 0)
    goto err;

  /*
    Assign codes to all byte or column values.
  */
  if (make_huff_decode_table(huff_trees,fields))
    goto err;

  /* Prepare a file buffer. */
  init_file_buffer(new_file,0);

  /*
    Reserve space in the target file for the fixed compressed file header.
  */
  file_buffer.pos_in_file=HEAD_LENGTH;
  if (! test_only)
    VOID(my_seek(new_file,file_buffer.pos_in_file,MY_SEEK_SET,MYF(0)));

  /*
    Write field infos: field type, pack type, length bits, tree number.
  */
  write_field_info(huff_counts,fields,used_trees);

  /*
    Write decode trees.
  */
  if (!(tot_elements=write_huff_tree(huff_trees,trees)))
    goto err;

  /*
    Calculate the total length of the compression info header.
    This includes the fixed compressed file header, the column compression
    type descriptions, and the decode trees.
  */
  header_length=(uint) file_buffer.pos_in_file+
                (uint) (file_buffer.pos-file_buffer.buffer);

  /*
    Compress the source file into the target file.
  */
  if (write_loop || verbose)
    VOID(printf("- Compressing file\n"));
  error=compress_isam_file(mrg,huff_counts);
  new_length=file_buffer.pos_in_file;
  if (!error && !test_only)
  {
    uchar buff[MEMMAP_EXTRA_MARGIN];            /* End marginal for memmap */
    memset(buff, 0, sizeof(buff));
    error=my_write(file_buffer.file,buff,sizeof(buff),
                   MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0;
  }

  /*
    Write the fixed compressed file header.
  */
  if (!error)
    error=write_header(mrg,header_length,used_trees,tot_elements,
                       new_length);

  /* Flush the file buffer. */
  end_file_buffer();

  /* Display statistics. */
  if (verbose && mrg->records)
    VOID(printf("Min record length: %6d  Max length: %6d  "
                "Mean total length: %6ld\n", mrg->min_pack_length,
                mrg->max_pack_length, (ulong) (new_length/mrg->records)));

  /* Close source and target file. */
  if (!test_only)
  {
    error|=my_close(new_file,MYF(MY_WME));
    if (!result_table)
    {
      error|=my_close(isam_file->dfile,MYF(MY_WME));
      isam_file->dfile= -1;             /* Tell mi_close file is closed */
    }
  }

  /* Cleanup. */
  free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
  if (! test_only && ! error)
  {
    if (result_table)
    {
      error=save_state_mrg(join_isam_file,mrg,new_length,glob_crc);
    }
    else
    {
      if (backup)
      {
        if (my_rename(org_name,make_old_name(temp_name,isam_file->filename),
                      MYF(MY_WME)))
          error=1;
        else
        {
          if (tmp_dir[0])
            error=my_copy(new_name,org_name,MYF(MY_WME));
          else
            error=my_rename(new_name,org_name,MYF(MY_WME));
          if (!error)
          {
            VOID(my_copystat(temp_name,org_name,MYF(MY_COPYTIME)));
            if (tmp_dir[0])
              VOID(my_delete(new_name,MYF(MY_WME)));
          }
        }
      }
      else
      {
        if (tmp_dir[0])
        {
          error=my_copy(new_name,org_name,
                        MYF(MY_WME | MY_HOLD_ORIGINAL_MODES | MY_COPYTIME));
          if (!error)
            VOID(my_delete(new_name,MYF(MY_WME)));
        }
        else
          error=my_redel(org_name,new_name,MYF(MY_WME | MY_COPYTIME));
      }
      if (! error)
        error=save_state(isam_file,mrg,new_length,glob_crc);
    }
  }
  error|=mrg_close(mrg);
  if (join_isam_file >= 0)
    error|=my_close(join_isam_file,MYF(MY_WME));
  if (error)
  {
    VOID(fprintf(stderr, "Aborting: %s is not compressed\n", org_name));
    VOID(my_delete(new_name,MYF(MY_WME)));
    return(-1);
  }
  if (write_loop || verbose)
  {
    if (old_length)
      VOID(printf("%.4g%%     \n",
                  (((int64_t) (old_length - new_length)) * 100.0 /
                   (int64_t) old_length)));
    else
      puts("Empty file saved in compressed format");
  }
  return(0);

 err:
  free_counts_and_tree_and_queue(huff_trees,trees,huff_counts,fields);
  if (new_file >= 0)
    VOID(my_close(new_file,MYF(0)));
  if (join_isam_file >= 0)
    VOID(my_close(join_isam_file,MYF(0)));
  mrg_close(mrg);
  VOID(fprintf(stderr, "Aborted: %s is not compressed\n", org_name));
  return(-1);
}
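/*
  Resulting file layout, as produced by the code above: a fixed header of
  HEAD_LENGTH bytes (reserved first, filled in last by write_header()),
  followed by the per-field pack info from write_field_info(), the decode
  trees from write_huff_tree(), the Huffman-compressed records from
  compress_isam_file(), and finally MEMMAP_EXTRA_MARGIN zero bytes as an
  end margin for memory mapping.
*/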

/* Init a huff_count-struct for each field and init it */

static HUFF_COUNTS *init_huff_count(MI_INFO *info,my_off_t records)
{
  register uint i;
  register HUFF_COUNTS *count;
  if ((count = (HUFF_COUNTS*) my_malloc(info->s->base.fields*
                                        sizeof(HUFF_COUNTS),
                                        MYF(MY_ZEROFILL | MY_WME))))
  {
    for (i=0 ; i < info->s->base.fields ; i++)
    {
      enum en_fieldtype type;
      count[i].field_length=info->s->rec[i].length;
      type= count[i].field_type= (enum en_fieldtype) info->s->rec[i].type;
      if (type == FIELD_INTERVALL ||
          type == FIELD_CONSTANT ||
          type == FIELD_ZERO)
        type = FIELD_NORMAL;
      if (count[i].field_length <= 8 &&
          (type == FIELD_NORMAL ||
           type == FIELD_SKIP_ZERO))
        count[i].max_zero_fill= count[i].field_length;
      /*
        For every column initialize a tree, which is used to detect distinct
        column values. 'int_tree' works together with 'tree_buff' and
        'tree_pos'. Its keys are implemented by pointers into 'tree_buff'.
        This is accomplished by '-1' as the element size.
      */
      init_tree(&count[i].int_tree,0,0,-1,(qsort_cmp2) compare_tree,0, NULL,
                NULL);
      if (records && type != FIELD_BLOB && type != FIELD_VARCHAR)
        count[i].tree_pos=count[i].tree_buff =
          my_malloc(count[i].field_length > 1 ? tree_buff_length : 2,
                    MYF(MY_WME));
    }
  }
  return count;
}


/* Free memory used by counts and trees */

static void free_counts_and_tree_and_queue(HUFF_TREE *huff_trees, uint trees,
                                           HUFF_COUNTS *huff_counts,
                                           uint fields)
{
  register uint i;

  if (huff_trees)
  {
    for (i=0 ; i < trees ; i++)
    {
      if (huff_trees[i].element_buffer)
        my_free((uchar*) huff_trees[i].element_buffer,MYF(0));
      if (huff_trees[i].code)
        my_free((uchar*) huff_trees[i].code,MYF(0));
    }
    my_free((uchar*) huff_trees,MYF(0));
  }
  if (huff_counts)
  {
    for (i=0 ; i < fields ; i++)
    {
      if (huff_counts[i].tree_buff)
      {
        my_free((uchar*) huff_counts[i].tree_buff,MYF(0));
        delete_tree(&huff_counts[i].int_tree);
      }
    }
    my_free((uchar*) huff_counts,MYF(0));
  }
  delete_queue(&queue);                 /* This is safe to free */
  return;
}

/* Read through old file and gather some statistics */

static int get_statistic(PACK_MRG_INFO *mrg,HUFF_COUNTS *huff_counts)
{
  int error;
  uint length;
  ulong reclength,max_blob_length;
  uchar *record,*pos,*next_pos,*end_pos,*start_pos;
  ha_rows record_count;
  my_bool static_row_size;
  HUFF_COUNTS *count,*end_count;
  TREE_ELEMENT *element;

  reclength=mrg->file[0]->s->base.reclength;
  record=(uchar*) my_alloca(reclength);
  end_count=huff_counts+mrg->file[0]->s->base.fields;
  record_count=0; glob_crc=0;
  max_blob_length=0;

  /* Check how to calculate checksum */
  static_row_size=1;
  for (count=huff_counts ; count < end_count ; count++)
  {
    if (count->field_type == FIELD_BLOB ||
        count->field_type == FIELD_VARCHAR)
    {
      static_row_size=0;
      break;
    }
  }

  mrg_reset(mrg);
  while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE)
  {
    ulong tot_blob_length=0;
    if (! error)
    {
      /* glob_crc is a checksum over all bytes of all records. */
      if (static_row_size)
        glob_crc+=mi_static_checksum(mrg->file[0],record);
      else
        glob_crc+=mi_checksum(mrg->file[0],record);

      /* Count the incidence of values separately for every column. */
      for (pos=record,count=huff_counts ;
           count < end_count ;
           count++,
           pos=next_pos)
      {
        next_pos=end_pos=(start_pos=pos)+count->field_length;

        /*
          Put the whole column value in a tree if there is room for it.
          'int_tree' is used to quickly check for duplicate values.
          'tree_buff' collects as many distinct column values as
          possible. If the field length is > 1, it is tree_buff_length,
          else 2 bytes. Each value is 'field_length' bytes big. If there
          are more distinct column values than fit into the buffer, we
          give up with this tree. BLOBs and VARCHARs do not have a
          tree_buff as it can only be used with fixed length columns.
          For the special case of field length == 1, we handle only the
          case that there is only one distinct value in the table(s).
          Otherwise, we can have a maximum of 256 distinct values. This
          is then handled by the normal Huffman tree build.

          Another limit for collecting distinct column values is the
          number of values itself. Since we would need to build a
          Huffman tree for the values, we are limited by the 'IS_OFFSET'
          constant. This constant expresses a bit which is used to
          determine if a tree element holds a final value or an offset
          to a child element. Hence, all values and offsets need to be
          smaller than 'IS_OFFSET'. A tree element is implemented with
          two integer values, one for the left branch and one for the
          right branch. For the extreme case that the first element
          points to the last element, the number of integers in the tree
          must be less or equal to IS_OFFSET. So the number of elements
          must be less or equal to IS_OFFSET / 2.

          WARNING: At first, we insert a pointer into the record buffer
          as the key for the tree. If we got a new distinct value, which
          is really inserted into the tree, instead of being counted
          only, we will copy the column value from the record buffer to
          'tree_buff' and adjust the key pointer of the tree accordingly.
        */
        if (count->tree_buff)
        {
          global_count=count;
          if (!(element=tree_insert(&count->int_tree,pos, 0,
                                    count->int_tree.custom_arg)) ||
              (element->count == 1 &&
               (count->tree_buff + tree_buff_length <
                count->tree_pos + count->field_length)) ||
              (count->int_tree.elements_in_tree > IS_OFFSET / 2) ||
              (count->field_length == 1 &&
               count->int_tree.elements_in_tree > 1))
          {
            delete_tree(&count->int_tree);
            my_free(count->tree_buff,MYF(0));
            count->tree_buff=0;
          }
          else
          {
            /*
              If tree_insert() succeeds, it either creates a new element
              or increments the counter of an existing element.
            */
            if (element->count == 1)
            {
              /* Copy the new column value into 'tree_buff'. */
              memcpy(count->tree_pos,pos,(size_t) count->field_length);
              /* Adjust the key pointer in the tree. */
              tree_set_pointer(element,count->tree_pos);
              /* Point behind the last column value so far. */
              count->tree_pos+=count->field_length;
            }
          }
        }

        /* Save character counters and space-counts and zero-field-counts */
        if (count->field_type == FIELD_NORMAL ||
            count->field_type == FIELD_SKIP_ENDSPACE)
        {
          /* Ignore trailing space. */
          for ( ; end_pos > pos ; end_pos--)
            if (end_pos[-1] != ' ')
              break;
          /* Empty fields are just counted. Go to the next record. */
          if (end_pos == pos)
          {
            count->empty_fields++;
            count->max_zero_fill=0;
            continue;
          }
          /*
            Count the total of all trailing spaces and the number of
            short trailing spaces. Remember the longest trailing space.
          */
          length= (uint) (next_pos-end_pos);
          count->tot_end_space+=length;
          if (length < 8)
            count->end_space[length]++;
          if (count->max_end_space < length)
            count->max_end_space = length;
        }

        if (count->field_type == FIELD_NORMAL ||
            count->field_type == FIELD_SKIP_PRESPACE)
        {
          /* Ignore leading space. */
          for (pos=start_pos; pos < end_pos ; pos++)
            if (pos[0] != ' ')
              break;
          /* Empty fields are just counted. Go to the next record. */
          if (end_pos == pos)
          {
            count->empty_fields++;
            count->max_zero_fill=0;
            continue;
          }
          /*
            Count the total of all leading spaces and the number of
            short leading spaces. Remember the longest leading space.
          */
          length= (uint) (pos-start_pos);
          count->tot_pre_space+=length;
          if (length < 8)
            count->pre_space[length]++;
          if (count->max_pre_space < length)
            count->max_pre_space = length;
        }

        /* Calculate pos, end_pos, and max_length for variable length fields. */
        if (count->field_type == FIELD_BLOB)
        {
          uint field_length=count->field_length -portable_sizeof_char_ptr;
          ulong blob_length= _mi_calc_blob_length(field_length, start_pos);
          memcpy_fixed((char*) &pos, start_pos+field_length,sizeof(char*));
          end_pos=pos+blob_length;
          tot_blob_length+=blob_length;
          set_if_bigger(count->max_length,blob_length);
        }
        else if (count->field_type == FIELD_VARCHAR)
        {
          uint pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1);
          length= (pack_length == 1 ? (uint) *(uchar*) start_pos :
                   uint2korr(start_pos));
          pos= start_pos+pack_length;
          end_pos= pos+length;
          set_if_bigger(count->max_length,length);
        }

        /* Evaluate 'max_zero_fill' for short fields. */
        if (count->field_length <= 8 &&
            (count->field_type == FIELD_NORMAL ||
             count->field_type == FIELD_SKIP_ZERO))
        {
          uint i;
          /* Zero fields are just counted. Go to the next record. */
          if (!memcmp((uchar*) start_pos,zero_string,count->field_length))
          {
            count->zero_fields++;
            continue;
          }
          /*
            max_zero_fill starts with field_length. It is decreased every
            time a shorter "zero trailer" is found. It is set to zero when
            an empty field is found (see above). This suggests that the
            variable should be called 'min_zero_fill'.
          */
          for (i =0 ; i < count->max_zero_fill && ! end_pos[-1 - (int) i] ;
               i++) ;
          if (i < count->max_zero_fill)
            count->max_zero_fill=i;
        }

        /* Ignore zero fields and check fields. */
        if (count->field_type == FIELD_ZERO ||
            count->field_type == FIELD_CHECK)
          continue;

        /*
          Count the incidence of every byte value in the
          significant field value.
        */
        for ( ; pos < end_pos ; pos++)
          count->counts[(uchar) *pos]++;

        /* Step to next field. */
      }

      if (tot_blob_length > max_blob_length)
        max_blob_length=tot_blob_length;
      record_count++;
      if (write_loop && record_count % WRITE_COUNT == 0)
      {
        VOID(printf("%lu\r", (ulong) record_count));
        VOID(fflush(stdout));
      }
    }
    else if (error != HA_ERR_RECORD_DELETED)
    {
      VOID(fprintf(stderr, "Got error %d while reading rows", error));
      break;
    }

    /* Step to next record. */
  }
  if (write_loop)
  {
    VOID(printf("            \r"));
    VOID(fflush(stdout));
  }

  if (verbose >= 2)
    VOID(printf("Found the following number of incidents "
                "of the byte codes:\n"));
  for (count= huff_counts ; count < end_count; count++)
  {
    uint      idx;
    my_off_t  total_count;
    char      llbuf[32];

    if (verbose >= 2)
      VOID(printf("column: %3u\n", (uint) (count - huff_counts + 1)));
    if (count->tree_buff)
    {
      if (verbose >= 2)
        VOID(printf("number of distinct values: %u\n",
                    (uint) ((count->tree_pos - count->tree_buff) /
                            count->field_length)));
    }
    total_count= 0;
    for (idx= 0; idx < 256; idx++)
    {
      if (count->counts[idx])
      {
        total_count+= count->counts[idx];
        if (verbose >= 2)
          VOID(printf("counts[0x%02x]: %12s\n", idx,
                      llstr((int64_t) count->counts[idx], llbuf)));
      }
    }
    if ((verbose >= 2) && total_count)
    {
      VOID(printf("total: %12s\n",
                  llstr((int64_t) total_count, llbuf)));
    }
  }

  mrg->records=record_count;
  mrg->max_blob_length=max_blob_length;
  my_afree((uchar*) record);
  return(error != HA_ERR_END_OF_FILE);
}
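/*
  At this point every HUFF_COUNTS entry holds the raw material for the
  packing decisions: per-byte incidence counts, leading/trailing space and
  zero/empty field statistics, the optional tree of distinct column values,
  and glob_crc covering all record bytes.  check_counts() and
  make_huff_trees() below turn these numbers into the actual pack types
  and Huffman trees.
*/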

static int compare_huff_elements(void *not_used __attribute__((unused)),
                                 uchar *a, uchar *b)
{
  return *((my_off_t*) a) < *((my_off_t*) b) ? -1 :
    (*((my_off_t*) a) == *((my_off_t*) b)  ? 0 : 1);
}
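/*
  The queue elements are pointers to HUFF_ELEMENTs, but they are compared
  here as plain my_off_t values: this only works because 'count' is the
  first member of HUFF_ELEMENT (see the WARNING at its definition), so the
  cast reads the incidence count directly.
*/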

/* Check each tree if we should use pre-space-compress, end-space-
   compress, empty-field-compress or zero-field-compress */

static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
                         my_off_t records)
{
  uint space_fields,fill_zero_fields,field_count[(int) FIELD_enum_val_count];
  my_off_t old_length,new_length,length;

  memset((uchar*) field_count, 0, sizeof(field_count));
  space_fields=fill_zero_fields=0;

  for (; trees-- ; huff_counts++)
  {
    if (huff_counts->field_type == FIELD_BLOB)
    {
      huff_counts->length_bits=max_bit(huff_counts->max_length);
      goto found_pack;
    }
    else if (huff_counts->field_type == FIELD_VARCHAR)
    {
      huff_counts->length_bits=max_bit(huff_counts->max_length);
      goto found_pack;
    }
    else if (huff_counts->field_type == FIELD_CHECK)
    {
      huff_counts->bytes_packed=0;
      huff_counts->counts[0]=0;
      goto found_pack;
    }

    huff_counts->field_type=FIELD_NORMAL;
    huff_counts->pack_type=0;

    /* Check for zero-filled records (in this column), or zero records. */
    if (huff_counts->zero_fields || ! records)
    {
      my_off_t old_space_count;
      /*
        If there are only zero filled records (in this column),
        or no records at all, we are done.
      */
      if (huff_counts->zero_fields == records)
      {
        huff_counts->field_type= FIELD_ZERO;
        huff_counts->bytes_packed=0;
        huff_counts->counts[0]=0;
        goto found_pack;
      }
      /* Remember the number of significant spaces. */
      old_space_count=huff_counts->counts[' '];
      /* Add all leading and trailing spaces. */
      huff_counts->counts[' ']+= (huff_counts->tot_end_space +
                                  huff_counts->tot_pre_space +
                                  huff_counts->empty_fields *
                                  huff_counts->field_length);
      /* Check, what the compressed length of this would be. */
      old_length=calc_packed_length(huff_counts,0)+records/8;
      /* Get the number of zero bytes. */
      length=huff_counts->zero_fields*huff_counts->field_length;
      /* Add it to the counts. */
      huff_counts->counts[0]+=length;
      /* Check, what the compressed length of this would be. */
      new_length=calc_packed_length(huff_counts,0);
      /* If the compression without the zeroes would be shorter, we are done. */
      if (old_length < new_length && huff_counts->field_length > 1)
      {
        huff_counts->field_type=FIELD_SKIP_ZERO;
        huff_counts->counts[0]-=length;
        huff_counts->bytes_packed=old_length- records/8;
        goto found_pack;
      }
      /* Remove the insignificant spaces, but keep the zeroes. */
      huff_counts->counts[' ']=old_space_count;
    }
    /* Check, what the compressed length of this column would be. */
    huff_counts->bytes_packed=calc_packed_length(huff_counts,0);

    /*
      If there are enough empty records (in this column),
      treating them specially may pay off.
    */
    if (huff_counts->empty_fields)
    {
      if (huff_counts->field_length > 2 &&
          huff_counts->empty_fields + (records - huff_counts->empty_fields)*
          (1+max_bit(max(huff_counts->max_pre_space,
                         huff_counts->max_end_space))) <
          records * max_bit(huff_counts->field_length))
      {
        huff_counts->pack_type |= PACK_TYPE_SPACE_FIELDS;
      }
      else
      {
        length=huff_counts->empty_fields*huff_counts->field_length;
        if (huff_counts->tot_end_space || ! huff_counts->tot_pre_space)
        {
          huff_counts->tot_end_space+=length;
          huff_counts->max_end_space=huff_counts->field_length;
          if (huff_counts->field_length < 8)
            huff_counts->end_space[huff_counts->field_length]+=
              huff_counts->empty_fields;
        }
        if (huff_counts->tot_pre_space)
        {
          huff_counts->tot_pre_space+=length;
          huff_counts->max_pre_space=huff_counts->field_length;
          if (huff_counts->field_length < 8)
            huff_counts->pre_space[huff_counts->field_length]+=
              huff_counts->empty_fields;
        }
      }
    }

    /*
      If there are enough trailing spaces (in this column),
      treating them specially may pay off.
    */
    if (huff_counts->tot_end_space)
    {
      huff_counts->counts[' ']+=huff_counts->tot_pre_space;
      if (test_space_compress(huff_counts,records,huff_counts->max_end_space,
                              huff_counts->end_space,
                              huff_counts->tot_end_space,FIELD_SKIP_ENDSPACE))
        goto found_pack;
      huff_counts->counts[' ']-=huff_counts->tot_pre_space;
    }

    /*
      If there are enough leading spaces (in this column),
      treating them specially may pay off.
    */
    if (huff_counts->tot_pre_space)
    {
      if (test_space_compress(huff_counts,records,huff_counts->max_pre_space,
                              huff_counts->pre_space,
                              huff_counts->tot_pre_space,FIELD_SKIP_PRESPACE))
        goto found_pack;
    }

  found_pack:                   /* Found field-packing */

    /* Test if we can use zero-fill */

    if (huff_counts->max_zero_fill &&
        (huff_counts->field_type == FIELD_NORMAL ||
         huff_counts->field_type == FIELD_SKIP_ZERO))
    {
      huff_counts->counts[0]-=huff_counts->max_zero_fill*
        (huff_counts->field_type == FIELD_SKIP_ZERO ?
         records - huff_counts->zero_fields : records);
      huff_counts->pack_type|=PACK_TYPE_ZERO_FILL;
      huff_counts->bytes_packed=calc_packed_length(huff_counts,0);
    }

    /* Test if intervall-field is better */

    if (huff_counts->tree_buff)
    {
      HUFF_TREE tree;

      tree.element_buffer=0;
      if (!make_huff_tree(&tree,huff_counts) &&
          tree.bytes_packed+tree.tree_pack_length < huff_counts->bytes_packed)
      {
        if (tree.elements == 1)
          huff_counts->field_type=FIELD_CONSTANT;
        else
          huff_counts->field_type=FIELD_INTERVALL;
        huff_counts->pack_type=0;
      }
      else
      {
        my_free((uchar*) huff_counts->tree_buff,MYF(0));
        delete_tree(&huff_counts->int_tree);
        huff_counts->tree_buff=0;
      }
      if (tree.element_buffer)
        my_free((uchar*) tree.element_buffer,MYF(0));
    }
    if (huff_counts->pack_type & PACK_TYPE_SPACE_FIELDS)
      space_fields++;
    if (huff_counts->pack_type & PACK_TYPE_ZERO_FILL)
      fill_zero_fields++;
    field_count[huff_counts->field_type]++;
  }
  if (verbose)
    VOID(printf("\nnormal: %3d  empty-space: %3d  "
                "empty-zero: %3d  empty-fill: %3d\n"
                "pre-space: %3d  end-space: %3d  "
                "intervall-fields: %3d  zero: %3d\n",
                field_count[FIELD_NORMAL],space_fields,
                field_count[FIELD_SKIP_ZERO],fill_zero_fields,
                field_count[FIELD_SKIP_PRESPACE],
                field_count[FIELD_SKIP_ENDSPACE],
                field_count[FIELD_INTERVALL],
                field_count[FIELD_ZERO]));
  return;
}

/* Test if we can use space-compression and empty-field-compression */

static int
test_space_compress(HUFF_COUNTS *huff_counts, my_off_t records,
                    uint max_space_length, my_off_t *space_counts,
                    my_off_t tot_space_count, enum en_fieldtype field_type)
{
  int min_pos;
  uint length_bits,i;
  my_off_t space_count,min_space_count,min_pack,new_length,skip;

  length_bits=max_bit(max_space_length);

  /* Default no end_space-packing */
  space_count=huff_counts->counts[(uint) ' '];
  min_space_count= (huff_counts->counts[(uint) ' ']+= tot_space_count);
  min_pack=calc_packed_length(huff_counts,0);
  min_pos= -2;
  huff_counts->counts[(uint) ' ']=space_count;

  /* Test with always space-count */
  new_length=huff_counts->bytes_packed+length_bits*records/8;
  if (new_length+1 < min_pack)
  {
    min_pos= -1;
    min_pack=new_length;
    min_space_count=space_count;
  }
  /* Test with length-flag */
  for (skip=0L, i=0 ; i < 8 ; i++)
  {
    if (space_counts[i])
    {
      if (i)
        huff_counts->counts[(uint) ' ']+=space_counts[i];
      skip+=huff_counts->pre_space[i];
      new_length=calc_packed_length(huff_counts,0)+
        (records+(records-skip)*(1+length_bits))/8;
      if (new_length < min_pack)
      {
        min_pos=(int) i;
        min_pack=new_length;
        min_space_count=huff_counts->counts[(uint) ' '];
      }
    }
  }

  huff_counts->counts[(uint) ' ']=min_space_count;
  huff_counts->bytes_packed=min_pack;
  switch (min_pos) {
  case -2:
    return(0);                          /* No space-compress */
  case -1:                              /* Always space-count */
    huff_counts->field_type=field_type;
    huff_counts->min_space=0;
    huff_counts->length_bits=max_bit(max_space_length);
    break;
  default:
    huff_counts->field_type=field_type;
    huff_counts->min_space=(uint) min_pos;
    huff_counts->pack_type|=PACK_TYPE_SELECTED;
    huff_counts->length_bits=max_bit(max_space_length);
    break;
  }
  return(1);                            /* Using space-compress */
}
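/*
  Summary of the decision above: min_pos == -2 leaves the field without
  space-compression, min_pos == -1 selects space-compression with a space
  count stored for every record (min_space == 0), and min_pos >= 0
  additionally sets PACK_TYPE_SELECTED with 'min_space' as the chosen
  threshold; the actual per-record encoding happens later in
  compress_isam_file(), which is not part of this excerpt.
*/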
|
|
1373 |
||
1374 |
||
1375 |
/* Make a huff_tree of each huff_count */
|
|
1376 |
||
1377 |
static HUFF_TREE* make_huff_trees(HUFF_COUNTS *huff_counts, uint trees) |
|
1378 |
{
|
|
1379 |
uint tree; |
|
1380 |
HUFF_TREE *huff_tree; |
|
1381 |
||
1382 |
if (!(huff_tree=(HUFF_TREE*) my_malloc(trees*sizeof(HUFF_TREE), |
|
1383 |
MYF(MY_WME | MY_ZEROFILL)))) |
|
51.1.84
by Jay Pipes
1st round removing DBUG_XXX symbols. Round 2 is custom debugging code layer... |
1384 |
return(0); |
1
by brian
clean slate |
1385 |
|
1386 |
for (tree=0 ; tree < trees ; tree++) |
|
1387 |
{
|
|
1388 |
if (make_huff_tree(huff_tree+tree,huff_counts+tree)) |
|
1389 |
{
|
|
1390 |
while (tree--) |
|
1391 |
my_free((uchar*) huff_tree[tree].element_buffer,MYF(0)); |
|
1392 |
my_free((uchar*) huff_tree,MYF(0)); |
|
51.1.84
by Jay Pipes
1st round removing DBUG_XXX symbols. Round 2 is custom debugging code layer... |
1393 |
return(0); |
1
by brian
clean slate |
1394 |
}
|
1395 |
}
|
|
51.1.84
by Jay Pipes
1st round removing DBUG_XXX symbols. Round 2 is custom debugging code layer... |
1396 |
return(huff_tree); |
1
by brian
clean slate |
1397 |
}
|
1398 |
||
1399 |
/*
|
|
1400 |
Build a Huffman tree.
|
|
1401 |
||
1402 |
SYNOPSIS
|
|
1403 |
make_huff_tree()
|
|
1404 |
huff_tree The Huffman tree.
|
|
1405 |
huff_counts The counts.
|
|
1406 |
||
1407 |
DESCRIPTION
|
|
1408 |
Build a Huffman tree according to huff_counts->counts or
|
|
1409 |
huff_counts->tree_buff. tree_buff, if non-NULL contains up to
|
|
1410 |
tree_buff_length of distinct column values. In that case, whole
|
|
1411 |
values can be Huffman encoded instead of single bytes.
|
|
1412 |
||
1413 |
RETURN
|
|
1414 |
0 OK
|
|
1415 |
!= 0 Error
|
|
1416 |
*/
|
|
1417 |
||
1418 |
static int make_huff_tree(HUFF_TREE *huff_tree, HUFF_COUNTS *huff_counts) |
|
1419 |
{
|
|
1420 |
uint i,found,bits_packed,first,last; |
|
1421 |
my_off_t bytes_packed; |
|
1422 |
HUFF_ELEMENT *a,*b,*new_huff_el; |
|
1423 |
||
1424 |
first=last=0; |
|
1425 |
if (huff_counts->tree_buff) |
|
1426 |
{
|
|
1427 |
/* Calculate the number of distinct values in tree_buff. */
|
|
1428 |
found= (uint) (huff_counts->tree_pos - huff_counts->tree_buff) / |
|
1429 |
huff_counts->field_length; |
|
1430 |
first=0; last=found-1; |
|
1431 |
}
|
|
1432 |
else
|
|
1433 |
{
|
|
1434 |
/* Count the number of byte codes found in the column. */
|
|
1435 |
for (i=found=0 ; i < 256 ; i++) |
|
1436 |
{
|
|
1437 |
if (huff_counts->counts[i]) |
|
1438 |
{
|
|
1439 |
if (! found++) |
|
1440 |
first=i; |
|
1441 |
last=i; |
|
1442 |
}
|
|
1443 |
}
|
|
1444 |
if (found < 2) |
|
1445 |
found=2; |
|
1446 |
}
|
|
1447 |
||
1448 |
/* When using 'tree_buff' we can have more that 256 values. */
|
|
1449 |
if (queue.max_elements < found) |
|
1450 |
{
|
|
1451 |
delete_queue(&queue); |
|
1452 |
if (init_queue(&queue,found,0,0,compare_huff_elements,0)) |
|
1453 |
return -1; |
|
1454 |
}
|
|
1455 |
||
1456 |
/* Allocate or reallocate an element buffer for the Huffman tree. */
|
|
1457 |
if (!huff_tree->element_buffer) |
|
1458 |
{
|
|
1459 |
if (!(huff_tree->element_buffer= |
|
1460 |
(HUFF_ELEMENT*) my_malloc(found*2*sizeof(HUFF_ELEMENT),MYF(MY_WME)))) |
|
1461 |
return 1; |
|
1462 |
}
|
|
1463 |
else
|
|
1464 |
{
|
|
1465 |
HUFF_ELEMENT *temp; |
|
1466 |
if (!(temp= |
|
1467 |
(HUFF_ELEMENT*) my_realloc((uchar*) huff_tree->element_buffer, |
|
1468 |
found*2*sizeof(HUFF_ELEMENT), |
|
1469 |
MYF(MY_WME)))) |
|
1470 |
return 1; |
|
1471 |
huff_tree->element_buffer=temp; |
|
1472 |
}
|
|
1473 |
||
1474 |
huff_counts->tree=huff_tree; |
|
1475 |
huff_tree->counts=huff_counts; |
|
1476 |
huff_tree->min_chr=first; |
|
1477 |
huff_tree->max_chr=last; |
|
1478 |
huff_tree->char_bits=max_bit(last-first); |
|
1479 |
huff_tree->offset_bits=max_bit(found-1)+1; |
|
1480 |
||
1481 |
if (huff_counts->tree_buff) |
|
1482 |
{
|
|
1483 |
huff_tree->elements=0; |
|
1484 |
huff_tree->tree_pack_length=(1+15+16+5+5+ |
|
1485 |
(huff_tree->char_bits+1)*found+ |
|
1486 |
(huff_tree->offset_bits+1)* |
|
1487 |
(found-2)+7)/8 + |
|
1488 |
(uint) (huff_tree->counts->tree_pos- |
|
1489 |
huff_tree->counts->tree_buff); |
|
1490 |
/*
|
|
1491 |
Put a HUFF_ELEMENT into the queue for every distinct column value.
|
|
1492 |
||
1493 |
tree_walk() calls save_counts_in_queue() for every element in
|
|
1494 |
'int_tree'. This takes elements from the target tree's element
|
|
1495 |
buffer and places references to them into the buffer of the
|
|
1496 |
priority queue. We insert in column value order, but the order is
|
|
1497 |
in fact irrelevant here. We will establish the correct order
|
|
1498 |
later.
|
|
1499 |
*/
|
|
1500 |
tree_walk(&huff_counts->int_tree, |
|
1501 |
(int (*)(void*, element_count,void*)) save_counts_in_queue, |
|
1502 |
(uchar*) huff_tree, left_root_right); |
|
1503 |
}
|
|
1504 |
else
|
|
1505 |
{
|
|
1506 |
huff_tree->elements=found; |
|
1507 |
huff_tree->tree_pack_length=(9+9+5+5+ |
|
1508 |
(huff_tree->char_bits+1)*found+ |
|
1509 |
(huff_tree->offset_bits+1)* |
|
1510 |
(found-2)+7)/8; |
|
1511 |
/*
|
|
1512 |
Put a HUFF_ELEMENT into the queue for every byte code found in the column.
|
|
1513 |
||
1514 |
The elements are taken from the target tree's element buffer.
|
|
1515 |
Instead of using queue_insert(), we just place references to the
|
|
1516 |
elements into the buffer of the priority queue. We insert in byte
|
|
1517 |
value order, but the order is in fact irrelevant here. We will
|
|
1518 |
establish the correct order later.
|
|
1519 |
*/
|
|
1520 |
for (i=first, found=0 ; i <= last ; i++) |
|
1521 |
{
|
|
1522 |
if (huff_counts->counts[i]) |
|
1523 |
{
|
|
1524 |
new_huff_el=huff_tree->element_buffer+(found++); |
|
1525 |
new_huff_el->count=huff_counts->counts[i]; |
|
1526 |
new_huff_el->a.leaf.null=0; |
|
1527 |
new_huff_el->a.leaf.element_nr=i; |
|
1528 |
queue.root[found]=(uchar*) new_huff_el; |
|
1529 |
}
|
|
1530 |
}
|
|
1531 |
/*
|
|
1532 |
If there is only a single byte value in this field in all records,
|
|
1533 |
add a second element with zero incidence. This is required to enter
|
|
1534 |
the loop, which builds the Huffman tree.
|
|
1535 |
*/
|
|
1536 |
while (found < 2) |
|
1537 |
{
|
|
1538 |
new_huff_el=huff_tree->element_buffer+(found++); |
|
1539 |
new_huff_el->count=0; |
|
1540 |
new_huff_el->a.leaf.null=0; |
|
1541 |
if (last) |
|
1542 |
new_huff_el->a.leaf.element_nr=huff_tree->min_chr=last-1; |
|
1543 |
else
|
|
1544 |
new_huff_el->a.leaf.element_nr=huff_tree->max_chr=last+1; |
|
1545 |
queue.root[found]=(uchar*) new_huff_el; |
|
1546 |
}
|
|
1547 |
}
|
|
1548 |
||
1549 |
/* Make a queue from the queue buffer. */
|
|
1550 |
queue.elements=found; |
|
1551 |
||
1552 |
/*
|
|
1553 |
Make a priority queue from the queue. Construct its index so that we
|
|
1554 |
have a partially ordered tree.
|
|
1555 |
*/
|
|
1556 |
for (i=found/2 ; i > 0 ; i--) |
|
1557 |
_downheap(&queue,i); |
|
1558 |
||
1559 |
/* The Huffman algorithm. */
|
|
1560 |
bytes_packed=0; bits_packed=0; |
|
1561 |
for (i=1 ; i < found ; i++) |
|
1562 |
{
|
|
1563 |
/*
|
|
1564 |
Pop the top element from the queue (the one with the least incidence).
|
|
1565 |
Popping from a priority queue includes a re-ordering of the queue,
|
|
1566 |
to get the next least incidence element to the top.
|
|
1567 |
*/
|
|
1568 |
a=(HUFF_ELEMENT*) queue_remove(&queue,0); |
|
1569 |
/*
|
|
1570 |
Copy the next least incidence element. The queue implementation
|
|
1571 |
reserves root[0] for temporary purposes. root[1] is the top.
|
|
1572 |
*/
|
|
1573 |
b=(HUFF_ELEMENT*) queue.root[1]; |
|
1574 |
/* Get a new element from the element buffer. */
|
|
1575 |
new_huff_el=huff_tree->element_buffer+found+i; |
|
1576 |
/* The new element gets the sum of the two least incidence elements. */
|
|
1577 |
new_huff_el->count=a->count+b->count; |
|
1578 |
/*
|
|
1579 |
The Huffman algorithm assigns another bit to the code for a byte
|
|
1580 |
every time that byte's incidence is combined (directly or indirectly)
|
|
1581 |
into a new element as one of the two least incidence elements.
|
|
1582 |
This means that one more bit per incidence of that byte is required
|
|
1583 |
in the resulting file. So we add the new combined incidence as the
|
|
1584 |
number of bits by which the result grows.
|
|
1585 |
*/
|
|
1586 |
bits_packed+=(uint) (new_huff_el->count & 7); |
|
1587 |
bytes_packed+=new_huff_el->count/8; |
|
1588 |
/* The new element points to its children, lesser in left. */
|
|
1589 |
new_huff_el->a.nod.left=a; |
|
1590 |
new_huff_el->a.nod.right=b; |
|
1591 |
/*
|
|
1592 |
Replace the copied top element by the new element and re-order the
|
|
1593 |
queue.
|
|
1594 |
*/
|
|
1595 |
queue.root[1]=(uchar*) new_huff_el; |
|
1596 |
queue_replaced(&queue); |
|
1597 |
}
|
|
1598 |
huff_tree->root=(HUFF_ELEMENT*) queue.root[1]; |
|
1599 |
huff_tree->bytes_packed=bytes_packed+(bits_packed+7)/8; |
|
1600 |
return 0; |
|
1601 |
}
|
|
1602 |
||
1603 |
static int compare_tree(void* cmp_arg __attribute__((unused)), |
|
1604 |
register const uchar *s, register const uchar *t) |
|
1605 |
{
|
|
1606 |
uint length; |
|
1607 |
for (length=global_count->field_length; length-- ;) |
|
1608 |
if (*s++ != *t++) |
|
1609 |
return (int) s[-1] - (int) t[-1]; |
|
1610 |
return 0; |
|
1611 |
}
|
|
1612 |
||
1613 |
/*
|
|
1614 |
Organize distinct column values and their incidences into a priority queue.
|
|
1615 |
||
1616 |
SYNOPSIS
|
|
1617 |
save_counts_in_queue()
|
|
1618 |
key The column value.
|
|
1619 |
count The incidence of this value.
|
|
1620 |
tree The Huffman tree to be built later.
|
|
1621 |
||
1622 |
DESCRIPTION
|
|
1623 |
We use the element buffer of the targeted tree. The distinct column
|
|
1624 |
values are organized in a priority queue first. The Huffman
|
|
1625 |
algorithm will later organize the elements into a Huffman tree. For
|
|
1626 |
the time being, we just place references to the elements into the
|
|
1627 |
queue buffer. The buffer will later be organized into a priority
|
|
1628 |
queue.
|
|
1629 |
||
1630 |
RETURN
|
|
1631 |
0
|
|
1632 |
*/
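/*
  Added worked example (hypothetical numbers): if 'tree_buff' holds 4-byte
  column values and 'key' points 12 bytes into that buffer, the element is
  stored with element_nr = 12 / 4 = 3, i.e. it stands for the fourth
  distinct value.
*/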
|
|
1633 |
||
1634 |
static int save_counts_in_queue(uchar *key, element_count count, |
|
1635 |
HUFF_TREE *tree) |
|
1636 |
{
|
|
1637 |
HUFF_ELEMENT *new_huff_el; |
|
1638 |
||
1639 |
new_huff_el=tree->element_buffer+(tree->elements++); |
|
1640 |
new_huff_el->count=count; |
|
1641 |
new_huff_el->a.leaf.null=0; |
|
1642 |
new_huff_el->a.leaf.element_nr= (uint) (key- tree->counts->tree_buff) / |
|
1643 |
tree->counts->field_length; |
|
1644 |
queue.root[tree->elements]=(uchar*) new_huff_el; |
|
1645 |
return 0; |
|
1646 |
}
|
|
1647 |
||
1648 |
||
1649 |
/*
|
|
1650 |
Calculate length of file if given counts should be used.
|
|
1651 |
||
1652 |
SYNOPSIS
|
|
1653 |
calc_packed_length()
|
|
1654 |
huff_counts The counts for a column of the table(s).
|
|
1655 |
add_tree_lenght If the decode tree length should be added.
|
|
1656 |
||
1657 |
DESCRIPTION
|
|
1658 |
We need to follow the Huffman algorithm until we know how many bits
|
|
1659 |
are required for each byte code. But we do not need the resulting
|
|
1660 |
Huffman tree. Hence, we can leave out some steps which are essential
|
|
1661 |
in make_huff_tree().
|
|
1662 |
||
1663 |
RETURN
|
|
1664 |
Number of bytes required to compress this table column.
|
|
1665 |
*/
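/*
  Added note: this mirrors make_huff_tree(), but only the byte count is
  kept. The 'add_tree_lenght' term added below approximates the size of
  the stored decode tree: a few fixed-width header fields plus roughly
  (char_bits+1) bits per leaf entry and (offset_bits+1) bits per offset
  entry, rounded up to whole bytes.
*/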
|
|
1666 |
||
1667 |
static my_off_t calc_packed_length(HUFF_COUNTS *huff_counts, |
|
1668 |
uint add_tree_lenght) |
|
1669 |
{
|
|
1670 |
uint i,found,bits_packed,first,last; |
|
1671 |
my_off_t bytes_packed; |
|
1672 |
HUFF_ELEMENT element_buffer[256]; |
|
1673 |
||
1674 |
/*
|
|
1675 |
WARNING: We use a small hack for efficiency: Instead of placing
|
|
1676 |
references to HUFF_ELEMENTs into the queue, we just insert
|
|
1677 |
references to the counts of the byte codes which appeared in this
|
|
1678 |
table column. During the Huffman algorithm they are successively
|
|
1679 |
replaced by references to HUFF_ELEMENTs. This works, because
|
|
1680 |
HUFF_ELEMENTs have the incidence count at their beginning.
|
|
1681 |
Regardless of whether the queue array contains references to counts of
|
|
1682 |
type my_off_t or references to HUFF_ELEMENTs which have the count of
|
|
1683 |
type my_off_t at their beginning, it always points to a count of the
|
|
1684 |
same type.
|
|
1685 |
||
1686 |
Instead of using queue_insert(), we just copy the references into
|
|
1687 |
the buffer of the priority queue. We insert in byte value order, but
|
|
1688 |
the order is in fact irrelevant here. We will establish the correct
|
|
1689 |
order later.
|
|
1690 |
*/
|
|
1691 |
first=last=0; |
|
1692 |
for (i=found=0 ; i < 256 ; i++) |
|
1693 |
{
|
|
1694 |
if (huff_counts->counts[i]) |
|
1695 |
{
|
|
1696 |
if (! found++) |
|
1697 |
first=i; |
|
1698 |
last=i; |
|
1699 |
/* We start with root[1], which is the queues top element. */
|
|
1700 |
queue.root[found]=(uchar*) &huff_counts->counts[i]; |
|
1701 |
}
|
|
1702 |
}
|
|
1703 |
if (!found) |
|
1704 |
return(0); /* Empty tree */ |
1705 |
/*
|
1706 |
If there is only a single byte value in this field in all records,
|
|
1707 |
add a second element with zero incidence. This is required to enter
|
|
1708 |
the loop, which follows the Huffman algorithm.
|
|
1709 |
*/
|
|
1710 |
if (found < 2) |
|
1711 |
queue.root[++found]=(uchar*) &huff_counts->counts[last ? 0 : 1]; |
|
1712 |
||
1713 |
/* Make a queue from the queue buffer. */
|
|
1714 |
queue.elements=found; |
|
1715 |
||
1716 |
bytes_packed=0; bits_packed=0; |
|
1717 |
/* Add the length of the coding table, which would become part of the file. */
|
|
1718 |
if (add_tree_lenght) |
|
1719 |
bytes_packed=(8+9+5+5+(max_bit(last-first)+1)*found+ |
|
1720 |
(max_bit(found-1)+1+1)*(found-2) +7)/8; |
|
1721 |
||
1722 |
/*
|
|
1723 |
Make a priority queue from the queue. Construct its index so that we
|
|
1724 |
have a partially ordered tree.
|
|
1725 |
*/
|
|
1726 |
for (i=(found+1)/2 ; i > 0 ; i--) |
|
1727 |
_downheap(&queue,i); |
|
1728 |
||
1729 |
/* The Huffman algorithm. */
|
|
1730 |
for (i=0 ; i < found-1 ; i++) |
|
1731 |
{
|
|
1732 |
my_off_t *a; |
|
1733 |
my_off_t *b; |
|
1734 |
HUFF_ELEMENT *new_huff_el; |
|
1735 |
||
1736 |
/*
|
|
1737 |
Pop the top element from the queue (the one with the least
|
|
1738 |
incidence). Popping from a priority queue includes a re-ordering
|
|
1739 |
of the queue, to get the next least incidence element to the top.
|
|
1740 |
*/
|
|
1741 |
a= (my_off_t*) queue_remove(&queue, 0); |
|
1742 |
/*
|
|
1743 |
Copy the next least incidence element. The queue implementation
|
|
1744 |
reserves root[0] for temporary purposes. root[1] is the top.
|
|
1745 |
*/
|
|
1746 |
b= (my_off_t*) queue.root[1]; |
|
1747 |
/* Create a new element in a local (automatic) buffer. */
|
|
1748 |
new_huff_el= element_buffer + i; |
|
1749 |
/* The new element gets the sum of the two least incidence elements. */
|
|
1750 |
new_huff_el->count= *a + *b; |
|
1751 |
/*
|
|
1752 |
The Huffman algorithm assigns another bit to the code for a byte
|
|
1753 |
every time that byte's incidence is combined (directly or indirectly)
|
|
1754 |
into a new element as one of the two least incidence elements.
|
|
1755 |
This means that one more bit per incidence of that byte is required
|
|
1756 |
in the resulting file. So we add the new combined incidence as the
|
|
1757 |
number of bits by which the result grows.
|
|
1758 |
*/
|
|
1759 |
bits_packed+=(uint) (new_huff_el->count & 7); |
|
1760 |
bytes_packed+=new_huff_el->count/8; |
|
1761 |
/*
|
|
1762 |
Replace the copied top element by the new element and re-order the
|
|
1763 |
queue. This successively replaces the references to counts by
|
|
1764 |
references to HUFF_ELEMENTs.
|
|
1765 |
*/
|
|
1766 |
queue.root[1]=(uchar*) new_huff_el; |
|
1767 |
queue_replaced(&queue); |
|
1768 |
}
|
|
1769 |
return(bytes_packed+(bits_packed+7)/8); |
1770 |
}
|
1771 |
||
1772 |
||
1773 |
/* Remove trees that don't give any compression */
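/*
  Added note (hypothetical numbers): two byte-compressed columns are merged
  into one tree when packing their summed counts costs no more than packing
  them separately, data plus tree headers, with ALLOWED_JOIN_DIFF bytes of
  slack. E.g. if the columns alone pack to 1000 and 900 bytes with 60-byte
  tree headers each, the join is accepted if calc_packed_length() on the
  summed counts is at most 1000 + 900 + 60 + 60 + ALLOWED_JOIN_DIFF bytes.
*/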
|
|
1774 |
||
1775 |
static uint join_same_trees(HUFF_COUNTS *huff_counts, uint trees) |
|
1776 |
{
|
|
1777 |
uint k,tree_number; |
|
1778 |
HUFF_COUNTS count,*i,*j,*last_count; |
|
1779 |
||
1780 |
last_count=huff_counts+trees; |
|
1781 |
for (tree_number=0, i=huff_counts ; i < last_count ; i++) |
|
1782 |
{
|
|
1783 |
if (!i->tree->tree_number) |
|
1784 |
{
|
|
1785 |
i->tree->tree_number= ++tree_number; |
|
1786 |
if (i->tree_buff) |
|
1787 |
continue; /* Don't join intervall */ |
|
1788 |
for (j=i+1 ; j < last_count ; j++) |
|
1789 |
{
|
|
1790 |
if (! j->tree->tree_number && ! j->tree_buff) |
|
1791 |
{
|
|
1792 |
for (k=0 ; k < 256 ; k++) |
|
1793 |
count.counts[k]=i->counts[k]+j->counts[k]; |
|
1794 |
if (calc_packed_length(&count,1) <= |
|
1795 |
i->tree->bytes_packed + j->tree->bytes_packed+ |
|
1796 |
i->tree->tree_pack_length+j->tree->tree_pack_length+ |
|
1797 |
ALLOWED_JOIN_DIFF) |
|
1798 |
{
|
|
1799 |
memcpy_fixed((uchar*) i->counts,(uchar*) count.counts, |
|
1800 |
sizeof(count.counts[0])*256); |
|
1801 |
my_free((uchar*) j->tree->element_buffer,MYF(0)); |
|
1802 |
j->tree->element_buffer=0; |
|
1803 |
j->tree=i->tree; |
|
1804 |
bmove((uchar*) i->counts,(uchar*) count.counts, |
|
1805 |
sizeof(count.counts[0])*256); |
|
1806 |
if (make_huff_tree(i->tree,i)) |
|
1807 |
return (uint) -1; |
|
1808 |
}
|
|
1809 |
}
|
|
1810 |
}
|
|
1811 |
}
|
|
1812 |
}
|
|
1813 |
if (verbose) |
|
1814 |
VOID(printf("Original trees: %d After join: %d\n", trees, tree_number)); |
|
1815 |
return tree_number; /* Return trees left */ |
|
1816 |
}
|
|
1817 |
||
1818 |
||
1819 |
/*
|
|
1820 |
Fill in huff_tree encode tables.
|
|
1821 |
||
1822 |
SYNOPSIS
|
|
1823 |
make_huff_decode_table()
|
|
1824 |
huff_tree An array of HUFF_TREE which are to be encoded.
|
|
1825 |
trees The number of HUFF_TREE in the array.
|
|
1826 |
||
1827 |
RETURN
|
|
1828 |
0 success
|
|
1829 |
!= 0 error
|
|
1830 |
*/
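/*
  Added note: code[] and code_len[] below share a single allocation. For a
  byte-compressed column (no tree_buff) that is 256 * (sizeof(uint64_t) +
  sizeof(uchar)) bytes; code_len[] simply starts right behind the last
  code[] slot.
*/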
|
|
1831 |
||
1832 |
static int make_huff_decode_table(HUFF_TREE *huff_tree, uint trees) |
|
1833 |
{
|
|
1834 |
uint elements; |
|
1835 |
for ( ; trees-- ; huff_tree++) |
|
1836 |
{
|
|
1837 |
if (huff_tree->tree_number > 0) |
|
1838 |
{
|
|
1839 |
elements=huff_tree->counts->tree_buff ? huff_tree->elements : 256; |
|
1840 |
if (!(huff_tree->code = |
|
1841 |
(uint64_t*) my_malloc(elements* |
1842 |
(sizeof(uint64_t) + sizeof(uchar)), |
|
1843 |
MYF(MY_WME | MY_ZEROFILL)))) |
1844 |
return 1; |
|
1845 |
huff_tree->code_len=(uchar*) (huff_tree->code+elements); |
|
1846 |
make_traverse_code_tree(huff_tree, huff_tree->root, |
|
1847 |
8 * sizeof(uint64_t), 0LL); |
1848 |
}
|
1849 |
}
|
|
1850 |
return 0; |
|
1851 |
}
|
|
1852 |
||
1853 |
||
1854 |
static void make_traverse_code_tree(HUFF_TREE *huff_tree, |
|
1855 |
HUFF_ELEMENT *element, |
|
1856 |
uint size, uint64_t code) |
1857 |
{
|
1858 |
uint chr; |
|
1859 |
if (!element->a.leaf.null) |
|
1860 |
{
|
|
1861 |
chr=element->a.leaf.element_nr; |
|
1862 |
huff_tree->code_len[chr]= (uchar) (8 * sizeof(uint64_t) - size); |
1863 |
huff_tree->code[chr]= (code >> size); |
1864 |
if (huff_tree->height < 8 * sizeof(uint64_t) - size) |
1865 |
huff_tree->height= 8 * sizeof(uint64_t) - size; |
|
1866 |
}
|
1867 |
else
|
|
1868 |
{
|
|
1869 |
size--; |
|
1870 |
make_traverse_code_tree(huff_tree,element->a.nod.left,size,code); |
|
1871 |
make_traverse_code_tree(huff_tree, element->a.nod.right, size, |
|
1872 |
code + (((uint64_t) 1) << size)); |
1873 |
}
|
1874 |
return; |
|
1875 |
}
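/*
  Added illustration: the recursion starts with size == 8*sizeof(uint64_t)
  and code == 0. A left step only decrements 'size'; a right step also sets
  the bit at the new 'size' position. For a leaf reached by the path
  right-left-right this leaves the bits 101 in the top three positions, so
  code_len becomes 3 and 'code >> size' stores the 3-bit code 0x5.
*/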
|
|
1876 |
||
1877 |
||
1878 |
/*
|
|
1879 |
Convert a value into binary digits.
|
|
1880 |
||
1881 |
SYNOPSIS
|
|
1882 |
bindigits()
|
|
1883 |
value The value.
|
|
1884 |
length The number of low order bits to convert.
|
|
1885 |
||
1886 |
NOTE
|
|
1887 |
The result string is in static storage. It is reused on every call.
|
|
1888 |
So you cannot use it twice in one expression.
|
|
1889 |
||
1890 |
RETURN
|
|
1891 |
A pointer to a static NUL-terminated string.
|
|
1892 |
*/
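/*
  Added usage examples: bindigits(0x5, 3) returns "101" and
  bindigits(0x5, 8) returns "00000101".
*/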
|
|
1893 |
||
1894 |
static char *bindigits(uint64_t value, uint bits) |
1895 |
{
|
1896 |
static char digits[72]; |
|
1897 |
char *ptr= digits; |
|
1898 |
uint idx= bits; |
|
1899 |
||
1900 |
assert(idx < sizeof(digits)); |
1901 |
while (idx) |
1902 |
*(ptr++)= '0' + ((char) (value >> (--idx)) & (char) 1); |
|
1903 |
*ptr= '\0'; |
|
1904 |
return digits; |
|
1905 |
}
|
|
1906 |
||
1907 |
||
1908 |
/*
|
|
1909 |
Convert a value into hexadecimal digits.
|
|
1910 |
||
1911 |
SYNOPSIS
|
|
1912 |
hexdigits()
|
|
1913 |
value The value.
|
|
1914 |
||
1915 |
NOTE
|
|
1916 |
The result string is in static storage. It is reused on every call.
|
|
1917 |
So you cannot use it twice in one expression.
|
|
1918 |
||
1919 |
RETURN
|
|
1920 |
A pointer to a static NUL-terminated string.
|
|
1921 |
*/
|
|
1922 |
||
1923 |
static char *hexdigits(uint64_t value) |
1924 |
{
|
1925 |
static char digits[20]; |
|
1926 |
char *ptr= digits; |
|
1927 |
uint idx= 2 * sizeof(value); /* Two hex digits per byte. */ |
|
1928 |
||
1929 |
assert(idx < sizeof(digits)); |
1930 |
while (idx) |
1931 |
{
|
|
1932 |
if ((*(ptr++)= '0' + ((char) (value >> (4 * (--idx))) & (char) 0xf)) > '9') |
|
1933 |
*(ptr - 1)+= 'a' - '9' - 1; |
|
1934 |
}
|
|
1935 |
*ptr= '\0'; |
|
1936 |
return digits; |
|
1937 |
}
|
|
1938 |
||
1939 |
||
1940 |
/* Write header to new packed data file */
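/*
  Added summary of the header bytes written below (all inside the
  HEAD_LENGTH block): bytes 0-3 magic, 4-7 header length, 8-11 minimum pack
  length, 12-15 maximum pack length, 16-19 total number of tree elements,
  20-23 total interval-buffer length, 24-25 number of trees, byte 26 the
  record reference length and byte 27 the pointer length derived from
  'filelength'.
*/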
|
|
1941 |
||
1942 |
static int write_header(PACK_MRG_INFO *mrg,uint head_length,uint trees, |
|
1943 |
my_off_t tot_elements,my_off_t filelength) |
|
1944 |
{
|
|
1945 |
uchar *buff= (uchar*) file_buffer.pos; |
|
1946 |
||
1947 |
memset(buff, 0, HEAD_LENGTH); |
1948 |
memcpy_fixed(buff,myisam_pack_file_magic,4); |
1949 |
int4store(buff+4,head_length); |
|
1950 |
int4store(buff+8, mrg->min_pack_length); |
|
1951 |
int4store(buff+12,mrg->max_pack_length); |
|
1952 |
int4store(buff+16,tot_elements); |
|
1953 |
int4store(buff+20,intervall_length); |
|
1954 |
int2store(buff+24,trees); |
|
1955 |
buff[26]=(char) mrg->ref_length; |
|
1956 |
/* Save record pointer length */
|
|
1957 |
buff[27]= (uchar) mi_get_pointer_length((uint64_t) filelength,2); |
1958 |
if (test_only) |
1959 |
return 0; |
|
1960 |
VOID(my_seek(file_buffer.file,0L,MY_SEEK_SET,MYF(0))); |
|
1961 |
return my_write(file_buffer.file,(const uchar *) file_buffer.pos,HEAD_LENGTH, |
|
1962 |
MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)) != 0; |
|
1963 |
}
|
|
1964 |
||
1965 |
/* Write fieldinfo to new packed file */
|
|
1966 |
||
1967 |
static void write_field_info(HUFF_COUNTS *counts, uint fields, uint trees) |
|
1968 |
{
|
|
1969 |
register uint i; |
|
1970 |
uint huff_tree_bits; |
|
1971 |
huff_tree_bits=max_bit(trees ? trees-1 : 0); |
|
1972 |
||
1973 |
if (verbose >= 2) |
|
1974 |
{
|
|
1975 |
VOID(printf("\n")); |
|
1976 |
VOID(printf("column types:\n")); |
|
1977 |
VOID(printf("FIELD_NORMAL 0\n")); |
|
1978 |
VOID(printf("FIELD_SKIP_ENDSPACE 1\n")); |
|
1979 |
VOID(printf("FIELD_SKIP_PRESPACE 2\n")); |
|
1980 |
VOID(printf("FIELD_SKIP_ZERO 3\n")); |
|
1981 |
VOID(printf("FIELD_BLOB 4\n")); |
|
1982 |
VOID(printf("FIELD_CONSTANT 5\n")); |
|
1983 |
VOID(printf("FIELD_INTERVALL 6\n")); |
|
1984 |
VOID(printf("FIELD_ZERO 7\n")); |
|
1985 |
VOID(printf("FIELD_VARCHAR 8\n")); |
|
1986 |
VOID(printf("FIELD_CHECK 9\n")); |
|
1987 |
VOID(printf("\n")); |
|
1988 |
VOID(printf("pack type as a set of flags:\n")); |
|
1989 |
VOID(printf("PACK_TYPE_SELECTED 1\n")); |
|
1990 |
VOID(printf("PACK_TYPE_SPACE_FIELDS 2\n")); |
|
1991 |
VOID(printf("PACK_TYPE_ZERO_FILL 4\n")); |
|
1992 |
VOID(printf("\n")); |
|
1993 |
}
|
|
1994 |
for (i=0 ; i++ < fields ; counts++) |
|
1995 |
{
|
|
1996 |
write_bits((uint64_t) (int) counts->field_type, 5); |
1997 |
write_bits(counts->pack_type,6); |
1998 |
if (counts->pack_type & PACK_TYPE_ZERO_FILL) |
|
1999 |
write_bits(counts->max_zero_fill,5); |
|
2000 |
else
|
|
2001 |
write_bits(counts->length_bits,5); |
|
2002 |
write_bits((uint64_t) counts->tree->tree_number - 1, huff_tree_bits); |
2003 |
if (verbose >= 2) |
2004 |
VOID(printf("column: %3u type: %2u pack: %2u zero: %4u lbits: %2u " |
|
2005 |
"tree: %2u length: %4u\n", i , counts->field_type, |
|
2006 |
counts->pack_type, counts->max_zero_fill, counts->length_bits, |
|
2007 |
counts->tree->tree_number, counts->field_length)); |
|
2008 |
}
|
|
2009 |
flush_bits(); |
|
2010 |
return; |
|
2011 |
}
|
|
2012 |
||
2013 |
/* Write all huff_trees to the new datafile. Return the total count of
|
|
2014 |
elements in all trees
|
|
2015 |
Returns 0 on error */
|
|
2016 |
||
2017 |
static my_off_t write_huff_tree(HUFF_TREE *huff_tree, uint trees) |
|
2018 |
{
|
|
2019 |
uint i,int_length; |
|
2020 |
uint tree_no; |
|
2021 |
uint codes; |
|
2022 |
uint errors= 0; |
|
2023 |
uint *packed_tree,*offset,length; |
|
2024 |
my_off_t elements; |
|
2025 |
||
2026 |
/* Find the highest number of elements in the trees. */
|
|
2027 |
for (i=length=0 ; i < trees ; i++) |
|
2028 |
if (huff_tree[i].tree_number > 0 && huff_tree[i].elements > length) |
|
2029 |
length=huff_tree[i].elements; |
|
2030 |
/*
|
|
2031 |
Allocate a buffer for packing a decode tree. Two numbers per element
|
|
2032 |
(left child and right child).
|
|
2033 |
*/
|
|
2034 |
if (!(packed_tree=(uint*) my_alloca(sizeof(uint)*length*2))) |
|
2035 |
{
|
|
2036 |
my_error(EE_OUTOFMEMORY,MYF(ME_BELL),sizeof(uint)*length*2); |
|
2037 |
return 0; |
|
2038 |
}
|
|
2039 |
||
2040 |
if (verbose >= 2) |
|
2041 |
VOID(printf("\n")); |
|
2042 |
tree_no= 0; |
|
2043 |
intervall_length=0; |
|
2044 |
for (elements=0; trees-- ; huff_tree++) |
|
2045 |
{
|
|
2046 |
/* Skip columns that have been joined with other columns. */
|
|
2047 |
if (huff_tree->tree_number == 0) |
|
2048 |
continue; /* Deleted tree */ |
|
2049 |
tree_no++; |
|
2050 |
if (verbose >= 3) |
|
2051 |
VOID(printf("\n")); |
|
2052 |
/* Count the total number of elements (byte codes or column values). */
|
|
2053 |
elements+=huff_tree->elements; |
|
2054 |
huff_tree->max_offset=2; |
|
2055 |
/* Build a tree of offsets and codes for decoding in 'packed_tree'. */
|
|
2056 |
if (huff_tree->elements <= 1) |
|
2057 |
offset=packed_tree; |
|
2058 |
else
|
|
2059 |
offset=make_offset_code_tree(huff_tree,huff_tree->root,packed_tree); |
|
2060 |
||
2061 |
/* This should be the same as 'length' above. */
|
|
2062 |
huff_tree->offset_bits=max_bit(huff_tree->max_offset); |
|
2063 |
||
2064 |
/*
|
|
2065 |
Since we check this during collecting the distinct column values,
|
|
2066 |
this should never happen.
|
|
2067 |
*/
|
|
2068 |
if (huff_tree->max_offset >= IS_OFFSET) |
|
2069 |
{ /* This should be impossible */ |
|
2070 |
VOID(fprintf(stderr, "Tree offset got too big: %d, aborted\n", |
|
2071 |
huff_tree->max_offset)); |
|
2072 |
my_afree((uchar*) packed_tree); |
|
2073 |
return 0; |
|
2074 |
}
|
|
2075 |
||
2076 |
if (!huff_tree->counts->tree_buff) |
|
2077 |
{
|
|
2078 |
/* We do a byte compression on this column. Mark with bit 0. */
|
|
2079 |
write_bits(0,1); |
|
2080 |
write_bits(huff_tree->min_chr,8); |
|
2081 |
write_bits(huff_tree->elements,9); |
|
2082 |
write_bits(huff_tree->char_bits,5); |
|
2083 |
write_bits(huff_tree->offset_bits,5); |
|
2084 |
int_length=0; |
|
2085 |
}
|
|
2086 |
else
|
|
2087 |
{
|
|
2088 |
int_length=(uint) (huff_tree->counts->tree_pos - |
|
2089 |
huff_tree->counts->tree_buff); |
|
2090 |
/* We have distinct column values for this column. Mark with bit 1. */
|
|
2091 |
write_bits(1,1); |
|
2092 |
write_bits(huff_tree->elements,15); |
|
2093 |
write_bits(int_length,16); |
|
2094 |
write_bits(huff_tree->char_bits,5); |
|
2095 |
write_bits(huff_tree->offset_bits,5); |
|
2096 |
intervall_length+=int_length; |
|
2097 |
}
|
|
2098 |
if (verbose >= 2) |
|
2099 |
VOID(printf("tree: %2u elements: %4u char_bits: %2u offset_bits: %2u " |
|
2100 |
"%s: %5u codelen: %2u\n", tree_no, huff_tree->elements, |
|
2101 |
huff_tree->char_bits, huff_tree->offset_bits, |
|
2102 |
huff_tree->counts->tree_buff ? "bufflen" : "min_chr", |
|
2103 |
huff_tree->counts->tree_buff ? int_length : |
|
2104 |
huff_tree->min_chr, huff_tree->height)); |
|
2105 |
||
2106 |
/* Check that the code tree length matches the element count. */
|
|
2107 |
length=(uint) (offset-packed_tree); |
|
2108 |
if (length != huff_tree->elements*2-2) |
|
2109 |
{
|
|
2110 |
VOID(fprintf(stderr, "error: Huff-tree-length: %d != calc_length: %d\n", |
|
2111 |
length, huff_tree->elements * 2 - 2)); |
|
2112 |
errors++; |
|
2113 |
break; |
|
2114 |
}
|
|
2115 |
||
2116 |
for (i=0 ; i < length ; i++) |
|
2117 |
{
|
|
2118 |
if (packed_tree[i] & IS_OFFSET) |
|
2119 |
write_bits(packed_tree[i] - IS_OFFSET+ (1 << huff_tree->offset_bits), |
|
2120 |
huff_tree->offset_bits+1); |
|
2121 |
else
|
|
2122 |
write_bits(packed_tree[i]-huff_tree->min_chr,huff_tree->char_bits+1); |
|
2123 |
if (verbose >= 3) |
|
2124 |
VOID(printf("tree[0x%04x]: %s0x%04x\n", |
|
2125 |
i, (packed_tree[i] & IS_OFFSET) ? " -> " : "", |
|
2126 |
(packed_tree[i] & IS_OFFSET) ? |
|
2127 |
packed_tree[i] - IS_OFFSET + i : packed_tree[i])); |
|
2128 |
}
|
|
2129 |
flush_bits(); |
|
2130 |
||
2131 |
/*
|
|
2132 |
Display coding tables and check their correctness.
|
|
2133 |
*/
|
|
2134 |
codes= huff_tree->counts->tree_buff ? huff_tree->elements : 256; |
|
2135 |
for (i= 0; i < codes; i++) |
|
2136 |
{
|
|
2137 |
uint64_t code; |
2138 |
uint bits; |
2139 |
uint len; |
|
2140 |
uint idx; |
|
2141 |
||
2142 |
if (! (len= huff_tree->code_len[i])) |
|
2143 |
continue; |
|
2144 |
if (verbose >= 3) |
|
2145 |
VOID(printf("code[0x%04x]: 0x%s bits: %2u bin: %s\n", i, |
|
2146 |
hexdigits(huff_tree->code[i]), huff_tree->code_len[i], |
|
2147 |
bindigits(huff_tree->code[i], huff_tree->code_len[i]))); |
|
2148 |
||
2149 |
/* Check that the encode table decodes correctly. */
|
|
2150 |
code= 0; |
|
2151 |
bits= 0; |
|
2152 |
idx= 0; |
|
2153 |
for (;;) |
|
2154 |
{
|
|
2155 |
if (! len) |
|
2156 |
{
|
|
2157 |
VOID(fflush(stdout)); |
|
2158 |
VOID(fprintf(stderr, "error: code 0x%s with %u bits not found\n", |
|
2159 |
hexdigits(huff_tree->code[i]), huff_tree->code_len[i])); |
|
2160 |
errors++; |
|
2161 |
break; |
|
2162 |
}
|
|
2163 |
code<<= 1; |
|
2164 |
code|= (huff_tree->code[i] >> (--len)) & 1; |
|
2165 |
bits++; |
|
2166 |
if (bits > 8 * sizeof(code)) |
|
2167 |
{
|
|
2168 |
VOID(fflush(stdout)); |
|
2169 |
VOID(fprintf(stderr, "error: Huffman code too long: %u/%u\n", |
|
2170 |
bits, (uint) (8 * sizeof(code)))); |
|
2171 |
errors++; |
|
2172 |
break; |
|
2173 |
}
|
|
2174 |
idx+= (uint) code & 1; |
|
2175 |
if (idx >= length) |
|
2176 |
{
|
|
2177 |
VOID(fflush(stdout)); |
|
2178 |
VOID(fprintf(stderr, "error: illegal tree offset: %u/%u\n", |
|
2179 |
idx, length)); |
|
2180 |
errors++; |
|
2181 |
break; |
|
2182 |
}
|
|
2183 |
if (packed_tree[idx] & IS_OFFSET) |
|
2184 |
idx+= packed_tree[idx] & ~IS_OFFSET; |
|
2185 |
else
|
|
2186 |
break; /* Hit a leaf. This contains the result value. */ |
|
2187 |
}
|
|
2188 |
if (errors) |
|
2189 |
break; |
|
2190 |
||
2191 |
if (packed_tree[idx] != i) |
|
2192 |
{
|
|
2193 |
VOID(fflush(stdout)); |
|
2194 |
VOID(fprintf(stderr, "error: decoded value 0x%04x should be: 0x%04x\n", |
|
2195 |
packed_tree[idx], i)); |
|
2196 |
errors++; |
|
2197 |
break; |
|
2198 |
}
|
|
2199 |
} /*end for (codes)*/ |
|
2200 |
if (errors) |
|
2201 |
break; |
|
2202 |
||
2203 |
/* Write column values in case of distinct column value compression. */
|
|
2204 |
if (huff_tree->counts->tree_buff) |
|
2205 |
{
|
|
2206 |
for (i=0 ; i < int_length ; i++) |
|
2207 |
{
|
|
2208 |
write_bits((uint64_t) (uchar) huff_tree->counts->tree_buff[i], 8); |
2209 |
if (verbose >= 3) |
2210 |
VOID(printf("column_values[0x%04x]: 0x%02x\n", |
|
2211 |
i, (uchar) huff_tree->counts->tree_buff[i])); |
|
2212 |
}
|
|
2213 |
}
|
|
2214 |
flush_bits(); |
|
2215 |
}
|
|
2216 |
if (verbose >= 2) |
|
2217 |
VOID(printf("\n")); |
|
2218 |
my_afree((uchar*) packed_tree); |
|
2219 |
if (errors) |
|
2220 |
{
|
|
2221 |
VOID(fprintf(stderr, "Error: Generated decode trees are corrupt. Stop.\n")); |
|
2222 |
return 0; |
|
2223 |
}
|
|
2224 |
return elements; |
|
2225 |
}
|
|
2226 |
||
2227 |
||
2228 |
static uint *make_offset_code_tree(HUFF_TREE *huff_tree, HUFF_ELEMENT *element, |
|
2229 |
uint *offset) |
|
2230 |
{
|
|
2231 |
uint *prev_offset; |
|
2232 |
||
2233 |
prev_offset= offset; |
|
2234 |
/*
|
|
2235 |
'a.leaf.null' takes the same place as 'a.nod.left'. If this is null,
|
|
2236 |
then there is no left child and hence no right child either. This
|
|
2237 |
is a property of a binary tree. An element is either a node with two
|
|
2238 |
children, or a leaf without children.
|
|
2239 |
||
2240 |
The current element is always a node with two children. Go left first.
|
|
2241 |
*/
|
|
2242 |
if (!element->a.nod.left->a.leaf.null) |
|
2243 |
{
|
|
2244 |
/* Store the byte code or the index of the column value. */
|
|
2245 |
prev_offset[0] =(uint) element->a.nod.left->a.leaf.element_nr; |
|
2246 |
offset+=2; |
|
2247 |
}
|
|
2248 |
else
|
|
2249 |
{
|
|
2250 |
/*
|
|
2251 |
Recursively traverse the tree to the left. Mark it as an offset to
|
|
2252 |
another tree node (in contrast to a byte code or column value index).
|
|
2253 |
*/
|
|
2254 |
prev_offset[0]= IS_OFFSET+2; |
|
2255 |
offset=make_offset_code_tree(huff_tree,element->a.nod.left,offset+2); |
|
2256 |
}
|
|
2257 |
||
2258 |
/* Now, check the right child. */
|
|
2259 |
if (!element->a.nod.right->a.leaf.null) |
|
2260 |
{
|
|
2261 |
/* Store the byte code or the index of the column value. */
|
|
2262 |
prev_offset[1]=element->a.nod.right->a.leaf.element_nr; |
|
2263 |
return offset; |
|
2264 |
}
|
|
2265 |
else
|
|
2266 |
{
|
|
2267 |
/*
|
|
2268 |
Recursively traverse the tree to the right. Mark it as an offset to
|
|
2269 |
another tree node (in contrast to a byte code or column value index).
|
|
2270 |
*/
|
|
2271 |
uint temp=(uint) (offset-prev_offset-1); |
|
2272 |
prev_offset[1]= IS_OFFSET+ temp; |
|
2273 |
if (huff_tree->max_offset < temp) |
|
2274 |
huff_tree->max_offset = temp; |
|
2275 |
return make_offset_code_tree(huff_tree,element->a.nod.right,offset); |
|
2276 |
}
|
|
2277 |
}
|
|
2278 |
||
2279 |
/* Get the number of bits needed to represent a value */
|
|
2280 |
||
2281 |
static uint max_bit(register uint value) |
|
2282 |
{
|
|
2283 |
register uint power=1; |
|
2284 |
||
2285 |
while ((value>>=1)) |
|
2286 |
power++; |
|
2287 |
return (power); |
|
2288 |
}
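/*
  Added examples: max_bit(0) and max_bit(1) both return 1, max_bit(2)
  returns 2 and max_bit(255) returns 8, i.e. the number of bits needed to
  represent 'value', with a minimum of one bit.
*/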
|
|
2289 |
||
2290 |
||
2291 |
static int compress_isam_file(PACK_MRG_INFO *mrg, HUFF_COUNTS *huff_counts) |
|
2292 |
{
|
|
2293 |
int error; |
|
2294 |
uint i,max_calc_length,pack_ref_length,min_record_length,max_record_length, |
|
2295 |
intervall,field_length,max_pack_length,pack_blob_length; |
|
2296 |
my_off_t record_count; |
|
2297 |
char llbuf[32]; |
|
2298 |
ulong length,pack_length; |
|
2299 |
uchar *record,*pos,*end_pos,*record_pos,*start_pos; |
|
2300 |
HUFF_COUNTS *count,*end_count; |
|
2301 |
HUFF_TREE *tree; |
|
2302 |
MI_INFO *isam_file=mrg->file[0]; |
|
2303 |
uint pack_version= (uint) isam_file->s->pack.version; |
|
2304 |
||
2305 |
/* Allocate a buffer for the records (excluding blobs). */
|
|
2306 |
if (!(record=(uchar*) my_alloca(isam_file->s->base.reclength))) |
|
2307 |
return -1; |
|
2308 |
||
2309 |
end_count=huff_counts+isam_file->s->base.fields; |
|
2310 |
min_record_length= (uint) ~0; |
|
2311 |
max_record_length=0; |
|
2312 |
||
2313 |
/*
|
|
2314 |
Calculate the maximum number of bits required to pack the records.
|
|
2315 |
Remember to understand 'max_zero_fill' as 'min_zero_fill'.
|
|
2316 |
The tree height determines the maximum number of bits per value.
|
|
2317 |
Some fields skip leading or trailing spaces or zeroes. The skipped
|
|
2318 |
number of bytes is encoded by 'length_bits' bits.
|
|
2319 |
Empty blobs and varchar are encoded with a single 1 bit. Other blobs
|
|
2320 |
and varchar get a leading 0 bit.
|
|
2321 |
*/
|
|
2322 |
for (i=max_calc_length=0 ; i < isam_file->s->base.fields ; i++) |
|
2323 |
{
|
|
2324 |
if (!(huff_counts[i].pack_type & PACK_TYPE_ZERO_FILL)) |
|
2325 |
huff_counts[i].max_zero_fill=0; |
|
2326 |
if (huff_counts[i].field_type == FIELD_CONSTANT || |
|
2327 |
huff_counts[i].field_type == FIELD_ZERO || |
|
2328 |
huff_counts[i].field_type == FIELD_CHECK) |
|
2329 |
continue; |
|
2330 |
if (huff_counts[i].field_type == FIELD_INTERVALL) |
|
2331 |
max_calc_length+=huff_counts[i].tree->height; |
|
2332 |
else if (huff_counts[i].field_type == FIELD_BLOB || |
|
2333 |
huff_counts[i].field_type == FIELD_VARCHAR) |
|
2334 |
max_calc_length+=huff_counts[i].tree->height*huff_counts[i].max_length + huff_counts[i].length_bits +1; |
|
2335 |
else
|
|
2336 |
max_calc_length+= |
|
2337 |
(huff_counts[i].field_length - huff_counts[i].max_zero_fill)* |
|
2338 |
huff_counts[i].tree->height+huff_counts[i].length_bits; |
|
2339 |
}
|
|
2340 |
max_calc_length= (max_calc_length + 7) / 8; |
|
2341 |
pack_ref_length= calc_pack_length(pack_version, max_calc_length); |
|
2342 |
record_count=0; |
|
2343 |
/* 'max_blob_length' is the max length of all blobs of a record. */
|
|
2344 |
pack_blob_length= isam_file->s->base.blobs ? |
|
2345 |
calc_pack_length(pack_version, mrg->max_blob_length) : 0; |
|
2346 |
max_pack_length=pack_ref_length+pack_blob_length; |
|
2347 |
||
2348 |
mrg_reset(mrg); |
|
2349 |
while ((error=mrg_rrnd(mrg,record)) != HA_ERR_END_OF_FILE) |
|
2350 |
{
|
|
2351 |
ulong tot_blob_length=0; |
|
2352 |
if (! error) |
|
2353 |
{
|
|
2354 |
if (flush_buffer((ulong) max_calc_length + (ulong) max_pack_length)) |
|
2355 |
break; |
|
2356 |
record_pos= (uchar*) file_buffer.pos; |
|
2357 |
file_buffer.pos+=max_pack_length; |
|
2358 |
for (start_pos=record, count= huff_counts; count < end_count ; count++) |
|
2359 |
{
|
|
2360 |
end_pos=start_pos+(field_length=count->field_length); |
|
2361 |
tree=count->tree; |
|
2362 |
||
2363 |
/* Check if the column contains spaces only. */
|
|
2364 |
if (count->pack_type & PACK_TYPE_SPACE_FIELDS) |
|
2365 |
{
|
|
2366 |
for (pos=start_pos ; *pos == ' ' && pos < end_pos; pos++) ; |
|
2367 |
if (pos == end_pos) |
|
2368 |
{
|
|
2369 |
write_bits(1,1); |
|
2370 |
start_pos=end_pos; |
|
2371 |
continue; |
|
2372 |
}
|
|
2373 |
write_bits(0,1); |
|
2374 |
}
|
|
2375 |
end_pos-=count->max_zero_fill; |
|
2376 |
field_length-=count->max_zero_fill; |
|
2377 |
||
2378 |
switch (count->field_type) { |
|
2379 |
case FIELD_SKIP_ZERO: |
|
2380 |
if (!memcmp((uchar*) start_pos,zero_string,field_length)) |
|
2381 |
{
|
|
2382 |
write_bits(1,1); |
|
2383 |
start_pos=end_pos; |
|
2384 |
break; |
|
2385 |
}
|
|
2386 |
write_bits(0,1); |
|
2387 |
/* Fall through */
|
|
2388 |
case FIELD_NORMAL: |
|
2389 |
for ( ; start_pos < end_pos ; start_pos++) |
|
2390 |
{
|
|
2391 |
write_bits(tree->code[(uchar) *start_pos], |
|
2392 |
(uint) tree->code_len[(uchar) *start_pos]); |
|
2393 |
}
|
|
2394 |
break; |
|
2395 |
case FIELD_SKIP_ENDSPACE: |
|
2396 |
for (pos=end_pos ; pos > start_pos && pos[-1] == ' ' ; pos--) ; |
|
2397 |
length= (ulong) (end_pos - pos); |
|
2398 |
if (count->pack_type & PACK_TYPE_SELECTED) |
|
2399 |
{
|
|
2400 |
if (length > count->min_space) |
|
2401 |
{
|
|
2402 |
write_bits(1,1); |
|
2403 |
write_bits(length,count->length_bits); |
|
2404 |
}
|
|
2405 |
else
|
|
2406 |
{
|
|
2407 |
write_bits(0,1); |
|
2408 |
pos=end_pos; |
|
2409 |
}
|
|
2410 |
}
|
|
2411 |
else
|
|
2412 |
{
|
|
2413 |
write_bits(length,count->length_bits); |
|
2414 |
}
|
|
2415 |
/* Encode all significant bytes. */
|
|
2416 |
for ( ; start_pos < pos ; start_pos++) |
|
2417 |
{
|
|
2418 |
write_bits(tree->code[(uchar) *start_pos], |
|
2419 |
(uint) tree->code_len[(uchar) *start_pos]); |
|
2420 |
}
|
|
2421 |
start_pos=end_pos; |
|
2422 |
break; |
|
2423 |
case FIELD_SKIP_PRESPACE: |
|
2424 |
for (pos=start_pos ; pos < end_pos && pos[0] == ' ' ; pos++) ; |
|
2425 |
length= (ulong) (pos - start_pos); |
|
2426 |
if (count->pack_type & PACK_TYPE_SELECTED) |
|
2427 |
{
|
|
2428 |
if (length > count->min_space) |
|
2429 |
{
|
|
2430 |
write_bits(1,1); |
|
2431 |
write_bits(length,count->length_bits); |
|
2432 |
}
|
|
2433 |
else
|
|
2434 |
{
|
|
2435 |
pos=start_pos; |
|
2436 |
write_bits(0,1); |
|
2437 |
}
|
|
2438 |
}
|
|
2439 |
else
|
|
2440 |
{
|
|
2441 |
write_bits(length,count->length_bits); |
|
2442 |
}
|
|
2443 |
/* Encode all significant bytes. */
|
|
2444 |
for (start_pos=pos ; start_pos < end_pos ; start_pos++) |
|
2445 |
{
|
|
2446 |
write_bits(tree->code[(uchar) *start_pos], |
|
2447 |
(uint) tree->code_len[(uchar) *start_pos]); |
|
2448 |
}
|
|
2449 |
break; |
|
2450 |
case FIELD_CONSTANT: |
|
2451 |
case FIELD_ZERO: |
|
2452 |
case FIELD_CHECK: |
|
2453 |
start_pos=end_pos; |
|
2454 |
break; |
|
2455 |
case FIELD_INTERVALL: |
|
2456 |
global_count=count; |
|
2457 |
pos=(uchar*) tree_search(&count->int_tree, start_pos, |
|
2458 |
count->int_tree.custom_arg); |
|
2459 |
intervall=(uint) (pos - count->tree_buff)/field_length; |
|
2460 |
write_bits(tree->code[intervall],(uint) tree->code_len[intervall]); |
|
2461 |
start_pos=end_pos; |
|
2462 |
break; |
|
2463 |
case FIELD_BLOB: |
|
2464 |
{
|
|
2465 |
ulong blob_length=_mi_calc_blob_length(field_length- |
|
2466 |
portable_sizeof_char_ptr, |
|
2467 |
start_pos); |
|
2468 |
/* Empty blobs are encoded with a single 1 bit. */
|
|
2469 |
if (!blob_length) |
|
2470 |
{
|
|
2471 |
write_bits(1,1); |
|
2472 |
}
|
|
2473 |
else
|
|
2474 |
{
|
|
2475 |
uchar *blob,*blob_end; |
|
2476 |
write_bits(0,1); |
|
2477 |
/* Write the blob length. */
|
|
2478 |
write_bits(blob_length,count->length_bits); |
|
2479 |
memcpy_fixed(&blob,end_pos-portable_sizeof_char_ptr, |
|
2480 |
sizeof(char*)); |
|
2481 |
blob_end=blob+blob_length; |
|
2482 |
/* Encode the blob bytes. */
|
|
2483 |
for ( ; blob < blob_end ; blob++) |
|
2484 |
{
|
|
2485 |
write_bits(tree->code[(uchar) *blob], |
|
2486 |
(uint) tree->code_len[(uchar) *blob]); |
|
2487 |
}
|
|
2488 |
tot_blob_length+=blob_length; |
|
2489 |
}
|
|
2490 |
start_pos= end_pos; |
|
2491 |
break; |
|
2492 |
}
|
|
2493 |
case FIELD_VARCHAR: |
|
2494 |
{
|
|
2495 |
uint var_pack_length= HA_VARCHAR_PACKLENGTH(count->field_length-1); |
|
2496 |
ulong col_length= (var_pack_length == 1 ? |
|
2497 |
(uint) *(uchar*) start_pos : |
|
2498 |
uint2korr(start_pos)); |
|
2499 |
/* An empty varchar is encoded with a single 1 bit. */
|
|
2500 |
if (!col_length) |
|
2501 |
{
|
|
2502 |
write_bits(1,1); /* Empty varchar */ |
|
2503 |
}
|
|
2504 |
else
|
|
2505 |
{
|
|
2506 |
uchar *end= start_pos + var_pack_length + col_length; |
|
2507 |
write_bits(0,1); |
|
2508 |
/* Write the varchar length. */
|
|
2509 |
write_bits(col_length,count->length_bits); |
|
2510 |
/* Encode the varchar bytes. */
|
|
2511 |
for (start_pos+= var_pack_length ; start_pos < end ; start_pos++) |
|
2512 |
{
|
|
2513 |
write_bits(tree->code[(uchar) *start_pos], |
|
2514 |
(uint) tree->code_len[(uchar) *start_pos]); |
|
2515 |
}
|
|
2516 |
}
|
|
2517 |
start_pos= end_pos; |
|
2518 |
break; |
|
2519 |
}
|
|
2520 |
case FIELD_LAST: |
|
2521 |
case FIELD_enum_val_count: |
|
2522 |
abort(); /* Impossible */ |
|
2523 |
}
|
|
2524 |
start_pos+=count->max_zero_fill; |
|
2525 |
}
|
|
2526 |
flush_bits(); |
|
2527 |
length=(ulong) ((uchar*) file_buffer.pos - record_pos) - max_pack_length; |
|
2528 |
pack_length= save_pack_length(pack_version, record_pos, length); |
|
2529 |
if (pack_blob_length) |
|
2530 |
pack_length+= save_pack_length(pack_version, record_pos + pack_length, |
|
2531 |
tot_blob_length); |
|
2532 |
/* Correct file buffer if the header was smaller */
|
|
2533 |
if (pack_length != max_pack_length) |
|
2534 |
{
|
|
2535 |
bmove(record_pos+pack_length,record_pos+max_pack_length,length); |
|
2536 |
file_buffer.pos-= (max_pack_length-pack_length); |
|
2537 |
}
|
|
2538 |
if (length < (ulong) min_record_length) |
|
2539 |
min_record_length=(uint) length; |
|
2540 |
if (length > (ulong) max_record_length) |
|
2541 |
max_record_length=(uint) length; |
|
2542 |
record_count++; |
|
2543 |
if (write_loop && record_count % WRITE_COUNT == 0) |
|
2544 |
{
|
|
2545 |
VOID(printf("%lu\r", (ulong) record_count)); |
|
2546 |
VOID(fflush(stdout)); |
|
2547 |
}
|
|
2548 |
}
|
|
2549 |
else if (error != HA_ERR_RECORD_DELETED) |
|
2550 |
break; |
|
2551 |
}
|
|
2552 |
if (error == HA_ERR_END_OF_FILE) |
|
2553 |
error=0; |
|
2554 |
else
|
|
2555 |
{
|
|
2556 |
VOID(fprintf(stderr, "%s: Got error %d reading records\n", |
|
2557 |
my_progname, error)); |
|
2558 |
}
|
|
2559 |
if (verbose >= 2) |
|
2560 |
VOID(printf("wrote %s records.\n", llstr((int64_t) record_count, llbuf))); |
2561 |
|
2562 |
my_afree((uchar*) record); |
|
2563 |
mrg->ref_length=max_pack_length; |
|
2564 |
mrg->min_pack_length=max_record_length ? min_record_length : 0; |
|
2565 |
mrg->max_pack_length=max_record_length; |
|
2566 |
return(error || error_on_write || flush_buffer(~(ulong) 0)); |
2567 |
}
|
2568 |
||
2569 |
||
2570 |
static char *make_new_name(char *new_name, char *old_name) |
|
2571 |
{
|
|
2572 |
return fn_format(new_name,old_name,"",DATA_TMP_EXT,2+4); |
|
2573 |
}
|
|
2574 |
||
2575 |
static char *make_old_name(char *new_name, char *old_name) |
|
2576 |
{
|
|
2577 |
return fn_format(new_name,old_name,"",OLD_EXT,2+4); |
|
2578 |
}
|
|
2579 |
||
2580 |
/* Routines for the bit-writing buffer */
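/*
  Added overview: output bits are collected in file_buffer.bitbucket, a
  BITS_SAVED-bit accumulator. file_buffer.bits counts the free bit
  positions left in the accumulator; full bytes are copied out to
  'buffer', and 'end' is kept 8 bytes before the real end of the
  allocation so a whole accumulator can be dumped before the overflow
  check (see flush_buffer()).
*/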
|
|
2581 |
||
2582 |
static void init_file_buffer(File file, bool read_buffer) |
2583 |
{
|
2584 |
file_buffer.file=file; |
|
2585 |
file_buffer.buffer= (uchar*) my_malloc(ALIGN_SIZE(RECORD_CACHE_SIZE), |
|
2586 |
MYF(MY_WME)); |
|
2587 |
file_buffer.end=file_buffer.buffer+ALIGN_SIZE(RECORD_CACHE_SIZE)-8; |
|
2588 |
file_buffer.pos_in_file=0; |
|
2589 |
error_on_write=0; |
|
2590 |
if (read_buffer) |
|
2591 |
{
|
|
2592 |
||
2593 |
file_buffer.pos=file_buffer.end; |
|
2594 |
file_buffer.bits=0; |
|
2595 |
}
|
|
2596 |
else
|
|
2597 |
{
|
|
2598 |
file_buffer.pos=file_buffer.buffer; |
|
2599 |
file_buffer.bits=BITS_SAVED; |
|
2600 |
}
|
|
2601 |
file_buffer.bitbucket= 0; |
|
2602 |
}
|
|
2603 |
||
2604 |
||
2605 |
static int flush_buffer(ulong neaded_length) |
|
2606 |
{
|
|
2607 |
ulong length; |
|
2608 |
||
2609 |
/*
|
|
2610 |
file_buffer.end is 8 bytes lower than the real end of the buffer.
|
|
2611 |
This is done so that the end-of-buffer condition does not need to be
|
|
2612 |
checked for every byte (see write_bits()). Consequently,
|
|
2613 |
file_buffer.pos can become greater than file_buffer.end. The
|
|
2614 |
algorithms in the other functions ensure that there will never be
|
|
2615 |
more than 8 bytes written to the buffer without an end-of-buffer
|
|
2616 |
check. So the buffer cannot be overrun. But we need to check for the
|
|
2617 |
near-to-buffer-end condition to avoid a negative result, which is
|
|
2618 |
cast to unsigned and thus becomes huge.
|
|
2619 |
*/
|
|
2620 |
if ((file_buffer.pos < file_buffer.end) && |
|
2621 |
((ulong) (file_buffer.end - file_buffer.pos) > neaded_length)) |
|
2622 |
return 0; |
|
2623 |
length=(ulong) (file_buffer.pos-file_buffer.buffer); |
|
2624 |
file_buffer.pos=file_buffer.buffer; |
|
2625 |
file_buffer.pos_in_file+=length; |
|
2626 |
if (test_only) |
|
2627 |
return 0; |
|
2628 |
if (error_on_write|| my_write(file_buffer.file, |
|
2629 |
(const uchar*) file_buffer.buffer, |
|
2630 |
length, |
|
2631 |
MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL))) |
|
2632 |
{
|
|
2633 |
error_on_write=1; |
|
2634 |
return 1; |
|
2635 |
}
|
|
2636 |
||
2637 |
if (neaded_length != ~(ulong) 0 && |
|
2638 |
(ulong) (file_buffer.end-file_buffer.buffer) < neaded_length) |
|
2639 |
{
|
|
2640 |
char *tmp; |
|
2641 |
neaded_length+=256; /* some margin */ |
|
2642 |
tmp= my_realloc((char*) file_buffer.buffer, neaded_length,MYF(MY_WME)); |
|
2643 |
if (!tmp) |
|
2644 |
return 1; |
|
2645 |
file_buffer.pos= ((uchar*) tmp + |
|
2646 |
(ulong) (file_buffer.pos - file_buffer.buffer)); |
|
2647 |
file_buffer.buffer= (uchar*) tmp; |
|
2648 |
file_buffer.end= (uchar*) (tmp+neaded_length-8); |
|
2649 |
}
|
|
2650 |
return 0; |
|
2651 |
}
|
|
2652 |
||
2653 |
||
2654 |
static void end_file_buffer(void) |
|
2655 |
{
|
|
2656 |
my_free((uchar*) file_buffer.buffer,MYF(0)); |
|
2657 |
}
|
|
2658 |
||
2659 |
/* Output the `bits` low-order bits of `value`. */
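/*
  Added example (assuming the 64-bit BITS_SAVED case): with an empty
  accumulator file_buffer.bits is 64, so write_bits(0x5, 3) leaves
  bits == 61 and ORs the value into the top three positions of the
  bucket. The self-contained model below (disabled with #if 0) shows
  that step in isolation.
*/
#if 0
#include <stdint.h>
#include <stdio.h>

int main(void)
{
  uint64_t bucket= 0;
  int free_bits= 64;                 /* corresponds to file_buffer.bits */

  /* write_bits(0x5, 3): the value lands in the highest free positions. */
  free_bits-= 3;
  bucket|= (uint64_t) 0x5 << free_bits;

  printf("top byte: 0x%02x\n", (unsigned) (bucket >> 56)); /* 0xa0 */
  return 0;
}
#endif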
|
|
2660 |
||
2661 |
static void write_bits(register uint64_t value, register uint bits) |
2662 |
{
|
2663 |
assert(((bits < 8 * sizeof(value)) && ! (value >> bits)) || |
2664 |
(bits == 8 * sizeof(value))); |
2665 |
||
2666 |
if ((file_buffer.bits-= (int) bits) >= 0) |
|
2667 |
{
|
|
2668 |
file_buffer.bitbucket|= value << file_buffer.bits; |
|
2669 |
}
|
|
2670 |
else
|
|
2671 |
{
|
|
2672 |
register uint64_t bit_buffer; |
2673 |
bits= (uint) -file_buffer.bits; |
2674 |
bit_buffer= (file_buffer.bitbucket | |
|
2675 |
((bits != 8 * sizeof(value)) ? (value >> bits) : 0)); |
|
2676 |
#if BITS_SAVED == 64
|
|
2677 |
*file_buffer.pos++= (uchar) (bit_buffer >> 56); |
|
2678 |
*file_buffer.pos++= (uchar) (bit_buffer >> 48); |
|
2679 |
*file_buffer.pos++= (uchar) (bit_buffer >> 40); |
|
2680 |
*file_buffer.pos++= (uchar) (bit_buffer >> 32); |
|
2681 |
#endif
|
|
2682 |
*file_buffer.pos++= (uchar) (bit_buffer >> 24); |
|
2683 |
*file_buffer.pos++= (uchar) (bit_buffer >> 16); |
|
2684 |
*file_buffer.pos++= (uchar) (bit_buffer >> 8); |
|
2685 |
*file_buffer.pos++= (uchar) (bit_buffer); |
|
2686 |
||
2687 |
if (bits != 8 * sizeof(value)) |
|
2688 |
value&= (((uint64_t) 1) << bits) - 1; |
2689 |
if (file_buffer.pos >= file_buffer.end) |
2690 |
VOID(flush_buffer(~ (ulong) 0)); |
|
2691 |
file_buffer.bits=(int) (BITS_SAVED - bits); |
|
2692 |
file_buffer.bitbucket= value << (BITS_SAVED - bits); |
|
2693 |
}
|
|
2694 |
return; |
|
2695 |
}
|
|
2696 |
||
2697 |
/* Flush bits in bit_buffer to buffer */
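/*
  Added example: with five bits pending, file_buffer.bits is BITS_SAVED - 5.
  Rounding that down to a byte boundary makes the loop below emit exactly
  one byte, holding the five pending bits in its high positions and zero
  padding in the low ones.
*/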
|
|
2698 |
||
2699 |
static void flush_bits(void) |
|
2700 |
{
|
|
2701 |
int bits; |
|
2702 |
uint64_t bit_buffer; |
2703 |
|
2704 |
bits= file_buffer.bits & ~7; |
|
2705 |
bit_buffer= file_buffer.bitbucket >> bits; |
|
2706 |
bits= BITS_SAVED - bits; |
|
2707 |
while (bits > 0) |
|
2708 |
{
|
|
2709 |
bits-= 8; |
|
2710 |
*file_buffer.pos++= (uchar) (bit_buffer >> bits); |
|
2711 |
}
|
|
2712 |
if (file_buffer.pos >= file_buffer.end) |
|
2713 |
VOID(flush_buffer(~ (ulong) 0)); |
|
2714 |
file_buffer.bits= BITS_SAVED; |
|
2715 |
file_buffer.bitbucket= 0; |
|
2716 |
}
|
|
2717 |
||
2718 |
||
2719 |
/****************************************************************************
|
|
2720 |
** functions to handle the joined files
|
|
2721 |
****************************************************************************/
|
|
2722 |
||
2723 |
static int save_state(MI_INFO *isam_file,PACK_MRG_INFO *mrg,my_off_t new_length, |
|
2724 |
ha_checksum crc) |
|
2725 |
{
|
|
2726 |
MYISAM_SHARE *share=isam_file->s; |
|
2727 |
uint options=mi_uint2korr(share->state.header.options); |
|
2728 |
uint key; |
|
2729 |
||
2730 |
options|= HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA; |
|
2731 |
mi_int2store(share->state.header.options,options); |
|
2732 |
||
2733 |
share->state.state.data_file_length=new_length; |
|
2734 |
share->state.state.del=0; |
|
2735 |
share->state.state.empty=0; |
|
2736 |
share->state.dellink= HA_OFFSET_ERROR; |
|
2737 |
share->state.split=(ha_rows) mrg->records; |
|
2738 |
share->state.version=(ulong) time((time_t*) 0); |
|
2739 |
if (! mi_is_all_keys_active(share->state.key_map, share->base.keys)) |
|
2740 |
{
|
|
2741 |
/*
|
|
2742 |
Some indexes are disabled, cannot use current key_file_length value
|
|
2743 |
as an estimate of upper bound of index file size. Use packed data file
|
|
2744 |
size instead.
|
|
2745 |
*/
|
|
2746 |
share->state.state.key_file_length= new_length; |
|
2747 |
}
|
|
2748 |
/*
|
|
2749 |
If there are no disabled indexes, keep key_file_length value from
|
|
2750 |
original file so "myisamchk -rq" can use this value (this is necessary
|
|
2751 |
because index size cannot be easily calculated for fulltext keys)
|
|
2752 |
*/
|
|
2753 |
mi_clear_all_keys_active(share->state.key_map); |
|
2754 |
for (key=0 ; key < share->base.keys ; key++) |
|
2755 |
share->state.key_root[key]= HA_OFFSET_ERROR; |
|
2756 |
for (key=0 ; key < share->state.header.max_block_size_index ; key++) |
|
2757 |
share->state.key_del[key]= HA_OFFSET_ERROR; |
|
2758 |
isam_file->state->checksum=crc; /* Save crc here */ |
|
2759 |
share->changed=1; /* Force write of header */ |
|
2760 |
share->state.open_count=0; |
|
2761 |
share->global_changed=0; |
|
2762 |
(void)ftruncate(share->kfile, share->base.keystart); |
2763 |
if (share->base.keys) |
2764 |
isamchk_neaded=1; |
|
2765 |
return(mi_state_info_write(share->kfile,&share->state,1+2)); |
2766 |
}
|
2767 |
||
2768 |
||
2769 |
static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length, |
|
2770 |
ha_checksum crc) |
|
2771 |
{
|
|
2772 |
MI_STATE_INFO state; |
|
2773 |
MI_INFO *isam_file=mrg->file[0]; |
|
2774 |
uint options; |
|
2775 |
||
2776 |
state= isam_file->s->state; |
|
2777 |
options= (mi_uint2korr(state.header.options) | HA_OPTION_COMPRESS_RECORD | |
|
2778 |
HA_OPTION_READ_ONLY_DATA); |
|
2779 |
mi_int2store(state.header.options,options); |
|
2780 |
state.state.data_file_length=new_length; |
|
2781 |
state.state.del=0; |
|
2782 |
state.state.empty=0; |
|
2783 |
state.state.records=state.split=(ha_rows) mrg->records; |
|
2784 |
/* See comment above in save_state about key_file_length handling. */
|
|
2785 |
if (mrg->src_file_has_indexes_disabled) |
|
2786 |
{
|
|
2787 |
isam_file->s->state.state.key_file_length= |
|
2788 |
max(isam_file->s->state.state.key_file_length, new_length); |
|
2789 |
}
|
|
2790 |
state.dellink= HA_OFFSET_ERROR; |
|
2791 |
state.version=(ulong) time((time_t*) 0); |
|
2792 |
mi_clear_all_keys_active(state.key_map); |
|
2793 |
state.state.checksum=crc; |
|
2794 |
if (isam_file->s->base.keys) |
|
2795 |
isamchk_neaded=1; |
|
2796 |
state.changed=STATE_CHANGED | STATE_NOT_ANALYZED; /* Force check of table */ |
|
2797 |
return (mi_state_info_write(file,&state,1+2)); |
2798 |
}
|
2799 |
||
2800 |
||
2801 |
/* reset for mrg_rrnd */
|
|
2802 |
||
2803 |
static void mrg_reset(PACK_MRG_INFO *mrg) |
|
2804 |
{
|
|
2805 |
if (mrg->current) |
|
2806 |
{
|
|
2807 |
mi_extra(*mrg->current, HA_EXTRA_NO_CACHE, 0); |
|
2808 |
mrg->current=0; |
|
2809 |
}
|
|
2810 |
}
|
|
2811 |
||
2812 |
static int mrg_rrnd(PACK_MRG_INFO *info,uchar *buf) |
|
2813 |
{
|
|
2814 |
int error; |
|
2815 |
MI_INFO *isam_info; |
|
2816 |
my_off_t filepos; |
|
2817 |
||
2818 |
if (!info->current) |
|
2819 |
{
|
|
2820 |
isam_info= *(info->current=info->file); |
|
2821 |
info->end=info->current+info->count; |
|
2822 |
mi_reset(isam_info); |
|
2823 |
mi_extra(isam_info, HA_EXTRA_CACHE, 0); |
|
2824 |
filepos=isam_info->s->pack.header_length; |
|
2825 |
}
|
|
2826 |
else
|
|
2827 |
{
|
|
2828 |
isam_info= *info->current; |
|
2829 |
filepos= isam_info->nextpos; |
|
2830 |
}
|
|
2831 |
||
2832 |
for (;;) |
|
2833 |
{
|
|
2834 |
isam_info->update&= HA_STATE_CHANGED; |
|
2835 |
if (!(error=(*isam_info->s->read_rnd)(isam_info,(uchar*) buf, |
|
2836 |
filepos, 1)) || |
|
2837 |
error != HA_ERR_END_OF_FILE) |
|
2838 |
return (error); |
|
2839 |
mi_extra(isam_info,HA_EXTRA_NO_CACHE, 0); |
|
2840 |
if (info->current+1 == info->end) |
|
2841 |
return(HA_ERR_END_OF_FILE); |
|
2842 |
info->current++; |
|
2843 |
isam_info= *info->current; |
|
2844 |
filepos=isam_info->s->pack.header_length; |
|
2845 |
mi_reset(isam_info); |
|
2846 |
mi_extra(isam_info,HA_EXTRA_CACHE, 0); |
|
2847 |
}
|
|
2848 |
}
|
|
2849 |
||
2850 |
||
2851 |
static int mrg_close(PACK_MRG_INFO *mrg) |
|
2852 |
{
|
|
2853 |
uint i; |
|
2854 |
int error=0; |
|
2855 |
for (i=0 ; i < mrg->count ; i++) |
|
2856 |
error|=mi_close(mrg->file[i]); |
|
2857 |
if (mrg->free_file) |
|
2858 |
my_free((uchar*) mrg->file,MYF(0)); |
|
2859 |
return error; |
|
2860 |
}
|