~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2001 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
/*
17
  Function to handle quick removal of duplicates
18
  This code is used when doing multi-table deletes to find the rows in
19
  reference tables that needs to be deleted.
20
21
  The basic idea is as follows:
22
23
  Store first all strings in a binary tree, ignoring duplicates.
24
  When the tree uses more memory than 'max_heap_table_size',
25
  write the tree (in sorted order) out to disk and start with a new tree.
26
  When all data has been generated, merge the trees (removing any found
27
  duplicates).
28
29
  The unique entries will be returned in sort order, to ensure that we do the
30
  deletes in disk order.
31
*/
32
1241.9.36 by Monty Taylor
ZOMG. I deleted drizzled/server_includes.h.
33
#include "config.h"
1241.9.1 by Monty Taylor
Removed global.h. Fixed all the headers.
34
35
#include <math.h>
36
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
37
#include <queue>
572.1.4 by Monty Taylor
Removed a bunch of unusued tests and defines from autoconf.
38
1241.9.23 by Monty Taylor
Removed sql_table.h from server_includes.h.
39
#include "drizzled/sql_sort.h"
40
#include "drizzled/session.h"
41
#include "drizzled/sql_list.h"
1241.9.64 by Monty Taylor
Moved remaining non-public portions of mysys and mystrings to drizzled/internal.
42
#include "drizzled/internal/iocache.h"
1241.9.23 by Monty Taylor
Removed sql_table.h from server_includes.h.
43
572.1.4 by Monty Taylor
Removed a bunch of unusued tests and defines from autoconf.
44
#if defined(CMATH_NAMESPACE)
45
using namespace CMATH_NAMESPACE;
46
#endif
1 by brian
clean slate
47
1253.1.6 by Monty Taylor
Moved mem_root functions into drizzled::memory:: namespace.
48
using namespace drizzled;
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
49
using namespace std;
50
1 by brian
clean slate
51
1241.9.46 by Monty Taylor
Removed some more evil.
52
int unique_write_to_file(unsigned char* key, uint32_t,
77.1.45 by Monty Taylor
Warning fixes.
53
                         Unique *unique)
1 by brian
clean slate
54
{
55
  /*
56
    Use unique->size (size of element stored in the tree) and not
57
    unique->tree.size_of_element. The latter is different from unique->size
58
    when tree implementation chooses to store pointer to key in TREE_ELEMENT
59
    (instead of storing the element itself there)
60
  */
1241.9.50 by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/
61
  return my_b_write(unique->file, key, unique->size) ? 1 : 0;
1 by brian
clean slate
62
}
63
481 by Brian Aker
Remove all of uchar.
64
int unique_write_to_ptrs(unsigned char* key,
1241.9.46 by Monty Taylor
Removed some more evil.
65
                         uint32_t, Unique *unique)
1 by brian
clean slate
66
{
67
  memcpy(unique->record_pointers, key, unique->size);
68
  unique->record_pointers+=unique->size;
69
  return 0;
70
}
71
72
Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
641.3.8 by Monty Taylor
Removed my_malloc from drizzled.
73
	       uint32_t size_arg, size_t max_in_memory_size_arg)
1241.9.50 by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/
74
  : max_in_memory_size(max_in_memory_size_arg),
1253.1.6 by Monty Taylor
Moved mem_root functions into drizzled::memory:: namespace.
75
    file(static_cast<IO_CACHE *>(memory::sql_calloc(sizeof(IO_CACHE)))),
1241.9.50 by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/
76
    size(size_arg),
77
    elements(0)
1 by brian
clean slate
78
{
1241.9.50 by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/
79
  my_b_clear(file);
1164 by Brian Aker
Small cleanup.
80
  init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func, false,
1 by brian
clean slate
81
            NULL, comp_func_fixed_arg);
82
  /* If the following fail's the next add will also fail */
83
  my_init_dynamic_array(&file_ptrs, sizeof(BUFFPEK), 16, 16);
84
  /*
85
    If you change the following, change it in get_max_elements function, too.
86
  */
87
  max_elements= (ulong) (max_in_memory_size /
88
                         ALIGN_SIZE(sizeof(TREE_ELEMENT)+size));
1241.9.50 by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/
89
  open_cached_file(file, drizzle_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE,
398.1.10 by Monty Taylor
Actually removed VOID() this time.
90
                   MYF(MY_WME));
1 by brian
clean slate
91
}
92
93
94
/*
95
  Calculate log2(n!)
96
97
  NOTES
98
    Stirling's approximate formula is used:
99
100
      n! ~= sqrt(2*M_PI*n) * (n/M_E)^n
101
102
    Derivation of formula used for calculations is as follows:
103
104
    log2(n!) = log(n!)/log(2) = log(sqrt(2*M_PI*n)*(n/M_E)^n) / log(2) =
105
106
      = (log(2*M_PI*n)/2 + n*log(n/M_E)) / log(2).
107
*/
108
109
inline double log2_n_fact(double x)
110
{
111
  return (log(2*M_PI*x)/2 + x*log(x/M_E)) / M_LN2;
112
}
113
114
115
/*
116
  Calculate cost of merge_buffers function call for given sequence of
117
  input stream lengths and store the number of rows in result stream in *last.
118
119
  SYNOPSIS
120
    get_merge_buffers_cost()
121
      buff_elems  Array of #s of elements in buffers
122
      elem_size   Size of element stored in buffer
123
      first       Pointer to first merged element size
124
      last        Pointer to last merged element size
125
126
  RETURN
127
    Cost of merge_buffers operation in disk seeks.
128
129
  NOTES
130
    It is assumed that no rows are eliminated during merge.
131
    The cost is calculated as
132
133
      cost(read_and_write) + cost(merge_comparisons).
134
135
    All bytes in the sequences is read and written back during merge so cost
136
    of disk io is 2*elem_size*total_buf_elems/IO_SIZE (2 is for read + write)
137
138
    For comparisons cost calculations we assume that all merged sequences have
139
    the same length, so each of total_buf_size elements will be added to a sort
140
    heap with (n_buffers-1) elements. This gives the comparison cost:
141
142
      total_buf_elems* log2(n_buffers) / TIME_FOR_COMPARE_ROWID;
143
*/
144
654 by Brian Aker
Remove unused (yet more)
145
static double get_merge_buffers_cost(uint32_t *, uint32_t elem_size,
482 by Brian Aker
Remove uint.
146
                                     uint32_t *first, uint32_t *last)
1 by brian
clean slate
147
{
482 by Brian Aker
Remove uint.
148
  uint32_t total_buf_elems= 0;
149
  for (uint32_t *pbuf= first; pbuf <= last; pbuf++)
1 by brian
clean slate
150
    total_buf_elems+= *pbuf;
151
  *last= total_buf_elems;
152
153
  int n_buffers= last - first + 1;
154
155
  /* Using log2(n)=log(n)/log(2) formula */
156
  return 2*((double)total_buf_elems*elem_size) / IO_SIZE +
157
     total_buf_elems*log((double) n_buffers) / (TIME_FOR_COMPARE_ROWID * M_LN2);
158
}
159
160
161
/*
162
  Calculate cost of merging buffers into one in Unique::get, i.e. calculate
163
  how long (in terms of disk seeks) the two calls
164
    merge_many_buffs(...);
165
    merge_buffers(...);
166
  will take.
167
168
  SYNOPSIS
169
    get_merge_many_buffs_cost()
170
      buffer        buffer space for temporary data, at least
171
                    Unique::get_cost_calc_buff_size bytes
172
      maxbuffer     # of full buffers
173
      max_n_elems   # of elements in first maxbuffer buffers
174
      last_n_elems  # of elements in last buffer
175
      elem_size     size of buffer element
176
177
  NOTES
178
    maxbuffer+1 buffers are merged, where first maxbuffer buffers contain
179
    max_n_elems elements each and last buffer contains last_n_elems elements.
180
181
    The current implementation does a dumb simulation of merge_many_buffs
182
    function actions.
183
184
  RETURN
185
    Cost of merge in disk seeks.
186
*/
187
482 by Brian Aker
Remove uint.
188
static double get_merge_many_buffs_cost(uint32_t *buffer,
189
                                        uint32_t maxbuffer, uint32_t max_n_elems,
190
                                        uint32_t last_n_elems, int elem_size)
1 by brian
clean slate
191
{
192
  register int i;
193
  double total_cost= 0.0;
482 by Brian Aker
Remove uint.
194
  uint32_t *buff_elems= buffer; /* #s of elements in each of merged sequences */
1 by brian
clean slate
195
196
  /*
197
    Set initial state: first maxbuffer sequences contain max_n_elems elements
198
    each, last sequence contains last_n_elems elements.
199
  */
200
  for (i = 0; i < (int)maxbuffer; i++)
201
    buff_elems[i]= max_n_elems;
202
  buff_elems[maxbuffer]= last_n_elems;
203
204
  /*
205
    Do it exactly as merge_many_buff function does, calling
206
    get_merge_buffers_cost to get cost of merge_buffers.
207
  */
208
  if (maxbuffer >= MERGEBUFF2)
209
  {
210
    while (maxbuffer >= MERGEBUFF2)
211
    {
482 by Brian Aker
Remove uint.
212
      uint32_t lastbuff= 0;
1 by brian
clean slate
213
      for (i = 0; i <= (int) maxbuffer - MERGEBUFF*3/2; i += MERGEBUFF)
214
      {
215
        total_cost+=get_merge_buffers_cost(buff_elems, elem_size,
216
                                           buff_elems + i,
217
                                           buff_elems + i + MERGEBUFF-1);
218
	lastbuff++;
219
      }
220
      total_cost+=get_merge_buffers_cost(buff_elems, elem_size,
221
                                         buff_elems + i,
222
                                         buff_elems + maxbuffer);
223
      maxbuffer= lastbuff;
224
    }
225
  }
226
227
  /* Simulate final merge_buff call. */
228
  total_cost += get_merge_buffers_cost(buff_elems, elem_size,
229
                                       buff_elems, buff_elems + maxbuffer);
230
  return total_cost;
231
}
232
233
234
/*
235
  Calculate cost of using Unique for processing nkeys elements of size
236
  key_size using max_in_memory_size memory.
237
238
  SYNOPSIS
239
    Unique::get_use_cost()
240
      buffer    space for temporary data, use Unique::get_cost_calc_buff_size
241
                to get # bytes needed.
242
      nkeys     #of elements in Unique
243
      key_size  size of each elements in bytes
244
      max_in_memory_size amount of memory Unique will be allowed to use
245
246
  RETURN
247
    Cost in disk seeks.
248
249
  NOTES
250
    cost(using_unqiue) =
251
      cost(create_trees) +  (see #1)
252
      cost(merge) +         (see #2)
253
      cost(read_result)     (see #3)
254
255
    1. Cost of trees creation
256
      For each Unique::put operation there will be 2*log2(n+1) elements
257
      comparisons, where n runs from 1 tree_size (we assume that all added
258
      elements are different). Together this gives:
259
260
      n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!)
261
262
      then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;
263
264
      Total cost of creating trees:
265
      (n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
266
267
      Approximate value of log2(N!) is calculated by log2_n_fact function.
268
269
    2. Cost of merging.
270
      If only one tree is created by Unique no merging will be necessary.
271
      Otherwise, we model execution of merge_many_buff function and count
272
      #of merges. (The reason behind this is that number of buffers is small,
273
      while size of buffers is big and we don't want to loose precision with
274
      O(x)-style formula)
275
276
    3. If only one tree is created by Unique no disk io will happen.
277
      Otherwise, ceil(key_len*n_keys) disk seeks are necessary. We assume
278
      these will be random seeks.
279
*/
280
482 by Brian Aker
Remove uint.
281
double Unique::get_use_cost(uint32_t *buffer, uint32_t nkeys, uint32_t key_size,
641.3.8 by Monty Taylor
Removed my_malloc from drizzled.
282
                            size_t max_in_memory_size)
1 by brian
clean slate
283
{
284
  ulong max_elements_in_tree;
285
  ulong last_tree_elems;
286
  int   n_full_trees; /* number of trees in unique - 1 */
287
  double result;
288
289
  max_elements_in_tree= ((ulong) max_in_memory_size /
290
                         ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size));
291
292
  n_full_trees=    nkeys / max_elements_in_tree;
293
  last_tree_elems= nkeys % max_elements_in_tree;
294
295
  /* Calculate cost of creating trees */
296
  result= 2*log2_n_fact(last_tree_elems + 1.0);
297
  if (n_full_trees)
298
    result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0);
299
  result /= TIME_FOR_COMPARE_ROWID;
300
301
302
  if (!n_full_trees)
303
    return result;
304
305
  /*
306
    There is more then one tree and merging is necessary.
307
    First, add cost of writing all trees to disk, assuming that all disk
308
    writes are sequential.
309
  */
310
  result += DISK_SEEK_BASE_COST * n_full_trees *
311
              ceil(((double) key_size)*max_elements_in_tree / IO_SIZE);
312
  result += DISK_SEEK_BASE_COST * ceil(((double) key_size)*last_tree_elems / IO_SIZE);
313
314
  /* Cost of merge */
315
  double merge_cost= get_merge_many_buffs_cost(buffer, n_full_trees,
316
                                               max_elements_in_tree,
317
                                               last_tree_elems, key_size);
318
  if (merge_cost < 0.0)
319
    return merge_cost;
320
321
  result += merge_cost;
322
  /*
323
    Add cost of reading the resulting sequence, assuming there were no
324
    duplicate elements.
325
  */
326
  result += ceil((double)key_size*nkeys/IO_SIZE);
327
328
  return result;
329
}
330
331
Unique::~Unique()
332
{
1241.9.50 by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/
333
  close_cached_file(file);
1 by brian
clean slate
334
  delete_tree(&tree);
335
  delete_dynamic(&file_ptrs);
336
}
337
338
339
    /* Write tree to disk; clear tree */
340
bool Unique::flush()
341
{
342
  BUFFPEK file_ptr;
343
  elements+= tree.elements_in_tree;
344
  file_ptr.count=tree.elements_in_tree;
1241.9.50 by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/
345
  file_ptr.file_pos=my_b_tell(file);
1 by brian
clean slate
346
347
  if (tree_walk(&tree, (tree_walk_action) unique_write_to_file,
348
		(void*) this, left_root_right) ||
481 by Brian Aker
Remove all of uchar.
349
      insert_dynamic(&file_ptrs, (unsigned char*) &file_ptr))
1 by brian
clean slate
350
    return 1;
351
  delete_tree(&tree);
352
  return 0;
353
}
354
355
356
/*
357
  Clear the tree and the file.
358
  You must call reset() if you want to reuse Unique after walk().
359
*/
360
361
void
362
Unique::reset()
363
{
364
  reset_tree(&tree);
365
  /*
366
    If elements != 0, some trees were stored in the file (see how
367
    flush() works). Note, that we can not count on my_b_tell(&file) == 0
368
    here, because it can return 0 right after walk(), and walk() does not
369
    reset any Unique member.
370
  */
371
  if (elements)
372
  {
373
    reset_dynamic(&file_ptrs);
1241.9.50 by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/
374
    reinit_io_cache(file, WRITE_CACHE, 0L, 0, 1);
1 by brian
clean slate
375
  }
376
  elements= 0;
377
}
378
379
/*
380
  The comparison function, passed to queue_init() in merge_walk() and in
381
  merge_buffers() when the latter is called from Uniques::get() must
382
  use comparison function of Uniques::tree, but compare members of struct
383
  BUFFPEK.
384
*/
385
398.1.9 by Monty Taylor
Cleaned up stuff out of global.h.
386
#ifdef __cplusplus
387
extern "C" {
388
#endif
1 by brian
clean slate
389
481 by Brian Aker
Remove all of uchar.
390
static int buffpek_compare(void *arg, unsigned char *key_ptr1, unsigned char *key_ptr2)
1 by brian
clean slate
391
{
392
  BUFFPEK_COMPARE_CONTEXT *ctx= (BUFFPEK_COMPARE_CONTEXT *) arg;
393
  return ctx->key_compare(ctx->key_compare_arg,
481 by Brian Aker
Remove all of uchar.
394
                          *((unsigned char **) key_ptr1), *((unsigned char **)key_ptr2));
1 by brian
clean slate
395
}
396
398.1.9 by Monty Taylor
Cleaned up stuff out of global.h.
397
#ifdef __cplusplus
398
}
399
#endif
400
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
401
/*
402
 The comparison function object, passed to a priority_queue in merge_walk()
403
 as its sort function parameter.
404
*/
1 by brian
clean slate
405
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
406
class buffpek_compare_functor
407
{
408
  qsort_cmp2 key_compare;
409
  void *key_compare_arg;
410
  public:
411
  buffpek_compare_functor(qsort_cmp2 in_key_compare, void *in_compare_arg)
412
    : key_compare(in_key_compare), key_compare_arg(in_compare_arg) { }
897.1.12 by Padraig
Making my function objects const correct.
413
  inline bool operator()(const BUFFPEK *i, const BUFFPEK *j)
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
414
  {
415
    return key_compare(key_compare_arg,
416
                    i->key, j->key);
417
  }
418
};
1 by brian
clean slate
419
420
/*
421
  DESCRIPTION
422
423
    Function is very similar to merge_buffers, but instead of writing sorted
424
    unique keys to the output file, it invokes walk_action for each key.
425
    This saves I/O if you need to pass through all unique keys only once.
426
427
  SYNOPSIS
428
    merge_walk()
429
  All params are 'IN' (but see comment for begin, end):
430
    merge_buffer       buffer to perform cached piece-by-piece loading
431
                       of trees; initially the buffer is empty
432
    merge_buffer_size  size of merge_buffer. Must be aligned with
433
                       key_length
434
    key_length         size of tree element; key_length * (end - begin)
435
                       must be less or equal than merge_buffer_size.
436
    begin              pointer to BUFFPEK struct for the first tree.
437
    end                pointer to BUFFPEK struct for the last tree;
438
                       end > begin and [begin, end) form a consecutive
439
                       range. BUFFPEKs structs in that range are used and
440
                       overwritten in merge_walk().
441
    walk_action        element visitor. Action is called for each unique
442
                       key.
443
    walk_action_arg    argument to walk action. Passed to it on each call.
444
    compare            elements comparison function
445
    compare_arg        comparison function argument
446
    file               file with all trees dumped. Trees in the file
447
                       must contain sorted unique values. Cache must be
448
                       initialized in read mode.
449
  RETURN VALUE
450
    0     ok
451
    <> 0  error
452
*/
453
481 by Brian Aker
Remove all of uchar.
454
static bool merge_walk(unsigned char *merge_buffer, ulong merge_buffer_size,
482 by Brian Aker
Remove uint.
455
                       uint32_t key_length, BUFFPEK *begin, BUFFPEK *end,
1 by brian
clean slate
456
                       tree_walk_action walk_action, void *walk_action_arg,
457
                       qsort_cmp2 compare, void *compare_arg,
458
                       IO_CACHE *file)
459
{
460
  if (end <= begin ||
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
461
      merge_buffer_size < (ulong) (key_length * (end - begin + 1))) 
1 by brian
clean slate
462
    return 1;
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
463
  priority_queue<BUFFPEK *, vector<BUFFPEK *>, buffpek_compare_functor >
464
    queue(buffpek_compare_functor(compare, compare_arg));
1 by brian
clean slate
465
  /* we need space for one key when a piece of merge buffer is re-read */
466
  merge_buffer_size-= key_length;
481 by Brian Aker
Remove all of uchar.
467
  unsigned char *save_key_buff= merge_buffer + merge_buffer_size;
895 by Brian Aker
Completion (?) of uint conversion.
468
  uint32_t max_key_count_per_piece= (uint32_t) (merge_buffer_size/(end-begin) /
1 by brian
clean slate
469
                                        key_length);
470
  /* if piece_size is aligned reuse_freed_buffer will always hit */
482 by Brian Aker
Remove uint.
471
  uint32_t piece_size= max_key_count_per_piece * key_length;
472
  uint32_t bytes_read;               /* to hold return value of read_to_buffer */
1 by brian
clean slate
473
  BUFFPEK *top;
474
  int res= 1;
475
  /*
476
    Invariant: queue must contain top element from each tree, until a tree
477
    is not completely walked through.
478
    Here we're forcing the invariant, inserting one element from each tree
479
    to the queue.
480
  */
481
  for (top= begin; top != end; ++top)
482
  {
483
    top->base= merge_buffer + (top - begin) * piece_size;
484
    top->max_keys= max_key_count_per_piece;
485
    bytes_read= read_to_buffer(file, top, key_length);
895 by Brian Aker
Completion (?) of uint conversion.
486
    if (bytes_read == (uint32_t) (-1))
1 by brian
clean slate
487
      goto end;
51.1.73 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
488
    assert(bytes_read);
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
489
    queue.push(top);
1 by brian
clean slate
490
  }
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
491
  top= queue.top();
492
  while (queue.size() > 1)
1 by brian
clean slate
493
  {
494
    /*
495
      Every iteration one element is removed from the queue, and one is
496
      inserted by the rules of the invariant. If two adjacent elements on
497
      the top of the queue are not equal, biggest one is unique, because all
498
      elements in each tree are unique. Action is applied only to unique
499
      elements.
500
    */
501
    void *old_key= top->key;
502
    /*
503
      read next key from the cache or from the file and push it to the
504
      queue; this gives new top.
505
    */
506
    top->key+= key_length;
507
    if (--top->mem_count)
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
508
    {
509
      queue.pop();
510
      queue.push(top);
511
    }
1 by brian
clean slate
512
    else /* next piece should be read */
513
    {
514
      /* save old_key not to overwrite it in read_to_buffer */
515
      memcpy(save_key_buff, old_key, key_length);
516
      old_key= save_key_buff;
517
      bytes_read= read_to_buffer(file, top, key_length);
895 by Brian Aker
Completion (?) of uint conversion.
518
      if (bytes_read == (uint32_t) (-1))
1 by brian
clean slate
519
        goto end;
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
520
      else if (bytes_read > 0) /* top->key, top->mem_count are reset */
521
      {                        /* in read_to_buffer */
522
        queue.pop();
523
        queue.push(top);
524
      }
1 by brian
clean slate
525
      else
526
      {
527
        /*
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
528
          Tree for old 'top' element is empty: remove it from the queue. 
1 by brian
clean slate
529
        */
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
530
        queue.pop();
1 by brian
clean slate
531
      }
532
    }
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
533
    top= queue.top();
1 by brian
clean slate
534
    /* new top has been obtained; if old top is unique, apply the action */
535
    if (compare(compare_arg, old_key, top->key))
536
    {
537
      if (walk_action(old_key, 1, walk_action_arg))
538
        goto end;
539
    }
540
  }
541
  /*
542
    Applying walk_action to the tail of the last tree: this is safe because
543
    either we had only one tree in the beginning, either we work with the
544
    last tree in the queue.
545
  */
546
  do
547
  {
548
    do
549
    {
550
      if (walk_action(top->key, 1, walk_action_arg))
551
        goto end;
552
      top->key+= key_length;
553
    }
554
    while (--top->mem_count);
555
    bytes_read= read_to_buffer(file, top, key_length);
895 by Brian Aker
Completion (?) of uint conversion.
556
    if (bytes_read == (uint32_t) (-1))
1 by brian
clean slate
557
      goto end;
558
  }
559
  while (bytes_read);
560
  res= 0;
561
end:
562
  return res;
563
}
564
565
566
/*
567
  DESCRIPTION
568
    Walks consecutively through all unique elements:
569
    if all elements are in memory, then it simply invokes 'tree_walk', else
570
    all flushed trees are loaded to memory piece-by-piece, pieces are
571
    sorted, and action is called for each unique value.
572
    Note: so as merging resets file_ptrs state, this method can change
573
    internal Unique state to undefined: if you want to reuse Unique after
574
    walk() you must call reset() first!
575
  SYNOPSIS
576
    Unique:walk()
577
  All params are 'IN':
578
    action  function-visitor, typed in include/my_tree.h
579
            function is called for each unique element
580
    arg     argument for visitor, which is passed to it on each call
581
  RETURN VALUE
582
    0    OK
583
    <> 0 error
584
 */
585
586
bool Unique::walk(tree_walk_action action, void *walk_action_arg)
587
{
588
  int res;
481 by Brian Aker
Remove all of uchar.
589
  unsigned char *merge_buffer;
1 by brian
clean slate
590
591
  if (elements == 0)                       /* the whole tree is in memory */
592
    return tree_walk(&tree, action, walk_action_arg, left_root_right);
593
594
  /* flush current tree to the file to have some memory for merge buffer */
595
  if (flush())
596
    return 1;
1241.9.50 by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/
597
  if (flush_io_cache(file) || reinit_io_cache(file, READ_CACHE, 0L, 0, 0))
1 by brian
clean slate
598
    return 1;
641.3.8 by Monty Taylor
Removed my_malloc from drizzled.
599
  if (!(merge_buffer= (unsigned char *) malloc(max_in_memory_size)))
1 by brian
clean slate
600
    return 1;
601
  res= merge_walk(merge_buffer, (ulong) max_in_memory_size, size,
602
                  (BUFFPEK *) file_ptrs.buffer,
603
                  (BUFFPEK *) file_ptrs.buffer + file_ptrs.elements,
604
                  action, walk_action_arg,
1241.9.50 by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/
605
                  tree.compare, tree.custom_arg, file);
477 by Monty Taylor
Removed my_free(). It turns out that it had been def'd to ignore the flags passed to it in the second arg anyway. Gotta love that.
606
  free((char*) merge_buffer);
1 by brian
clean slate
607
  return res;
608
}
609
610
/*
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
611
  Modify the Table element so that when one calls init_records()
1 by brian
clean slate
612
  the rows will be read in priority order.
613
*/
614
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
615
bool Unique::get(Table *table)
1 by brian
clean slate
616
{
617
  SORTPARAM sort_param;
618
  table->sort.found_records=elements+tree.elements_in_tree;
619
1241.9.50 by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/
620
  if (my_b_tell(file) == 0)
1 by brian
clean slate
621
  {
622
    /* Whole tree is in memory;  Don't use disk if you don't need to */
481 by Brian Aker
Remove all of uchar.
623
    if ((record_pointers=table->sort.record_pointers= (unsigned char*)
641.3.8 by Monty Taylor
Removed my_malloc from drizzled.
624
	 malloc(size * tree.elements_in_tree)))
1 by brian
clean slate
625
    {
626
      (void) tree_walk(&tree, (tree_walk_action) unique_write_to_ptrs,
627
		       this, left_root_right);
628
      return 0;
629
    }
630
  }
631
  /* Not enough memory; Save the result to file && free memory used by tree */
632
  if (flush())
633
    return 1;
634
635
  IO_CACHE *outfile=table->sort.io_cache;
636
  BUFFPEK *file_ptr= (BUFFPEK*) file_ptrs.buffer;
482 by Brian Aker
Remove uint.
637
  uint32_t maxbuffer= file_ptrs.elements - 1;
481 by Brian Aker
Remove all of uchar.
638
  unsigned char *sort_buffer;
1241.13.1 by Monty Taylor
Put my_off_t back... but this time localized only to myisam and mysys.
639
  my_off_t save_pos;
1 by brian
clean slate
640
  bool error=1;
641
642
      /* Open cached file if it isn't open */
684 by Brian Aker
Mass cleanup for casting.
643
  outfile=table->sort.io_cache= new IO_CACHE;
641.3.9 by Monty Taylor
More removal of my_malloc.
644
  memset(outfile, 0, sizeof(IO_CACHE));
1 by brian
clean slate
645
575.4.3 by ysano
Rename mysql to drizzle.
646
  if (!outfile || (! my_b_inited(outfile) && open_cached_file(outfile,drizzle_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER, MYF(MY_WME))))
1 by brian
clean slate
647
    return 1;
684 by Brian Aker
Mass cleanup for casting.
648
  reinit_io_cache(outfile, WRITE_CACHE, 0L, 0, 0);
1 by brian
clean slate
649
212.6.6 by Mats Kindahl
Removing redundant use of casts in drizzled/ for memcmp(), memcpy(), memset(), and memmove().
650
  memset(&sort_param, 0, sizeof(sort_param));
1 by brian
clean slate
651
  sort_param.max_rows= elements;
652
  sort_param.sort_form=table;
653
  sort_param.rec_length= sort_param.sort_length= sort_param.ref_length=
654
    size;
895 by Brian Aker
Completion (?) of uint conversion.
655
  sort_param.keys= (uint32_t) (max_in_memory_size / sort_param.sort_length);
1 by brian
clean slate
656
  sort_param.not_killable=1;
657
641.3.8 by Monty Taylor
Removed my_malloc from drizzled.
658
  if (!(sort_buffer=(unsigned char*) malloc((sort_param.keys+1) *
659
				            sort_param.sort_length)))
1 by brian
clean slate
660
    return 1;
661
  sort_param.unique_buff= sort_buffer+(sort_param.keys*
662
				       sort_param.sort_length);
663
664
  sort_param.compare= (qsort2_cmp) buffpek_compare;
665
  sort_param.cmp_context.key_compare= tree.compare;
666
  sort_param.cmp_context.key_compare_arg= tree.custom_arg;
667
668
  /* Merge the buffers to one file, removing duplicates */
1241.9.50 by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/
669
  if (merge_many_buff(&sort_param,sort_buffer,file_ptr,&maxbuffer,file))
670
    goto err;
671
  if (flush_io_cache(file) ||
672
      reinit_io_cache(file,READ_CACHE,0L,0,0))
673
    goto err;
674
  if (merge_buffers(&sort_param, file, outfile, sort_buffer, file_ptr,
1 by brian
clean slate
675
		    file_ptr, file_ptr+maxbuffer,0))
676
    goto err;
677
  error=0;
678
err:
460 by Monty Taylor
Removed x_free calls.
679
  if (sort_buffer)
680
    free(sort_buffer);
1 by brian
clean slate
681
  if (flush_io_cache(outfile))
682
    error=1;
683
684
  /* Setup io_cache for reading */
685
  save_pos=outfile->pos_in_file;
686
  if (reinit_io_cache(outfile,READ_CACHE,0L,0,0))
687
    error=1;
688
  outfile->end_of_file=save_pos;
689
  return error;
690
}