/* Copyright (C) 2001 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/*
  Function to handle quick removal of duplicates.
  This code is used when doing multi-table deletes to find the rows in
  reference tables that need to be deleted.

  The basic idea is as follows:

  First store all strings in a binary tree, ignoring duplicates.
  When the tree uses more memory than 'max_heap_table_size',
  write the tree (in sorted order) out to disk and start with a new tree.
  When all data has been generated, merge the trees (removing any found
  duplicates).

  The unique entries will be returned in sort order, to ensure that we do the
  deletes in disk order.
*/
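
/*
  A minimal usage sketch (assuming the element-insertion method unique_add()
  declared with the class in the header; the caller-side names here are
  illustrative only):

    Unique unique(compare_func, compare_arg, ref_length, max_in_memory_size);
    for (each candidate row)
      if (unique.unique_add(row_ref))   // duplicates are dropped internally
        return 1;                       // out of memory or disk error
    unique.get(table);                  // rows now come back in sorted
                                        // (disk) order via table->sort
*/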

#include <drizzled/server_includes.h>
#include <drizzled/sql_sort.h>
#include <drizzled/session.h>
#include CMATH_H

#if defined(CMATH_NAMESPACE)
using namespace CMATH_NAMESPACE;
#endif


int unique_write_to_file(unsigned char* key, element_count,
                         Unique *unique)
{
  /*
    Use unique->size (size of element stored in the tree) and not
    unique->tree.size_of_element. The latter is different from unique->size
    when tree implementation chooses to store pointer to key in TREE_ELEMENT
    (instead of storing the element itself there)
  */
  return my_b_write(&unique->file, key, unique->size) ? 1 : 0;
}

int unique_write_to_ptrs(unsigned char* key,
                         element_count, Unique *unique)
{
  memcpy(unique->record_pointers, key, unique->size);
  unique->record_pointers+=unique->size;
  return 0;
}

Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
               uint32_t size_arg, size_t max_in_memory_size_arg)
  :max_in_memory_size(max_in_memory_size_arg), size(size_arg), elements(0)
{
  my_b_clear(&file);
  init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func, 0,
            NULL, comp_func_fixed_arg);
  /* If the following fails, the next add will also fail */
  my_init_dynamic_array(&file_ptrs, sizeof(BUFFPEK), 16, 16);
  /*
    If you change the following, change it in get_max_elements function, too.
  */
  max_elements= (ulong) (max_in_memory_size /
                         ALIGN_SIZE(sizeof(TREE_ELEMENT)+size));
  open_cached_file(&file, drizzle_tmpdir, TEMP_PREFIX, DISK_BUFFER_SIZE,
                   MYF(MY_WME));
}


/*
  Calculate log2(n!)

  NOTES
    Stirling's approximate formula is used:

      n! ~= sqrt(2*M_PI*n) * (n/M_E)^n

    Derivation of formula used for calculations is as follows:

    log2(n!) = log(n!)/log(2) = log(sqrt(2*M_PI*n)*(n/M_E)^n) / log(2) =

      = (log(2*M_PI*n)/2 + n*log(n/M_E)) / log(2).
*/

inline double log2_n_fact(double x)
{
  return (log(2*M_PI*x)/2 + x*log(x/M_E)) / M_LN2;
}
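
/*
  Rough sanity check of the approximation: log2(10!) = log2(3628800) ~= 21.79,
  while log2_n_fact(10) evaluates to ~= 21.78, so even for small n the
  relative error is far below one percent, which is plenty for cost estimates.
*/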


/*
  Calculate cost of merge_buffers function call for given sequence of
  input stream lengths and store the number of rows in result stream in *last.

  SYNOPSIS
    get_merge_buffers_cost()
      buff_elems  Array of #s of elements in buffers
      elem_size   Size of element stored in buffer
      first       Pointer to first merged element size
      last        Pointer to last merged element size

  RETURN
    Cost of merge_buffers operation in disk seeks.

  NOTES
    It is assumed that no rows are eliminated during merge.
    The cost is calculated as

      cost(read_and_write) + cost(merge_comparisons).

    All bytes in the sequences are read and written back during merge, so the
    cost of disk io is 2*elem_size*total_buf_elems/IO_SIZE (2 is for read + write).

    For comparison cost calculations we assume that all merged sequences have
    the same length, so each of total_buf_elems elements will be added to a sort
    heap with (n_buffers-1) elements. This gives the comparison cost:

      total_buf_elems* log2(n_buffers) / TIME_FOR_COMPARE_ROWID;
*/

static double get_merge_buffers_cost(uint32_t *, uint32_t elem_size,
                                     uint32_t *first, uint32_t *last)
{
  uint32_t total_buf_elems= 0;
  for (uint32_t *pbuf= first; pbuf <= last; pbuf++)
    total_buf_elems+= *pbuf;
  *last= total_buf_elems;

  int n_buffers= last - first + 1;

  /* Using log2(n)=log(n)/log(2) formula */
  return 2*((double)total_buf_elems*elem_size) / IO_SIZE +
     total_buf_elems*log((double) n_buffers) / (TIME_FOR_COMPARE_ROWID * M_LN2);
}
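
/*
  Symbolic instance: merging 7 buffers of N elements each gives
  total_buf_elems = 7*N, so the returned estimate is
  2*7*N*elem_size/IO_SIZE for the read+write pass plus
  7*N*log2(7)/TIME_FOR_COMPARE_ROWID for the heap comparisons, and *last is
  overwritten with 7*N so that a later pass sees the merged sequence length.
*/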


/*
  Calculate cost of merging buffers into one in Unique::get, i.e. calculate
  how long (in terms of disk seeks) the two calls
    merge_many_buff(...);
    merge_buffers(...);
  will take.

  SYNOPSIS
    get_merge_many_buffs_cost()
      buffer        buffer space for temporary data, at least
                    Unique::get_cost_calc_buff_size bytes
      maxbuffer     # of full buffers
      max_n_elems   # of elements in first maxbuffer buffers
      last_n_elems  # of elements in last buffer
      elem_size     size of buffer element

  NOTES
    maxbuffer+1 buffers are merged, where the first maxbuffer buffers contain
    max_n_elems elements each and the last buffer contains last_n_elems elements.

    The current implementation does a dumb simulation of the merge_many_buff
    function's actions.

  RETURN
    Cost of merge in disk seeks.
*/

static double get_merge_many_buffs_cost(uint32_t *buffer,
                                        uint32_t maxbuffer, uint32_t max_n_elems,
                                        uint32_t last_n_elems, int elem_size)
{
  register int i;
  double total_cost= 0.0;
  uint32_t *buff_elems= buffer; /* #s of elements in each of merged sequences */

  /*
    Set initial state: first maxbuffer sequences contain max_n_elems elements
    each, last sequence contains last_n_elems elements.
  */
  for (i = 0; i < (int)maxbuffer; i++)
    buff_elems[i]= max_n_elems;
  buff_elems[maxbuffer]= last_n_elems;

  /*
    Do it exactly as the merge_many_buff function does, calling
    get_merge_buffers_cost to get the cost of merge_buffers.
  */
  if (maxbuffer >= MERGEBUFF2)
  {
    while (maxbuffer >= MERGEBUFF2)
    {
      uint32_t lastbuff= 0;
      for (i = 0; i <= (int) maxbuffer - MERGEBUFF*3/2; i += MERGEBUFF)
      {
        total_cost+=get_merge_buffers_cost(buff_elems, elem_size,
                                           buff_elems + i,
                                           buff_elems + i + MERGEBUFF-1);
        lastbuff++;
      }
      total_cost+=get_merge_buffers_cost(buff_elems, elem_size,
                                         buff_elems + i,
                                         buff_elems + maxbuffer);
      maxbuffer= lastbuff;
    }
  }

  /* Simulate the final merge_buffers call. */
  total_cost += get_merge_buffers_cost(buff_elems, elem_size,
                                       buff_elems, buff_elems + maxbuffer);
  return total_cost;
}
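
/*
  Example of the simulation (assuming the usual MERGEBUFF == 7 and
  MERGEBUFF2 == 15 from sql_sort.h): with maxbuffer == 20, i.e. 21 sequences,
  one pass of the while loop merges them seven at a time down to 3 sequences;
  3 < MERGEBUFF2, so the loop exits and the final get_merge_buffers_cost call
  accounts for merging those 3 sequences into one.
*/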


/*
  Calculate cost of using Unique for processing nkeys elements of size
  key_size using max_in_memory_size memory.

  SYNOPSIS
    Unique::get_use_cost()
      buffer    space for temporary data, use Unique::get_cost_calc_buff_size
                to get # bytes needed.
      nkeys     # of elements in Unique
      key_size  size of each element in bytes
      max_in_memory_size amount of memory Unique will be allowed to use

  RETURN
    Cost in disk seeks.

  NOTES
    cost(using_unique) =
      cost(create_trees) +  (see #1)
      cost(merge) +         (see #2)
      cost(read_result)     (see #3)

    1. Cost of trees creation
      For each Unique::put operation there will be 2*log2(n+1) element
      comparisons, where n runs from 1 to tree_size (we assume that all added
      elements are different). Together this gives:

      n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!)

      then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;

      Total cost of creating trees:
      (n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.

      Approximate value of log2(N!) is calculated by the log2_n_fact function.

    2. Cost of merging.
      If only one tree is created by Unique, no merging will be necessary.
      Otherwise, we model execution of the merge_many_buff function and count
      the # of merges. (The reason behind this is that the number of buffers is
      small, while the size of the buffers is big and we don't want to lose
      precision with an O(x)-style formula)

    3. If only one tree is created by Unique, no disk io will happen.
      Otherwise, ceil(key_len*n_keys/IO_SIZE) disk seeks are necessary. We
      assume these will be random seeks.
*/

double Unique::get_use_cost(uint32_t *buffer, uint32_t nkeys, uint32_t key_size,
                            size_t max_in_memory_size)
{
  ulong max_elements_in_tree;
  ulong last_tree_elems;
  int   n_full_trees; /* number of trees in unique - 1 */
  double result;

  max_elements_in_tree= ((ulong) max_in_memory_size /
                         ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size));

  n_full_trees=    nkeys / max_elements_in_tree;
  last_tree_elems= nkeys % max_elements_in_tree;

  /* Calculate cost of creating trees */
  result= 2*log2_n_fact(last_tree_elems + 1.0);
  if (n_full_trees)
    result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0);
  result /= TIME_FOR_COMPARE_ROWID;


  if (!n_full_trees)
    return result;

  /*
    There is more than one tree and merging is necessary.
    First, add cost of writing all trees to disk, assuming that all disk
    writes are sequential.
  */
  result += DISK_SEEK_BASE_COST * n_full_trees *
              ceil(((double) key_size)*max_elements_in_tree / IO_SIZE);
  result += DISK_SEEK_BASE_COST * ceil(((double) key_size)*last_tree_elems / IO_SIZE);

  /* Cost of merge */
  double merge_cost= get_merge_many_buffs_cost(buffer, n_full_trees,
                                               max_elements_in_tree,
                                               last_tree_elems, key_size);
  if (merge_cost < 0.0)
    return merge_cost;

  result += merge_cost;
  /*
    Add cost of reading the resulting sequence, assuming there were no
    duplicate elements.
  */
  result += ceil((double)key_size*nkeys/IO_SIZE);

  return result;
}

Unique::~Unique()
{
  close_cached_file(&file);
  delete_tree(&tree);
  delete_dynamic(&file_ptrs);
}


/* Write tree to disk; clear tree */
bool Unique::flush()
{
  BUFFPEK file_ptr;
  elements+= tree.elements_in_tree;
  file_ptr.count=tree.elements_in_tree;
  file_ptr.file_pos=my_b_tell(&file);

  if (tree_walk(&tree, (tree_walk_action) unique_write_to_file,
                (void*) this, left_root_right) ||
      insert_dynamic(&file_ptrs, (unsigned char*) &file_ptr))
    return 1;
  delete_tree(&tree);
  return 0;
}


/*
  Clear the tree and the file.
  You must call reset() if you want to reuse Unique after walk().
*/

void
Unique::reset()
{
  reset_tree(&tree);
  /*
    If elements != 0, some trees were stored in the file (see how
    flush() works). Note that we cannot count on my_b_tell(&file) == 0
    here, because it can return 0 right after walk(), and walk() does not
    reset any Unique member.
  */
  if (elements)
  {
    reset_dynamic(&file_ptrs);
    reinit_io_cache(&file, WRITE_CACHE, 0L, 0, 1);
  }
  elements= 0;
}

/*
  The comparison function, passed to queue_init() in merge_walk() and in
  merge_buffers() when the latter is called from Unique::get(), must
  use the comparison function of Unique::tree, but compare members of struct
  BUFFPEK.
*/

#ifdef __cplusplus
extern "C" {
#endif

static int buffpek_compare(void *arg, unsigned char *key_ptr1, unsigned char *key_ptr2)
{
  BUFFPEK_COMPARE_CONTEXT *ctx= (BUFFPEK_COMPARE_CONTEXT *) arg;
  return ctx->key_compare(ctx->key_compare_arg,
                          *((unsigned char **) key_ptr1), *((unsigned char **)key_ptr2));
}

#ifdef __cplusplus
}
#endif



/*
  DESCRIPTION

    Function is very similar to merge_buffers, but instead of writing sorted
    unique keys to the output file, it invokes walk_action for each key.
    This saves I/O if you need to pass through all unique keys only once.

  SYNOPSIS
    merge_walk()
  All params are 'IN' (but see comment for begin, end):
    merge_buffer       buffer to perform cached piece-by-piece loading
                       of trees; initially the buffer is empty
    merge_buffer_size  size of merge_buffer. Must be aligned with
                       key_length
    key_length         size of tree element; key_length * (end - begin)
                       must be less than or equal to merge_buffer_size.
    begin              pointer to BUFFPEK struct for the first tree.
    end                pointer to BUFFPEK struct for the last tree;
                       end > begin and [begin, end) form a consecutive
                       range. BUFFPEK structs in that range are used and
                       overwritten in merge_walk().
    walk_action        element visitor. Action is called for each unique
                       key.
    walk_action_arg    argument to walk action. Passed to it on each call.
    compare            elements comparison function
    compare_arg        comparison function argument
    file               file with all trees dumped. Trees in the file
                       must contain sorted unique values. Cache must be
                       initialized in read mode.
  RETURN VALUE
    0     ok
    <> 0  error
*/

static bool merge_walk(unsigned char *merge_buffer, ulong merge_buffer_size,
                       uint32_t key_length, BUFFPEK *begin, BUFFPEK *end,
                       tree_walk_action walk_action, void *walk_action_arg,
                       qsort_cmp2 compare, void *compare_arg,
                       IO_CACHE *file)
{
  BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg };
  QUEUE queue;
  if (end <= begin ||
      merge_buffer_size < (ulong) (key_length * (end - begin + 1)) ||
      init_queue(&queue, (uint) (end - begin), offsetof(BUFFPEK, key), 0,
                 buffpek_compare, &compare_context))
    return 1;
  /* we need space for one key when a piece of merge buffer is re-read */
  merge_buffer_size-= key_length;
  unsigned char *save_key_buff= merge_buffer + merge_buffer_size;
  uint32_t max_key_count_per_piece= (uint) (merge_buffer_size/(end-begin) /
                                        key_length);
  /* if piece_size is aligned, reuse_freed_buff will always hit */
  uint32_t piece_size= max_key_count_per_piece * key_length;
  uint32_t bytes_read;               /* to hold return value of read_to_buffer */
  BUFFPEK *top;
  int res= 1;
  /*
    Invariant: the queue must contain the top element from each tree, until
    a tree is completely walked through.
    Here we establish the invariant by inserting one element from each tree
    into the queue.
  */
  for (top= begin; top != end; ++top)
  {
    top->base= merge_buffer + (top - begin) * piece_size;
    top->max_keys= max_key_count_per_piece;
    bytes_read= read_to_buffer(file, top, key_length);
    if (bytes_read == (uint) (-1))
      goto end;
    assert(bytes_read);
    queue_insert(&queue, (unsigned char *) top);
  }
  top= (BUFFPEK *) queue_top(&queue);
  while (queue.elements > 1)
  {
    /*
      Every iteration one element is removed from the queue, and one is
      inserted by the rules of the invariant. If two adjacent elements on
      the top of the queue are not equal, the smaller one (the old top) is
      unique, because all elements in each tree are unique. Action is applied
      only to unique elements.
    */
    void *old_key= top->key;
    /*
      read next key from the cache or from the file and push it to the
      queue; this gives new top.
    */
    top->key+= key_length;
    if (--top->mem_count)
      queue_replaced(&queue);
    else /* next piece should be read */
    {
      /* save old_key not to overwrite it in read_to_buffer */
      memcpy(save_key_buff, old_key, key_length);
      old_key= save_key_buff;
      bytes_read= read_to_buffer(file, top, key_length);
      if (bytes_read == (uint) (-1))
        goto end;
      else if (bytes_read > 0)      /* top->key, top->mem_count are reset */
        queue_replaced(&queue);     /* in read_to_buffer */
      else
      {
        /*
          Tree for old 'top' element is empty: remove it from the queue and
          give all its memory to the nearest tree.
        */
        queue_remove(&queue, 0);
        reuse_freed_buff(&queue, top, key_length);
      }
    }
    top= (BUFFPEK *) queue_top(&queue);
    /* new top has been obtained; if old top is unique, apply the action */
    if (compare(compare_arg, old_key, top->key))
    {
      if (walk_action(old_key, 1, walk_action_arg))
        goto end;
    }
  }
  /*
    Applying walk_action to the tail of the last tree: this is safe because
    either we had only one tree to begin with, or we are working with the
    last tree left in the queue.
  */
  do
  {
    do
    {
      if (walk_action(top->key, 1, walk_action_arg))
        goto end;
      top->key+= key_length;
    }
    while (--top->mem_count);
    bytes_read= read_to_buffer(file, top, key_length);
    if (bytes_read == (uint) (-1))
      goto end;
  }
  while (bytes_read);
  res= 0;
end:
  delete_queue(&queue);
  return res;
}
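
/*
  Example of the duplicate suppression above: walking two flushed trees that
  hold {1,3,5} and {3,4,5}, walk_action is invoked exactly once for each of
  1, 3, 4 and 5; the second copies of 3 and 5 are skipped because compare()
  against the new queue top reports equality.
*/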


/*
  DESCRIPTION
    Walks consecutively through all unique elements:
    if all elements are in memory, then it simply invokes 'tree_walk', else
    all flushed trees are loaded to memory piece-by-piece, pieces are
    sorted, and action is called for each unique value.
    Note: since merging resets the file_ptrs state, this method can change
    the internal Unique state to undefined: if you want to reuse Unique after
    walk() you must call reset() first!
  SYNOPSIS
    Unique::walk()
  All params are 'IN':
    action  function-visitor, typed in include/my_tree.h
            function is called for each unique element
    arg     argument for visitor, which is passed to it on each call
  RETURN VALUE
    0    OK
    <> 0 error
 */
556
bool Unique::walk(tree_walk_action action, void *walk_action_arg)
557
{
558
  int res;
481 by Brian Aker
Remove all of uchar.
559
  unsigned char *merge_buffer;
1 by brian
clean slate
560
561
  if (elements == 0)                       /* the whole tree is in memory */
562
    return tree_walk(&tree, action, walk_action_arg, left_root_right);
563
564
  /* flush current tree to the file to have some memory for merge buffer */
565
  if (flush())
566
    return 1;
567
  if (flush_io_cache(&file) || reinit_io_cache(&file, READ_CACHE, 0L, 0, 0))
568
    return 1;
641.3.8 by Monty Taylor
Removed my_malloc from drizzled.
569
  if (!(merge_buffer= (unsigned char *) malloc(max_in_memory_size)))
1 by brian
clean slate
570
    return 1;
571
  res= merge_walk(merge_buffer, (ulong) max_in_memory_size, size,
572
                  (BUFFPEK *) file_ptrs.buffer,
573
                  (BUFFPEK *) file_ptrs.buffer + file_ptrs.elements,
574
                  action, walk_action_arg,
575
                  tree.compare, tree.custom_arg, &file);
477 by Monty Taylor
Removed my_free(). It turns out that it had been def'd to ignore the flags passed to it in the second arg anyway. Gotta love that.
576
  free((char*) merge_buffer);
1 by brian
clean slate
577
  return res;
578
}

/*
  Modify the Table element so that when one calls init_records()
  the rows will be read in priority order.
*/

bool Unique::get(Table *table)
{
  SORTPARAM sort_param;
  table->sort.found_records=elements+tree.elements_in_tree;

  if (my_b_tell(&file) == 0)
  {
    /* Whole tree is in memory;  Don't use disk if you don't need to */
    if ((record_pointers=table->sort.record_pointers= (unsigned char*)
         malloc(size * tree.elements_in_tree)))
    {
      (void) tree_walk(&tree, (tree_walk_action) unique_write_to_ptrs,
                       this, left_root_right);
      return 0;
    }
  }
  /* Not enough memory; Save the result to file && free memory used by tree */
  if (flush())
    return 1;

  IO_CACHE *outfile=table->sort.io_cache;
  BUFFPEK *file_ptr= (BUFFPEK*) file_ptrs.buffer;
  uint32_t maxbuffer= file_ptrs.elements - 1;
  unsigned char *sort_buffer;
  my_off_t save_pos;
  bool error=1;

  /* Open cached file if it isn't open */
  outfile=table->sort.io_cache= new IO_CACHE;
  memset(outfile, 0, sizeof(IO_CACHE));

  if (!outfile || (! my_b_inited(outfile) &&
      open_cached_file(outfile, drizzle_tmpdir, TEMP_PREFIX,
                       READ_RECORD_BUFFER, MYF(MY_WME))))
    return 1;
  reinit_io_cache(outfile, WRITE_CACHE, 0L, 0, 0);

  memset(&sort_param, 0, sizeof(sort_param));
  sort_param.max_rows= elements;
  sort_param.sort_form=table;
  sort_param.rec_length= sort_param.sort_length= sort_param.ref_length=
    size;
  sort_param.keys= (uint) (max_in_memory_size / sort_param.sort_length);
  sort_param.not_killable=1;

  if (!(sort_buffer=(unsigned char*) malloc((sort_param.keys+1) *
                                            sort_param.sort_length)))
    return 1;
  sort_param.unique_buff= sort_buffer+(sort_param.keys*
                                       sort_param.sort_length);

  sort_param.compare= (qsort2_cmp) buffpek_compare;
  sort_param.cmp_context.key_compare= tree.compare;
  sort_param.cmp_context.key_compare_arg= tree.custom_arg;

  /* Merge the buffers to one file, removing duplicates */
  if (merge_many_buff(&sort_param,sort_buffer,file_ptr,&maxbuffer,&file))
    goto err;
  if (flush_io_cache(&file) ||
      reinit_io_cache(&file,READ_CACHE,0L,0,0))
    goto err;
  if (merge_buffers(&sort_param, &file, outfile, sort_buffer, file_ptr,
                    file_ptr, file_ptr+maxbuffer,0))
    goto err;
  error=0;
err:
  if (sort_buffer)
    free(sort_buffer);
  if (flush_io_cache(outfile))
    error=1;

  /* Setup io_cache for reading */
  save_pos=outfile->pos_in_file;
  if (reinit_io_cache(outfile,READ_CACHE,0L,0,0))
    error=1;
  outfile->end_of_file=save_pos;
  return error;
}