~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2001 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
1802.10.2 by Monty Taylor
Update all of the copyright headers to include the correct address.
14
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
1 by brian
clean slate
15
16
/*
17
  Function to handle quick removal of duplicates
18
  This code is used when doing multi-table deletes to find the rows in
19
  reference tables that needs to be deleted.
20
21
  The basic idea is as follows:
22
23
  Store first all strings in a binary tree, ignoring duplicates.
24
25
  The unique entries will be returned in sort order, to ensure that we do the
26
  deletes in disk order.
27
*/
28
1241.9.36 by Monty Taylor
ZOMG. I deleted drizzled/server_includes.h.
29
#include "config.h"
1241.9.1 by Monty Taylor
Removed global.h. Fixed all the headers.
30
31
#include <math.h>
32
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
33
#include <queue>
572.1.4 by Monty Taylor
Removed a bunch of unusued tests and defines from autoconf.
34
1241.9.23 by Monty Taylor
Removed sql_table.h from server_includes.h.
35
#include "drizzled/sql_sort.h"
36
#include "drizzled/session.h"
37
#include "drizzled/sql_list.h"
1241.9.64 by Monty Taylor
Moved remaining non-public portions of mysys and mystrings to drizzled/internal.
38
#include "drizzled/internal/iocache.h"
1241.9.23 by Monty Taylor
Removed sql_table.h from server_includes.h.
39
572.1.4 by Monty Taylor
Removed a bunch of unusued tests and defines from autoconf.
40
#if defined(CMATH_NAMESPACE)
41
using namespace CMATH_NAMESPACE;
42
#endif
1 by brian
clean slate
43
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
44
using namespace std;
45
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
46
namespace drizzled
47
{
1 by brian
clean slate
48
481 by Brian Aker
Remove all of uchar.
49
int unique_write_to_ptrs(unsigned char* key,
1241.9.46 by Monty Taylor
Removed some more evil.
50
                         uint32_t, Unique *unique)
1 by brian
clean slate
51
{
52
  memcpy(unique->record_pointers, key, unique->size);
53
  unique->record_pointers+=unique->size;
54
  return 0;
55
}
56
57
Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
641.3.8 by Monty Taylor
Removed my_malloc from drizzled.
58
	       uint32_t size_arg, size_t max_in_memory_size_arg)
1241.9.50 by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/
59
  : max_in_memory_size(max_in_memory_size_arg),
60
    size(size_arg),
61
    elements(0)
1 by brian
clean slate
62
{
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
63
  // Second element is max size for memory use in unique sort
64
  init_tree(&tree, 0, 0, size, comp_func, false,
1 by brian
clean slate
65
            NULL, comp_func_fixed_arg);
66
}
67
68
69
/*
70
  Calculate log2(n!)
71
72
  NOTES
73
    Stirling's approximate formula is used:
74
75
      n! ~= sqrt(2*M_PI*n) * (n/M_E)^n
76
77
    Derivation of formula used for calculations is as follows:
78
79
    log2(n!) = log(n!)/log(2) = log(sqrt(2*M_PI*n)*(n/M_E)^n) / log(2) =
80
81
      = (log(2*M_PI*n)/2 + n*log(n/M_E)) / log(2).
82
*/
83
84
inline double log2_n_fact(double x)
85
{
86
  return (log(2*M_PI*x)/2 + x*log(x/M_E)) / M_LN2;
87
}
88
89
90
/*
91
  Calculate cost of using Unique for processing nkeys elements of size
92
  key_size using max_in_memory_size memory.
93
94
  SYNOPSIS
95
    Unique::get_use_cost()
96
      buffer    space for temporary data, use Unique::get_cost_calc_buff_size
97
                to get # bytes needed.
98
      nkeys     #of elements in Unique
99
      key_size  size of each elements in bytes
100
      max_in_memory_size amount of memory Unique will be allowed to use
101
102
  RETURN
103
    Cost in disk seeks.
104
105
  NOTES
106
    cost(using_unqiue) =
107
      cost(create_trees) +  (see #1)
108
      cost(merge) +         (see #2)
109
      cost(read_result)     (see #3)
110
111
    1. Cost of trees creation
112
      For each Unique::put operation there will be 2*log2(n+1) elements
113
      comparisons, where n runs from 1 tree_size (we assume that all added
114
      elements are different). Together this gives:
115
116
      n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!)
117
118
      then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;
119
120
      Total cost of creating trees:
121
      (n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
122
123
      Approximate value of log2(N!) is calculated by log2_n_fact function.
124
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
125
       
126
      (The Next two are historical, we do all unique operations in memory or fail)
127
1 by brian
clean slate
128
    2. Cost of merging.
129
      If only one tree is created by Unique no merging will be necessary.
130
      Otherwise, we model execution of merge_many_buff function and count
131
      #of merges. (The reason behind this is that number of buffers is small,
132
      while size of buffers is big and we don't want to loose precision with
133
      O(x)-style formula)
134
135
    3. If only one tree is created by Unique no disk io will happen.
136
      Otherwise, ceil(key_len*n_keys) disk seeks are necessary. We assume
137
      these will be random seeks.
138
*/
139
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
140
double Unique::get_use_cost(uint32_t *, uint32_t nkeys, uint32_t key_size,
1891.2.1 by Monty Taylor
Fixed things to make things compile with clang
141
                            size_t max_in_memory_size_arg)
1 by brian
clean slate
142
{
143
  ulong max_elements_in_tree;
144
  ulong last_tree_elems;
145
  double result;
146
1891.2.1 by Monty Taylor
Fixed things to make things compile with clang
147
  max_elements_in_tree= ((ulong) max_in_memory_size_arg /
1 by brian
clean slate
148
                         ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size));
149
150
  last_tree_elems= nkeys % max_elements_in_tree;
151
152
  /* Calculate cost of creating trees */
153
  result= 2*log2_n_fact(last_tree_elems + 1.0);
154
  result /= TIME_FOR_COMPARE_ROWID;
155
156
  return result;
157
}
158
159
Unique::~Unique()
160
{
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
161
  delete_tree(&tree);
1 by brian
clean slate
162
}
163
164
165
/*
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
166
  Clear the tree.
1 by brian
clean slate
167
  You must call reset() if you want to reuse Unique after walk().
168
*/
169
170
void
171
Unique::reset()
172
{
173
  reset_tree(&tree);
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
174
  assert(elements == 0);
1 by brian
clean slate
175
}
176
177
178
/*
179
  DESCRIPTION
180
    Walks consecutively through all unique elements:
181
    if all elements are in memory, then it simply invokes 'tree_walk', else
182
    all flushed trees are loaded to memory piece-by-piece, pieces are
183
    sorted, and action is called for each unique value.
184
    Note: so as merging resets file_ptrs state, this method can change
185
    internal Unique state to undefined: if you want to reuse Unique after
186
    walk() you must call reset() first!
187
  SYNOPSIS
188
    Unique:walk()
189
  All params are 'IN':
1410.3.4 by Djellel E. Difallah
update references to old my_'s
190
    action  function-visitor, typed in include/tree.h
1 by brian
clean slate
191
            function is called for each unique element
192
    arg     argument for visitor, which is passed to it on each call
193
  RETURN VALUE
194
    0    OK
195
    <> 0 error
196
 */
197
198
bool Unique::walk(tree_walk_action action, void *walk_action_arg)
199
{
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
200
  return tree_walk(&tree, action, walk_action_arg, left_root_right);
1 by brian
clean slate
201
}
202
203
/*
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
204
  Modify the Table element so that when one calls init_records()
1 by brian
clean slate
205
  the rows will be read in priority order.
206
*/
207
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
208
bool Unique::get(Table *table)
1 by brian
clean slate
209
{
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
210
  table->sort.found_records= elements+tree.elements_in_tree;
1 by brian
clean slate
211
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
212
  if ((record_pointers=table->sort.record_pointers= (unsigned char*)
213
       malloc(size * tree.elements_in_tree)))
1 by brian
clean slate
214
  {
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
215
    (void) tree_walk(&tree, (tree_walk_action) unique_write_to_ptrs,
216
                     this, left_root_right);
217
    return 0;
1 by brian
clean slate
218
  }
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
219
  /* Not enough memory */
220
  return 1;
1 by brian
clean slate
221
}
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
222
223
} /* namespace drizzled */