~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2001 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
1802.10.2 by Monty Taylor
Update all of the copyright headers to include the correct address.
14
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
1 by brian
clean slate
15
16
/*
17
  Function to handle quick removal of duplicates
18
  This code is used when doing multi-table deletes to find the rows in
19
  reference tables that needs to be deleted.
20
21
  The basic idea is as follows:
22
23
  Store first all strings in a binary tree, ignoring duplicates.
24
25
  The unique entries will be returned in sort order, to ensure that we do the
26
  deletes in disk order.
27
*/
28
2173.2.1 by Monty Taylor
Fixes incorrect usage of include
29
#include <config.h>
1241.9.1 by Monty Taylor
Removed global.h. Fixed all the headers.
30
31
#include <math.h>
32
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
33
#include <queue>
572.1.4 by Monty Taylor
Removed a bunch of unusued tests and defines from autoconf.
34
2173.2.1 by Monty Taylor
Fixes incorrect usage of include
35
#include <drizzled/sql_sort.h>
36
#include <drizzled/session.h>
37
#include <drizzled/sql_list.h>
38
#include <drizzled/internal/iocache.h>
2154.2.24 by Brian Aker
Merge in all changes for current_session, etc.
39
#include <drizzled/unique.h>
2241.2.14 by Olaf van der Spek
Refactor
40
#include <drizzled/table.h>
1241.9.23 by Monty Taylor
Removed sql_table.h from server_includes.h.
41
572.1.4 by Monty Taylor
Removed a bunch of unusued tests and defines from autoconf.
42
#if defined(CMATH_NAMESPACE)
43
using namespace CMATH_NAMESPACE;
44
#endif
1 by brian
clean slate
45
897.1.1 by Padraig
Replaced the instance of QUEUE in the merge_walk() function with
46
using namespace std;
47
2241.2.14 by Olaf van der Spek
Refactor
48
namespace drizzled {
1 by brian
clean slate
49
481 by Brian Aker
Remove all of uchar.
50
int unique_write_to_ptrs(unsigned char* key,
1241.9.46 by Monty Taylor
Removed some more evil.
51
                         uint32_t, Unique *unique)
1 by brian
clean slate
52
{
53
  memcpy(unique->record_pointers, key, unique->size);
54
  unique->record_pointers+=unique->size;
55
  return 0;
56
}
57
58
Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
641.3.8 by Monty Taylor
Removed my_malloc from drizzled.
59
	       uint32_t size_arg, size_t max_in_memory_size_arg)
1241.9.50 by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/
60
  : max_in_memory_size(max_in_memory_size_arg),
61
    size(size_arg),
62
    elements(0)
1 by brian
clean slate
63
{
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
64
  // Second element is max size for memory use in unique sort
2455.1.1 by Ivo Roper
refactoring of TREE and TREE_ELEMENT structs into Tree and Tree_Element classes
65
  tree.init_tree(0, 0, size, comp_func, false,
1 by brian
clean slate
66
            NULL, comp_func_fixed_arg);
67
}
68
69
70
/*
71
  Calculate log2(n!)
72
73
  NOTES
74
    Stirling's approximate formula is used:
75
76
      n! ~= sqrt(2*M_PI*n) * (n/M_E)^n
77
78
    Derivation of formula used for calculations is as follows:
79
80
    log2(n!) = log(n!)/log(2) = log(sqrt(2*M_PI*n)*(n/M_E)^n) / log(2) =
81
82
      = (log(2*M_PI*n)/2 + n*log(n/M_E)) / log(2).
83
*/
84
85
inline double log2_n_fact(double x)
86
{
87
  return (log(2*M_PI*x)/2 + x*log(x/M_E)) / M_LN2;
88
}
89
90
91
/*
92
  Calculate cost of using Unique for processing nkeys elements of size
93
  key_size using max_in_memory_size memory.
94
95
  SYNOPSIS
96
    Unique::get_use_cost()
97
      buffer    space for temporary data, use Unique::get_cost_calc_buff_size
98
                to get # bytes needed.
99
      nkeys     #of elements in Unique
100
      key_size  size of each elements in bytes
101
      max_in_memory_size amount of memory Unique will be allowed to use
102
103
  RETURN
104
    Cost in disk seeks.
105
106
  NOTES
107
    cost(using_unqiue) =
108
      cost(create_trees) +  (see #1)
109
      cost(merge) +         (see #2)
110
      cost(read_result)     (see #3)
111
112
    1. Cost of trees creation
113
      For each Unique::put operation there will be 2*log2(n+1) elements
114
      comparisons, where n runs from 1 tree_size (we assume that all added
115
      elements are different). Together this gives:
116
117
      n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!)
118
119
      then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;
120
121
      Total cost of creating trees:
122
      (n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
123
124
      Approximate value of log2(N!) is calculated by log2_n_fact function.
125
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
126
       
127
      (The Next two are historical, we do all unique operations in memory or fail)
128
1 by brian
clean slate
129
    2. Cost of merging.
130
      If only one tree is created by Unique no merging will be necessary.
131
      Otherwise, we model execution of merge_many_buff function and count
132
      #of merges. (The reason behind this is that number of buffers is small,
133
      while size of buffers is big and we don't want to loose precision with
134
      O(x)-style formula)
135
136
    3. If only one tree is created by Unique no disk io will happen.
137
      Otherwise, ceil(key_len*n_keys) disk seeks are necessary. We assume
138
      these will be random seeks.
139
*/
140
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
141
double Unique::get_use_cost(uint32_t *, uint32_t nkeys, uint32_t key_size,
1891.2.1 by Monty Taylor
Fixed things to make things compile with clang
142
                            size_t max_in_memory_size_arg)
1 by brian
clean slate
143
{
144
  ulong max_elements_in_tree;
145
  ulong last_tree_elems;
146
  double result;
147
1891.2.1 by Monty Taylor
Fixed things to make things compile with clang
148
  max_elements_in_tree= ((ulong) max_in_memory_size_arg /
2455.1.1 by Ivo Roper
refactoring of TREE and TREE_ELEMENT structs into Tree and Tree_Element classes
149
                         ALIGN_SIZE(sizeof(Tree_Element)+key_size));
1 by brian
clean slate
150
151
  last_tree_elems= nkeys % max_elements_in_tree;
152
153
  /* Calculate cost of creating trees */
154
  result= 2*log2_n_fact(last_tree_elems + 1.0);
155
  result /= TIME_FOR_COMPARE_ROWID;
156
157
  return result;
158
}
159
160
Unique::~Unique()
161
{
2455.1.1 by Ivo Roper
refactoring of TREE and TREE_ELEMENT structs into Tree and Tree_Element classes
162
   tree.delete_tree();
1 by brian
clean slate
163
}
164
165
166
/*
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
167
  Clear the tree.
1 by brian
clean slate
168
  You must call reset() if you want to reuse Unique after walk().
169
*/
170
171
void
172
Unique::reset()
173
{
2455.1.1 by Ivo Roper
refactoring of TREE and TREE_ELEMENT structs into Tree and Tree_Element classes
174
  tree.reset_tree();
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
175
  assert(elements == 0);
1 by brian
clean slate
176
}
177
178
179
/*
180
  DESCRIPTION
181
    Walks consecutively through all unique elements:
182
    if all elements are in memory, then it simply invokes 'tree_walk', else
183
    all flushed trees are loaded to memory piece-by-piece, pieces are
184
    sorted, and action is called for each unique value.
185
    Note: so as merging resets file_ptrs state, this method can change
186
    internal Unique state to undefined: if you want to reuse Unique after
187
    walk() you must call reset() first!
188
  SYNOPSIS
189
    Unique:walk()
190
  All params are 'IN':
1410.3.4 by Djellel E. Difallah
update references to old my_'s
191
    action  function-visitor, typed in include/tree.h
1 by brian
clean slate
192
            function is called for each unique element
193
    arg     argument for visitor, which is passed to it on each call
194
  RETURN VALUE
195
    0    OK
196
    <> 0 error
197
 */
198
199
bool Unique::walk(tree_walk_action action, void *walk_action_arg)
200
{
2455.1.1 by Ivo Roper
refactoring of TREE and TREE_ELEMENT structs into Tree and Tree_Element classes
201
  return tree.tree_walk(action, walk_action_arg, left_root_right);
1 by brian
clean slate
202
}
203
204
/*
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
205
  Modify the Table element so that when one calls init_records()
1 by brian
clean slate
206
  the rows will be read in priority order.
207
*/
208
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
209
bool Unique::get(Table *table)
1 by brian
clean slate
210
{
2455.1.5 by Ivo Roper
switched global TREE function calls over to Tree method calls
211
  table->sort.found_records= elements+tree.getElementsInTree();
1 by brian
clean slate
212
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
213
  if ((record_pointers=table->sort.record_pointers= (unsigned char*)
2455.1.5 by Ivo Roper
switched global TREE function calls over to Tree method calls
214
       malloc(size * tree.getElementsInTree())))
1 by brian
clean slate
215
  {
2455.1.1 by Ivo Roper
refactoring of TREE and TREE_ELEMENT structs into Tree and Tree_Element classes
216
    (void) tree.tree_walk((tree_walk_action) unique_write_to_ptrs,
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
217
                     this, left_root_right);
218
    return 0;
1 by brian
clean slate
219
  }
1749.3.11 by Brian Aker
Force unique to just use memory and let the OS handle paging.
220
  /* Not enough memory */
221
  return 1;
1 by brian
clean slate
222
}
1280.1.10 by Monty Taylor
Put everything in drizzled into drizzled namespace.
223
224
} /* namespace drizzled */