1
by brian
clean slate |
1 |
/* Copyright (C) 2001 MySQL AB
|
2 |
||
3 |
This program is free software; you can redistribute it and/or modify
|
|
4 |
it under the terms of the GNU General Public License as published by
|
|
5 |
the Free Software Foundation; version 2 of the License.
|
|
6 |
||
7 |
This program is distributed in the hope that it will be useful,
|
|
8 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
9 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
10 |
GNU General Public License for more details.
|
|
11 |
||
12 |
You should have received a copy of the GNU General Public License
|
|
13 |
along with this program; if not, write to the Free Software
|
|
1802.10.2
by Monty Taylor
Update all of the copyright headers to include the correct address. |
14 |
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
1
by brian
clean slate |
15 |
|
16 |
/*
|
|
17 |
Function to handle quick removal of duplicates
|
|
18 |
This code is used when doing multi-table deletes to find the rows in
|
|
19 |
reference tables that needs to be deleted.
|
|
20 |
||
21 |
The basic idea is as follows:
|
|
22 |
||
23 |
Store first all strings in a binary tree, ignoring duplicates.
|
|
24 |
||
25 |
The unique entries will be returned in sort order, to ensure that we do the
|
|
26 |
deletes in disk order.
|
|
27 |
*/
|
|
28 |
||
2173.2.1
by Monty Taylor
Fixes incorrect usage of include |
29 |
#include <config.h> |
1241.9.1
by Monty Taylor
Removed global.h. Fixed all the headers. |
30 |
|
31 |
#include <math.h> |
|
32 |
||
897.1.1
by Padraig
Replaced the instance of QUEUE in the merge_walk() function with |
33 |
#include <queue> |
572.1.4
by Monty Taylor
Removed a bunch of unusued tests and defines from autoconf. |
34 |
|
2173.2.1
by Monty Taylor
Fixes incorrect usage of include |
35 |
#include <drizzled/sql_sort.h> |
36 |
#include <drizzled/session.h> |
|
37 |
#include <drizzled/sql_list.h> |
|
38 |
#include <drizzled/internal/iocache.h> |
|
2154.2.24
by Brian Aker
Merge in all changes for current_session, etc. |
39 |
#include <drizzled/unique.h> |
2241.2.14
by Olaf van der Spek
Refactor |
40 |
#include <drizzled/table.h> |
1241.9.23
by Monty Taylor
Removed sql_table.h from server_includes.h. |
41 |
|
572.1.4
by Monty Taylor
Removed a bunch of unusued tests and defines from autoconf. |
42 |
#if defined(CMATH_NAMESPACE)
|
43 |
using namespace CMATH_NAMESPACE; |
|
44 |
#endif
|
|
1
by brian
clean slate |
45 |
|
897.1.1
by Padraig
Replaced the instance of QUEUE in the merge_walk() function with |
46 |
using namespace std; |
47 |
||
2241.2.14
by Olaf van der Spek
Refactor |
48 |
namespace drizzled { |
1
by brian
clean slate |
49 |
|
481
by Brian Aker
Remove all of uchar. |
50 |
int unique_write_to_ptrs(unsigned char* key, |
1241.9.46
by Monty Taylor
Removed some more evil. |
51 |
uint32_t, Unique *unique) |
1
by brian
clean slate |
52 |
{
|
53 |
memcpy(unique->record_pointers, key, unique->size); |
|
54 |
unique->record_pointers+=unique->size; |
|
55 |
return 0; |
|
56 |
}
|
|
57 |
||
58 |
Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, |
|
641.3.8
by Monty Taylor
Removed my_malloc from drizzled. |
59 |
uint32_t size_arg, size_t max_in_memory_size_arg) |
1241.9.50
by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/ |
60 |
: max_in_memory_size(max_in_memory_size_arg), |
61 |
size(size_arg), |
|
62 |
elements(0) |
|
1
by brian
clean slate |
63 |
{
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
64 |
// Second element is max size for memory use in unique sort
|
65 |
init_tree(&tree, 0, 0, size, comp_func, false, |
|
1
by brian
clean slate |
66 |
NULL, comp_func_fixed_arg); |
67 |
}
|
|
68 |
||
69 |
||
70 |
/*
|
|
71 |
Calculate log2(n!)
|
|
72 |
||
73 |
NOTES
|
|
74 |
Stirling's approximate formula is used:
|
|
75 |
||
76 |
n! ~= sqrt(2*M_PI*n) * (n/M_E)^n
|
|
77 |
||
78 |
Derivation of formula used for calculations is as follows:
|
|
79 |
||
80 |
log2(n!) = log(n!)/log(2) = log(sqrt(2*M_PI*n)*(n/M_E)^n) / log(2) =
|
|
81 |
||
82 |
= (log(2*M_PI*n)/2 + n*log(n/M_E)) / log(2).
|
|
83 |
*/
|
|
84 |
||
85 |
inline double log2_n_fact(double x) |
|
86 |
{
|
|
87 |
return (log(2*M_PI*x)/2 + x*log(x/M_E)) / M_LN2; |
|
88 |
}
|
|
89 |
||
90 |
||
91 |
/*
|
|
92 |
Calculate cost of using Unique for processing nkeys elements of size
|
|
93 |
key_size using max_in_memory_size memory.
|
|
94 |
||
95 |
SYNOPSIS
|
|
96 |
Unique::get_use_cost()
|
|
97 |
buffer space for temporary data, use Unique::get_cost_calc_buff_size
|
|
98 |
to get # bytes needed.
|
|
99 |
nkeys #of elements in Unique
|
|
100 |
key_size size of each elements in bytes
|
|
101 |
max_in_memory_size amount of memory Unique will be allowed to use
|
|
102 |
||
103 |
RETURN
|
|
104 |
Cost in disk seeks.
|
|
105 |
||
106 |
NOTES
|
|
107 |
cost(using_unqiue) =
|
|
108 |
cost(create_trees) + (see #1)
|
|
109 |
cost(merge) + (see #2)
|
|
110 |
cost(read_result) (see #3)
|
|
111 |
||
112 |
1. Cost of trees creation
|
|
113 |
For each Unique::put operation there will be 2*log2(n+1) elements
|
|
114 |
comparisons, where n runs from 1 tree_size (we assume that all added
|
|
115 |
elements are different). Together this gives:
|
|
116 |
||
117 |
n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!)
|
|
118 |
||
119 |
then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;
|
|
120 |
||
121 |
Total cost of creating trees:
|
|
122 |
(n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
|
|
123 |
||
124 |
Approximate value of log2(N!) is calculated by log2_n_fact function.
|
|
125 |
||
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
126 |
|
127 |
(The Next two are historical, we do all unique operations in memory or fail)
|
|
128 |
||
1
by brian
clean slate |
129 |
2. Cost of merging.
|
130 |
If only one tree is created by Unique no merging will be necessary.
|
|
131 |
Otherwise, we model execution of merge_many_buff function and count
|
|
132 |
#of merges. (The reason behind this is that number of buffers is small,
|
|
133 |
while size of buffers is big and we don't want to loose precision with
|
|
134 |
O(x)-style formula)
|
|
135 |
||
136 |
3. If only one tree is created by Unique no disk io will happen.
|
|
137 |
Otherwise, ceil(key_len*n_keys) disk seeks are necessary. We assume
|
|
138 |
these will be random seeks.
|
|
139 |
*/
|
|
140 |
||
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
141 |
double Unique::get_use_cost(uint32_t *, uint32_t nkeys, uint32_t key_size, |
1891.2.1
by Monty Taylor
Fixed things to make things compile with clang |
142 |
size_t max_in_memory_size_arg) |
1
by brian
clean slate |
143 |
{
|
144 |
ulong max_elements_in_tree; |
|
145 |
ulong last_tree_elems; |
|
146 |
double result; |
|
147 |
||
1891.2.1
by Monty Taylor
Fixed things to make things compile with clang |
148 |
max_elements_in_tree= ((ulong) max_in_memory_size_arg / |
1
by brian
clean slate |
149 |
ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size)); |
150 |
||
151 |
last_tree_elems= nkeys % max_elements_in_tree; |
|
152 |
||
153 |
/* Calculate cost of creating trees */
|
|
154 |
result= 2*log2_n_fact(last_tree_elems + 1.0); |
|
155 |
result /= TIME_FOR_COMPARE_ROWID; |
|
156 |
||
157 |
return result; |
|
158 |
}
|
|
159 |
||
160 |
Unique::~Unique() |
|
161 |
{
|
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
162 |
delete_tree(&tree); |
1
by brian
clean slate |
163 |
}
|
164 |
||
165 |
||
166 |
/*
|
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
167 |
Clear the tree.
|
1
by brian
clean slate |
168 |
You must call reset() if you want to reuse Unique after walk().
|
169 |
*/
|
|
170 |
||
171 |
void
|
|
172 |
Unique::reset() |
|
173 |
{
|
|
174 |
reset_tree(&tree); |
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
175 |
assert(elements == 0); |
1
by brian
clean slate |
176 |
}
|
177 |
||
178 |
||
179 |
/*
|
|
180 |
DESCRIPTION
|
|
181 |
Walks consecutively through all unique elements:
|
|
182 |
if all elements are in memory, then it simply invokes 'tree_walk', else
|
|
183 |
all flushed trees are loaded to memory piece-by-piece, pieces are
|
|
184 |
sorted, and action is called for each unique value.
|
|
185 |
Note: so as merging resets file_ptrs state, this method can change
|
|
186 |
internal Unique state to undefined: if you want to reuse Unique after
|
|
187 |
walk() you must call reset() first!
|
|
188 |
SYNOPSIS
|
|
189 |
Unique:walk()
|
|
190 |
All params are 'IN':
|
|
1410.3.4
by Djellel E. Difallah
update references to old my_'s |
191 |
action function-visitor, typed in include/tree.h
|
1
by brian
clean slate |
192 |
function is called for each unique element
|
193 |
arg argument for visitor, which is passed to it on each call
|
|
194 |
RETURN VALUE
|
|
195 |
0 OK
|
|
196 |
<> 0 error
|
|
197 |
*/
|
|
198 |
||
199 |
bool Unique::walk(tree_walk_action action, void *walk_action_arg) |
|
200 |
{
|
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
201 |
return tree_walk(&tree, action, walk_action_arg, left_root_right); |
1
by brian
clean slate |
202 |
}
|
203 |
||
204 |
/*
|
|
327.1.5
by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h |
205 |
Modify the Table element so that when one calls init_records()
|
1
by brian
clean slate |
206 |
the rows will be read in priority order.
|
207 |
*/
|
|
208 |
||
327.1.5
by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h |
209 |
bool Unique::get(Table *table) |
1
by brian
clean slate |
210 |
{
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
211 |
table->sort.found_records= elements+tree.elements_in_tree; |
1
by brian
clean slate |
212 |
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
213 |
if ((record_pointers=table->sort.record_pointers= (unsigned char*) |
214 |
malloc(size * tree.elements_in_tree))) |
|
1
by brian
clean slate |
215 |
{
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
216 |
(void) tree_walk(&tree, (tree_walk_action) unique_write_to_ptrs, |
217 |
this, left_root_right); |
|
218 |
return 0; |
|
1
by brian
clean slate |
219 |
}
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
220 |
/* Not enough memory */
|
221 |
return 1; |
|
1
by brian
clean slate |
222 |
}
|
1280.1.10
by Monty Taylor
Put everything in drizzled into drizzled namespace. |
223 |
|
224 |
} /* namespace drizzled */ |