1
by brian
clean slate |
1 |
/* Copyright (C) 2001 MySQL AB
|
2 |
||
3 |
This program is free software; you can redistribute it and/or modify
|
|
4 |
it under the terms of the GNU General Public License as published by
|
|
5 |
the Free Software Foundation; version 2 of the License.
|
|
6 |
||
7 |
This program is distributed in the hope that it will be useful,
|
|
8 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
9 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
10 |
GNU General Public License for more details.
|
|
11 |
||
12 |
You should have received a copy of the GNU General Public License
|
|
13 |
along with this program; if not, write to the Free Software
|
|
1802.10.2
by Monty Taylor
Update all of the copyright headers to include the correct address. |
14 |
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
1
by brian
clean slate |
15 |
|
16 |
/*
|
|
17 |
Function to handle quick removal of duplicates
|
|
18 |
This code is used when doing multi-table deletes to find the rows in
|
|
19 |
reference tables that needs to be deleted.
|
|
20 |
||
21 |
The basic idea is as follows:
|
|
22 |
||
23 |
Store first all strings in a binary tree, ignoring duplicates.
|
|
24 |
||
25 |
The unique entries will be returned in sort order, to ensure that we do the
|
|
26 |
deletes in disk order.
|
|
27 |
*/
|
|
28 |
||
1241.9.36
by Monty Taylor
ZOMG. I deleted drizzled/server_includes.h. |
29 |
#include "config.h" |
1241.9.1
by Monty Taylor
Removed global.h. Fixed all the headers. |
30 |
|
31 |
#include <math.h> |
|
32 |
||
897.1.1
by Padraig
Replaced the instance of QUEUE in the merge_walk() function with |
33 |
#include <queue> |
572.1.4
by Monty Taylor
Removed a bunch of unusued tests and defines from autoconf. |
34 |
|
1241.9.23
by Monty Taylor
Removed sql_table.h from server_includes.h. |
35 |
#include "drizzled/sql_sort.h" |
36 |
#include "drizzled/session.h" |
|
37 |
#include "drizzled/sql_list.h" |
|
1241.9.64
by Monty Taylor
Moved remaining non-public portions of mysys and mystrings to drizzled/internal. |
38 |
#include "drizzled/internal/iocache.h" |
1241.9.23
by Monty Taylor
Removed sql_table.h from server_includes.h. |
39 |
|
572.1.4
by Monty Taylor
Removed a bunch of unusued tests and defines from autoconf. |
40 |
#if defined(CMATH_NAMESPACE)
|
41 |
using namespace CMATH_NAMESPACE; |
|
42 |
#endif
|
|
1
by brian
clean slate |
43 |
|
897.1.1
by Padraig
Replaced the instance of QUEUE in the merge_walk() function with |
44 |
using namespace std; |
45 |
||
1280.1.10
by Monty Taylor
Put everything in drizzled into drizzled namespace. |
46 |
namespace drizzled |
47 |
{
|
|
1
by brian
clean slate |
48 |
|
481
by Brian Aker
Remove all of uchar. |
49 |
int unique_write_to_ptrs(unsigned char* key, |
1241.9.46
by Monty Taylor
Removed some more evil. |
50 |
uint32_t, Unique *unique) |
1
by brian
clean slate |
51 |
{
|
52 |
memcpy(unique->record_pointers, key, unique->size); |
|
53 |
unique->record_pointers+=unique->size; |
|
54 |
return 0; |
|
55 |
}
|
|
56 |
||
57 |
Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, |
|
641.3.8
by Monty Taylor
Removed my_malloc from drizzled. |
58 |
uint32_t size_arg, size_t max_in_memory_size_arg) |
1241.9.50
by Monty Taylor
Removed last non-pointer public IO_CACHE from drizzled/ |
59 |
: max_in_memory_size(max_in_memory_size_arg), |
60 |
size(size_arg), |
|
61 |
elements(0) |
|
1
by brian
clean slate |
62 |
{
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
63 |
// Second element is max size for memory use in unique sort
|
64 |
init_tree(&tree, 0, 0, size, comp_func, false, |
|
1
by brian
clean slate |
65 |
NULL, comp_func_fixed_arg); |
66 |
}
|
|
67 |
||
68 |
||
69 |
/*
|
|
70 |
Calculate log2(n!)
|
|
71 |
||
72 |
NOTES
|
|
73 |
Stirling's approximate formula is used:
|
|
74 |
||
75 |
n! ~= sqrt(2*M_PI*n) * (n/M_E)^n
|
|
76 |
||
77 |
Derivation of formula used for calculations is as follows:
|
|
78 |
||
79 |
log2(n!) = log(n!)/log(2) = log(sqrt(2*M_PI*n)*(n/M_E)^n) / log(2) =
|
|
80 |
||
81 |
= (log(2*M_PI*n)/2 + n*log(n/M_E)) / log(2).
|
|
82 |
*/
|
|
83 |
||
84 |
inline double log2_n_fact(double x) |
|
85 |
{
|
|
86 |
return (log(2*M_PI*x)/2 + x*log(x/M_E)) / M_LN2; |
|
87 |
}
|
|
88 |
||
89 |
||
90 |
/*
|
|
91 |
Calculate cost of using Unique for processing nkeys elements of size
|
|
92 |
key_size using max_in_memory_size memory.
|
|
93 |
||
94 |
SYNOPSIS
|
|
95 |
Unique::get_use_cost()
|
|
96 |
buffer space for temporary data, use Unique::get_cost_calc_buff_size
|
|
97 |
to get # bytes needed.
|
|
98 |
nkeys #of elements in Unique
|
|
99 |
key_size size of each elements in bytes
|
|
100 |
max_in_memory_size amount of memory Unique will be allowed to use
|
|
101 |
||
102 |
RETURN
|
|
103 |
Cost in disk seeks.
|
|
104 |
||
105 |
NOTES
|
|
106 |
cost(using_unqiue) =
|
|
107 |
cost(create_trees) + (see #1)
|
|
108 |
cost(merge) + (see #2)
|
|
109 |
cost(read_result) (see #3)
|
|
110 |
||
111 |
1. Cost of trees creation
|
|
112 |
For each Unique::put operation there will be 2*log2(n+1) elements
|
|
113 |
comparisons, where n runs from 1 tree_size (we assume that all added
|
|
114 |
elements are different). Together this gives:
|
|
115 |
||
116 |
n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!)
|
|
117 |
||
118 |
then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;
|
|
119 |
||
120 |
Total cost of creating trees:
|
|
121 |
(n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
|
|
122 |
||
123 |
Approximate value of log2(N!) is calculated by log2_n_fact function.
|
|
124 |
||
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
125 |
|
126 |
(The Next two are historical, we do all unique operations in memory or fail)
|
|
127 |
||
1
by brian
clean slate |
128 |
2. Cost of merging.
|
129 |
If only one tree is created by Unique no merging will be necessary.
|
|
130 |
Otherwise, we model execution of merge_many_buff function and count
|
|
131 |
#of merges. (The reason behind this is that number of buffers is small,
|
|
132 |
while size of buffers is big and we don't want to loose precision with
|
|
133 |
O(x)-style formula)
|
|
134 |
||
135 |
3. If only one tree is created by Unique no disk io will happen.
|
|
136 |
Otherwise, ceil(key_len*n_keys) disk seeks are necessary. We assume
|
|
137 |
these will be random seeks.
|
|
138 |
*/
|
|
139 |
||
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
140 |
double Unique::get_use_cost(uint32_t *, uint32_t nkeys, uint32_t key_size, |
1891.2.1
by Monty Taylor
Fixed things to make things compile with clang |
141 |
size_t max_in_memory_size_arg) |
1
by brian
clean slate |
142 |
{
|
143 |
ulong max_elements_in_tree; |
|
144 |
ulong last_tree_elems; |
|
145 |
double result; |
|
146 |
||
1891.2.1
by Monty Taylor
Fixed things to make things compile with clang |
147 |
max_elements_in_tree= ((ulong) max_in_memory_size_arg / |
1
by brian
clean slate |
148 |
ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size)); |
149 |
||
150 |
last_tree_elems= nkeys % max_elements_in_tree; |
|
151 |
||
152 |
/* Calculate cost of creating trees */
|
|
153 |
result= 2*log2_n_fact(last_tree_elems + 1.0); |
|
154 |
result /= TIME_FOR_COMPARE_ROWID; |
|
155 |
||
156 |
return result; |
|
157 |
}
|
|
158 |
||
159 |
Unique::~Unique() |
|
160 |
{
|
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
161 |
delete_tree(&tree); |
1
by brian
clean slate |
162 |
}
|
163 |
||
164 |
||
165 |
/*
|
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
166 |
Clear the tree.
|
1
by brian
clean slate |
167 |
You must call reset() if you want to reuse Unique after walk().
|
168 |
*/
|
|
169 |
||
170 |
void
|
|
171 |
Unique::reset() |
|
172 |
{
|
|
173 |
reset_tree(&tree); |
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
174 |
assert(elements == 0); |
1
by brian
clean slate |
175 |
}
|
176 |
||
177 |
||
178 |
/*
|
|
179 |
DESCRIPTION
|
|
180 |
Walks consecutively through all unique elements:
|
|
181 |
if all elements are in memory, then it simply invokes 'tree_walk', else
|
|
182 |
all flushed trees are loaded to memory piece-by-piece, pieces are
|
|
183 |
sorted, and action is called for each unique value.
|
|
184 |
Note: so as merging resets file_ptrs state, this method can change
|
|
185 |
internal Unique state to undefined: if you want to reuse Unique after
|
|
186 |
walk() you must call reset() first!
|
|
187 |
SYNOPSIS
|
|
188 |
Unique:walk()
|
|
189 |
All params are 'IN':
|
|
1410.3.4
by Djellel E. Difallah
update references to old my_'s |
190 |
action function-visitor, typed in include/tree.h
|
1
by brian
clean slate |
191 |
function is called for each unique element
|
192 |
arg argument for visitor, which is passed to it on each call
|
|
193 |
RETURN VALUE
|
|
194 |
0 OK
|
|
195 |
<> 0 error
|
|
196 |
*/
|
|
197 |
||
198 |
bool Unique::walk(tree_walk_action action, void *walk_action_arg) |
|
199 |
{
|
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
200 |
return tree_walk(&tree, action, walk_action_arg, left_root_right); |
1
by brian
clean slate |
201 |
}
|
202 |
||
203 |
/*
|
|
327.1.5
by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h |
204 |
Modify the Table element so that when one calls init_records()
|
1
by brian
clean slate |
205 |
the rows will be read in priority order.
|
206 |
*/
|
|
207 |
||
327.1.5
by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h |
208 |
bool Unique::get(Table *table) |
1
by brian
clean slate |
209 |
{
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
210 |
table->sort.found_records= elements+tree.elements_in_tree; |
1
by brian
clean slate |
211 |
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
212 |
if ((record_pointers=table->sort.record_pointers= (unsigned char*) |
213 |
malloc(size * tree.elements_in_tree))) |
|
1
by brian
clean slate |
214 |
{
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
215 |
(void) tree_walk(&tree, (tree_walk_action) unique_write_to_ptrs, |
216 |
this, left_root_right); |
|
217 |
return 0; |
|
1
by brian
clean slate |
218 |
}
|
1749.3.11
by Brian Aker
Force unique to just use memory and let the OS handle paging. |
219 |
/* Not enough memory */
|
220 |
return 1; |
|
1
by brian
clean slate |
221 |
}
|
1280.1.10
by Monty Taylor
Put everything in drizzled into drizzled namespace. |
222 |
|
223 |
} /* namespace drizzled */ |