~drizzle-trunk/drizzle/development

1237.13.6 by Padraig O'Sullivan
Moved the SEL_ARG class into its own header and implementation files. Cleaned up a bunch of style
1
/* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
2
 *  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
3
 *
4
 *  Copyright (C) 2008-2009 Sun Microsystems
5
 *
6
 *  This program is free software; you can redistribute it and/or modify
7
 *  it under the terms of the GNU General Public License as published by
8
 *  the Free Software Foundation; version 2 of the License.
9
 *
10
 *  This program is distributed in the hope that it will be useful,
11
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 *  GNU General Public License for more details.
14
 *
15
 *  You should have received a copy of the GNU General Public License
16
 *  along with this program; if not, write to the Free Software
17
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18
 */
19
20
#ifndef DRIZZLED_OPTIMIZER_SEL_ARG_H
21
#define DRIZZLED_OPTIMIZER_SEL_ARG_H
22
23
namespace drizzled
24
{
25
26
namespace optimizer
27
{
28
1237.13.7 by Padraig O'Sullivan
Renamed PARAM to Parameter and RANGE_OPT_PARAM to RangeParameter.
29
class RangeParameter;
1237.13.6 by Padraig O'Sullivan
Moved the SEL_ARG class into its own header and implementation files. Cleaned up a bunch of style
30
31
/*
32
  A construction block of the SEL_ARG-graph.
33
34
  The following description only covers graphs of SEL_ARG objects with
35
  sel_arg->type==KEY_RANGE:
36
37
  One SEL_ARG object represents an "elementary interval" in form
38
39
      min_value <=?  table.keypartX  <=? max_value
40
41
  The interval is a non-empty interval of any kind: with[out] minimum/maximum
42
  bound, [half]open/closed, single-point interval, etc.
43
44
  1. SEL_ARG GRAPH STRUCTURE
45
46
  SEL_ARG objects are linked together in a graph. The meaning of the graph
47
  is better demostrated by an example:
48
49
     tree->keys[i]
50
      |
51
      |             $              $
52
      |    part=1   $     part=2   $    part=3
53
      |             $              $
54
      |  +-------+  $   +-------+  $   +--------+
55
      |  | kp1<1 |--$-->| kp2=5 |--$-->| kp3=10 |
56
      |  +-------+  $   +-------+  $   +--------+
57
      |      |      $              $       |
58
      |      |      $              $   +--------+
59
      |      |      $              $   | kp3=12 |
60
      |      |      $              $   +--------+
61
      |  +-------+  $              $
62
      \->| kp1=2 |--$--------------$-+
63
         +-------+  $              $ |   +--------+
64
             |      $              $  ==>| kp3=11 |
65
         +-------+  $              $ |   +--------+
66
         | kp1=3 |--$--------------$-+       |
67
         +-------+  $              $     +--------+
68
             |      $              $     | kp3=14 |
69
            ...     $              $     +--------+
70
71
  The entire graph is partitioned into "interval lists".
72
73
  An interval list is a sequence of ordered disjoint intervals over the same
74
  key part. SEL_ARG are linked via "next" and "prev" pointers. Additionally,
75
  all intervals in the list form an RB-tree, linked via left/right/parent
76
  pointers. The RB-tree root SEL_ARG object will be further called "root of the
77
  interval list".
78
79
    In the example pic, there are 4 interval lists:
80
    "kp<1 OR kp1=2 OR kp1=3", "kp2=5", "kp3=10 OR kp3=12", "kp3=11 OR kp3=13".
81
    The vertical lines represent SEL_ARG::next/prev pointers.
82
83
  In an interval list, each member X may have SEL_ARG::next_key_part pointer
84
  pointing to the root of another interval list Y. The pointed interval list
85
  must cover a key part with greater number (i.e. Y->part > X->part).
86
87
    In the example pic, the next_key_part pointers are represented by
88
    horisontal lines.
89
90
  2. SEL_ARG GRAPH SEMANTICS
91
92
  It represents a condition in a special form (we don't have a name for it ATM)
93
  The SEL_ARG::next/prev is "OR", and next_key_part is "AND".
94
95
  For example, the picture represents the condition in form:
96
   (kp1 < 1 AND kp2=5 AND (kp3=10 OR kp3=12)) OR
97
   (kp1=2 AND (kp3=11 OR kp3=14)) OR
98
   (kp1=3 AND (kp3=11 OR kp3=14))
99
100
101
  3. SEL_ARG GRAPH USE
102
103
  Use get_mm_tree() to construct SEL_ARG graph from WHERE condition.
104
  Then walk the SEL_ARG graph and get a list of dijsoint ordered key
105
  intervals (i.e. intervals in form
106
107
   (constA1, .., const1_K) < (keypart1,.., keypartK) < (constB1, .., constB_K)
108
109
  Those intervals can be used to access the index. The uses are in:
110
   - check_quick_select() - Walk the SEL_ARG graph and find an estimate of
111
                            how many table records are contained within all
112
                            intervals.
113
   - get_quick_select()   - Walk the SEL_ARG, materialize the key intervals,
114
                            and create QuickRangeSelect object that will
115
                            read records within these intervals.
116
117
  4. SPACE COMPLEXITY NOTES
118
119
    SEL_ARG graph is a representation of an ordered disjoint sequence of
120
    intervals over the ordered set of index tuple values.
121
122
    For multi-part keys, one can construct a WHERE expression such that its
123
    list of intervals will be of combinatorial size. Here is an example:
124
125
      (keypart1 IN (1,2, ..., n1)) AND
126
      (keypart2 IN (1,2, ..., n2)) AND
127
      (keypart3 IN (1,2, ..., n3))
128
129
    For this WHERE clause the list of intervals will have n1*n2*n3 intervals
130
    of form
131
132
      (keypart1, keypart2, keypart3) = (k1, k2, k3), where 1 <= k{i} <= n{i}
133
134
    SEL_ARG graph structure aims to reduce the amount of required space by
135
    "sharing" the elementary intervals when possible (the pic at the
136
    beginning of this comment has examples of such sharing). The sharing may
137
    prevent combinatorial blowup:
138
139
      There are WHERE clauses that have combinatorial-size interval lists but
140
      will be represented by a compact SEL_ARG graph.
141
      Example:
142
        (keypartN IN (1,2, ..., n1)) AND
143
        ...
144
        (keypart2 IN (1,2, ..., n2)) AND
145
        (keypart1 IN (1,2, ..., n3))
146
147
    but not in all cases:
148
149
    - There are WHERE clauses that do have a compact SEL_ARG-graph
150
      representation but get_mm_tree() and its callees will construct a
151
      graph of combinatorial size.
152
      Example:
153
        (keypart1 IN (1,2, ..., n1)) AND
154
        (keypart2 IN (1,2, ..., n2)) AND
155
        ...
156
        (keypartN IN (1,2, ..., n3))
157
158
    - There are WHERE clauses for which the minimal possible SEL_ARG graph
159
      representation will have combinatorial size.
160
      Example:
161
        By induction: Let's take any interval on some keypart in the middle:
162
163
           kp15=c0
164
165
        Then let's AND it with this interval 'structure' from preceding and
166
        following keyparts:
167
168
          (kp14=c1 AND kp16=c3) OR keypart14=c2) (*)
169
170
        We will obtain this SEL_ARG graph:
171
172
             kp14     $      kp15      $      kp16
173
                      $                $
174
         +---------+  $   +---------+  $   +---------+
175
         | kp14=c1 |--$-->| kp15=c0 |--$-->| kp16=c3 |
176
         +---------+  $   +---------+  $   +---------+
177
              |       $                $
178
         +---------+  $   +---------+  $
179
         | kp14=c2 |--$-->| kp15=c0 |  $
180
         +---------+  $   +---------+  $
181
                      $                $
182
183
       Note that we had to duplicate "kp15=c0" and there was no way to avoid
184
       that.
185
       The induction step: AND the obtained expression with another "wrapping"
186
       expression like (*).
187
       When the process ends because of the limit on max. number of keyparts
188
       we'll have:
189
190
         WHERE clause length  is O(3*#max_keyparts)
191
         SEL_ARG graph size   is O(2^(#max_keyparts/2))
192
193
       (it is also possible to construct a case where instead of 2 in 2^n we
194
        have a bigger constant, e.g. 4, and get a graph with 4^(31/2)= 2^31
195
        nodes)
196
197
    We avoid consuming too much memory by setting a limit on the number of
198
    SEL_ARG object we can construct during one range analysis invocation.
199
*/
200
1253.1.4 by Monty Taylor
Moved sql_alloc into memory.
201
class SEL_ARG :public drizzled::memory::SqlAlloc
1237.13.6 by Padraig O'Sullivan
Moved the SEL_ARG class into its own header and implementation files. Cleaned up a bunch of style
202
{
203
public:
204
  uint8_t min_flag,max_flag,maybe_flag;
205
  uint8_t part;					// Which key part
206
  uint8_t maybe_null;
207
  /*
208
    Number of children of this element in the RB-tree, plus 1 for this
209
    element itself.
210
  */
211
  uint16_t elements;
212
  /*
213
    Valid only for elements which are RB-tree roots: Number of times this
214
    RB-tree is referred to (it is referred by SEL_ARG::next_key_part or by
215
    SEL_TREE::keys[i] or by a temporary SEL_ARG* variable)
216
  */
217
  ulong use_count;
218
219
  Field *field;
220
  unsigned char *min_value,*max_value;			// Pointer to range
221
222
  /*
223
    eq_tree() requires that left == right == 0 if the type is MAYBE_KEY.
224
   */
225
  SEL_ARG *left,*right;   /* R-B tree children */
226
  SEL_ARG *next,*prev;    /* Links for bi-directional interval list */
227
  SEL_ARG *parent;        /* R-B tree parent */
228
  SEL_ARG *next_key_part;
229
  enum leaf_color { BLACK,RED } color;
230
  enum Type { IMPOSSIBLE, MAYBE, MAYBE_KEY, KEY_RANGE } type;
231
232
  enum 
233
  { 
234
    MAX_SEL_ARGS = 16000 
235
  };
236
237
  SEL_ARG() {}
238
239
  SEL_ARG(SEL_ARG &);
240
241
  SEL_ARG(Field *,const unsigned char *, const unsigned char *);
242
243
  SEL_ARG(Field *field, 
244
          uint8_t part, 
245
          unsigned char *min_value, 
246
          unsigned char *max_value,
247
	        uint8_t min_flag, 
248
          uint8_t max_flag, 
249
          uint8_t maybe_flag);
250
251
  SEL_ARG(enum Type type_arg)
252
    :
253
      min_flag(0),
254
      elements(1),
255
      use_count(1),
256
      left(0),
257
      right(0),
258
      next_key_part(0),
259
      color(BLACK), 
260
      type(type_arg)
261
  {}
262
263
  inline bool is_same(SEL_ARG *arg)
264
  {
265
    if (type != arg->type || part != arg->part)
266
      return 0;
267
    if (type != KEY_RANGE)
268
      return 1;
269
    return (cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0);
270
  }
271
272
  inline void merge_flags(SEL_ARG *arg) 
273
  { 
274
    maybe_flag|= arg->maybe_flag; 
275
  }
276
277
  inline void maybe_smaller() 
278
  { 
279
    maybe_flag= 1; 
280
  }
281
282
  /* Return true iff it's a single-point null interval */
283
  inline bool is_null_interval() 
284
  { 
285
    return (maybe_null && max_value[0] == 1);
286
  }
287
288
  inline int cmp_min_to_min(SEL_ARG *arg)
289
  {
290
    return sel_cmp(field,min_value, arg->min_value, min_flag, arg->min_flag);
291
  }
292
293
  inline int cmp_min_to_max(SEL_ARG *arg)
294
  {
295
    return sel_cmp(field,min_value, arg->max_value, min_flag, arg->max_flag);
296
  }
297
298
  inline int cmp_max_to_max(SEL_ARG *arg)
299
  {
300
    return sel_cmp(field,max_value, arg->max_value, max_flag, arg->max_flag);
301
  }
302
303
  inline int cmp_max_to_min(SEL_ARG *arg)
304
  {
305
    return sel_cmp(field,max_value, arg->min_value, max_flag, arg->min_flag);
306
  }
307
308
  SEL_ARG *clone_and(SEL_ARG *arg);
309
310
  SEL_ARG *clone_first(SEL_ARG *arg);
311
312
  SEL_ARG *clone_last(SEL_ARG *arg);
313
1237.13.7 by Padraig O'Sullivan
Renamed PARAM to Parameter and RANGE_OPT_PARAM to RangeParameter.
314
  SEL_ARG *clone(RangeParameter *param, SEL_ARG *new_parent, SEL_ARG **next);
1237.13.6 by Padraig O'Sullivan
Moved the SEL_ARG class into its own header and implementation files. Cleaned up a bunch of style
315
316
  bool copy_min(SEL_ARG *arg);
317
318
  bool copy_max(SEL_ARG *arg);
319
320
  void copy_min_to_min(SEL_ARG *arg);
321
322
  void copy_min_to_max(SEL_ARG *arg);
323
324
  void copy_max_to_min(SEL_ARG *arg);
325
326
  /* returns a number of keypart values (0 or 1) appended to the key buffer */
327
  int store_min(uint32_t length, unsigned char **min_key, uint32_t min_key_flag);
328
329
  /* returns a number of keypart values (0 or 1) appended to the key buffer */
330
  int store_max(uint32_t length, unsigned char **max_key, uint32_t max_key_flag);
331
332
  /* returns a number of keypart values appended to the key buffer */
333
  int store_min_key(KEY_PART *key, unsigned char **range_key, uint32_t *range_key_flag);
334
335
  /* returns a number of keypart values appended to the key buffer */
336
  int store_max_key(KEY_PART *key, unsigned char **range_key, uint32_t *range_key_flag);
337
338
  SEL_ARG *insert(SEL_ARG *key);
339
  SEL_ARG *tree_delete(SEL_ARG *key);
340
  SEL_ARG *find_range(SEL_ARG *key);
341
  SEL_ARG *rb_insert(SEL_ARG *leaf);
342
343
  friend SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key, SEL_ARG *par);
344
345
  SEL_ARG *first();
346
347
  SEL_ARG *last();
348
349
  void make_root();
350
351
  inline bool simple_key()
352
  {
353
    return (! next_key_part && elements == 1);
354
  }
355
356
  void increment_use_count(long count)
357
  {
358
    if (next_key_part)
359
    {
360
      next_key_part->use_count+= count;
361
      count*= (next_key_part->use_count - count);
362
      for (SEL_ARG *pos= next_key_part->first(); pos; pos= pos->next)
363
        if (pos->next_key_part)
364
          pos->increment_use_count(count);
365
    }
366
  }
367
368
  void free_tree()
369
  {
370
    for (SEL_ARG *pos= first(); pos; pos= pos->next)
371
      if (pos->next_key_part)
372
      {
373
        pos->next_key_part->use_count--;
374
        pos->next_key_part->free_tree();
375
      }
376
  }
377
378
  inline SEL_ARG **parent_ptr()
379
  {
380
    return parent->left == this ? &parent->left : &parent->right;
381
  }
382
383
384
  /*
385
    Check if this SEL_ARG object represents a single-point interval
386
387
    SYNOPSIS
388
      is_singlepoint()
389
390
    DESCRIPTION
391
      Check if this SEL_ARG object (not tree) represents a single-point
392
      interval, i.e. if it represents a "keypart = const" or
393
      "keypart IS NULL".
394
395
    RETURN
396
      true   This SEL_ARG object represents a singlepoint interval
397
      false  Otherwise
398
  */
399
400
  bool is_singlepoint()
401
  {
402
    /*
403
      Check for NEAR_MIN ("strictly less") and NO_MIN_RANGE (-inf < field)
404
      flags, and the same for right edge.
405
    */
406
    if (min_flag || max_flag)
407
      return false;
408
    unsigned char *min_val= min_value;
409
    unsigned char *max_val= max_value;
410
411
    if (maybe_null)
412
    {
413
      /* First byte is a NULL value indicator */
414
      if (*min_val != *max_val)
415
        return false;
416
417
      if (*min_val)
418
        return true; /* This "x IS NULL" */
419
      min_val++;
420
      max_val++;
421
    }
422
    return ! field->key_cmp(min_val, max_val);
423
  }
424
1237.13.7 by Padraig O'Sullivan
Renamed PARAM to Parameter and RANGE_OPT_PARAM to RangeParameter.
425
  SEL_ARG *clone_tree(RangeParameter *param);
1237.13.6 by Padraig O'Sullivan
Moved the SEL_ARG class into its own header and implementation files. Cleaned up a bunch of style
426
427
private:
428
429
  /*
430
     Check if a compare is ok, when one takes ranges in account
431
     Returns -2 or 2 if the ranges where 'joined' like  < 2 and >= 2
432
   */
433
  int sel_cmp(Field *in_field, 
434
              unsigned char *a, 
435
              unsigned char *b, 
436
              uint8_t a_flag,
437
              uint8_t b_flag)
438
  {
439
    int cmp= 0;
440
    /* First check if there was a compare to a min or max element */
441
    if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
442
    {
443
      if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) ==
444
          (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)))
445
        return 0;
446
      return (a_flag & NO_MIN_RANGE) ? -1 : 1;
447
    }
448
    if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
449
      return (b_flag & NO_MIN_RANGE) ? 1 : -1;
450
451
    if (in_field->real_maybe_null())			// If null is part of key
452
    {
453
      if (*a != *b)
454
      {
455
        return *a ? -1 : 1;
456
      }
457
      if (*a)
458
        goto end;					// NULL where equal
459
      a++; b++;					// Skip NULL marker
460
    }
461
    cmp= in_field->key_cmp(a , b);
462
    if (cmp) return cmp < 0 ? -1 : 1;		// The values differed
463
464
    // Check if the compared equal arguments was defined with open/closed range
465
end:
466
    if (a_flag & (NEAR_MIN | NEAR_MAX))
467
    {
468
      if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX)))
469
        return 0;
470
      if (! (b_flag & (NEAR_MIN | NEAR_MAX)))
471
        return (a_flag & NEAR_MIN) ? 2 : -2;
472
      return (a_flag & NEAR_MIN) ? 1 : -1;
473
    }
474
    if (b_flag & (NEAR_MIN | NEAR_MAX))
475
      return (b_flag & NEAR_MIN) ? -2 : 2;
476
    return 0;					// The elements where equal
477
  }
478
479
  
480
};
481
482
SEL_ARG *rb_delete_fixup(SEL_ARG *root,
483
                         SEL_ARG *key,
484
                         SEL_ARG *par);
485
486
extern SEL_ARG null_element;
487
488
} /* namespace optimizer */
489
490
} /* namespace drizzled */
491
492
#endif /* DRIZZLED_OPTIMIZER_SEL_ARG_H */