/* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=2:tabstop=2:smarttab: * * Copyright (C) 2008 Sun Microsystems * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* classes to use when handling where clause */ #ifndef DRIZZLED_OPT_RANGE_H #define DRIZZLED_OPT_RANGE_H #include #include #include class JOIN; typedef class Item COND; typedef struct st_handler_buffer HANDLER_BUFFER; typedef struct st_key_part { uint16_t key,part; /* See KEY_PART_INFO for meaning of the next two: */ uint16_t store_length, length; uint8_t null_bit; /* Keypart flags (0 when this structure is used by partition pruning code for fake partitioning index description) */ uint8_t flag; Field *field; Field::imagetype image_type; } KEY_PART; class QUICK_RANGE :public Sql_alloc { public: unsigned char *min_key,*max_key; uint16_t min_length,max_length,flag; key_part_map min_keypart_map, // bitmap of used keyparts in min_key max_keypart_map; // bitmap of used keyparts in max_key #ifdef HAVE_purify uint16_t dummy; /* Avoid warnings on 'flag' */ #endif QUICK_RANGE(); /* Full range */ QUICK_RANGE(const unsigned char *min_key_arg, uint32_t min_length_arg, key_part_map min_keypart_map_arg, const unsigned char *max_key_arg, uint32_t max_length_arg, key_part_map max_keypart_map_arg, uint32_t flag_arg) : min_key((unsigned char*) sql_memdup(min_key_arg,min_length_arg+1)), 
max_key((unsigned char*) sql_memdup(max_key_arg,max_length_arg+1)), min_length((uint16_t) min_length_arg), max_length((uint16_t) max_length_arg), flag((uint16_t) flag_arg), min_keypart_map(min_keypart_map_arg), max_keypart_map(max_keypart_map_arg) { #ifdef HAVE_purify dummy=0; #endif } }; /* Quick select interface. This class is a parent for all QUICK_*_SELECT classes. The usage scenario is as follows: 1. Create quick select quick= new QUICK_XXX_SELECT(...); 2. Perform lightweight initialization. This can be done in 2 ways: 2.a: Regular initialization if (quick->init()) { //the only valid action after failed init() call is delete delete quick; } 2.b: Special initialization for quick selects merged by QUICK_ROR_*_SELECT if (quick->init_ror_merged_scan()) delete quick; 3. Perform zero, one, or more scans. while (...) { // initialize quick select for scan. This may allocate // buffers and/or prefetch rows. if (quick->reset()) { //the only valid action after failed reset() call is delete delete quick; //abort query } // perform the scan do { res= quick->get_next(); } while (res && ...) } 4. Delete the select: delete quick; */ class QUICK_SELECT_I { public: bool sorted; ha_rows records; /* estimate of # of records to be retrieved */ double read_time; /* time to perform this retrieval */ Table *head; /* Index this quick select uses, or MAX_KEY for quick selects that use several indexes */ uint32_t index; /* Total length of first used_key_parts parts of the key. Applicable if index!= MAX_KEY. */ uint32_t max_used_key_length; /* Max. number of (first) key parts this quick select uses for retrieval. eg. for "(key1p1=c1 AND key1p2=c2) OR key1p1=c2" used_key_parts == 2. Applicable if index!= MAX_KEY. For QUICK_GROUP_MIN_MAX_SELECT it includes MIN/MAX argument keyparts. */ uint32_t used_key_parts; QUICK_SELECT_I(); virtual ~QUICK_SELECT_I(){}; /* Do post-constructor initialization. 
SYNOPSIS init() init() performs initializations that should have been in constructor if it was possible to return errors from constructors. The join optimizer may create and then delete quick selects without retrieving any rows so init() must not contain any IO or CPU intensive code. If init() call fails the only valid action is to delete this quick select, reset() and get_next() must not be called. RETURN 0 OK other Error code */ virtual int init() = 0; /* Initialize quick select for row retrieval. SYNOPSIS reset() reset() should be called when it is certain that row retrieval will be necessary. This call may do heavyweight initialization like buffering first N records etc. If reset() call fails get_next() must not be called. Note that reset() may be called several times if * the quick select is executed in a subselect * a JOIN buffer is used RETURN 0 OK other Error code */ virtual int reset(void) = 0; virtual int get_next() = 0; /* get next record to retrieve */ /* Range end should be called when we have looped over the whole index */ virtual void range_end() {} virtual bool reverse_sorted() = 0; virtual bool unique_key_range() { return false; } enum { QS_TYPE_RANGE = 0, QS_TYPE_INDEX_MERGE = 1, QS_TYPE_RANGE_DESC = 2, QS_TYPE_ROR_INTERSECT = 4, QS_TYPE_ROR_UNION = 5, QS_TYPE_GROUP_MIN_MAX = 6 }; /* Get type of this quick select - one of the QS_TYPE_* values */ virtual int get_type() = 0; /* Initialize this quick select as a merged scan inside a ROR-union or a ROR- intersection scan. The caller must not additionally call init() if this function is called. SYNOPSIS init_ror_merged_scan() reuse_handler If true, the quick select may use table->handler, otherwise it must create and use a separate handler object. RETURN 0 Ok other Error */ virtual int init_ror_merged_scan(bool) { assert(0); return 1; } /* Save ROWID of last retrieved row in file->ref. This used in ROR-merging. 
*/ virtual void save_last_pos(){}; /* Append comma-separated list of keys this quick select uses to key_names; append comma-separated list of corresponding used lengths to used_lengths. This is used by select_describe. */ virtual void add_keys_and_lengths(String *key_names, String *used_lengths)=0; /* Append text representation of quick select structure (what and how is merged) to str. The result is added to "Extra" field in EXPLAIN output. This function is implemented only by quick selects that merge other quick selects output and/or can produce output suitable for merging. */ virtual void add_info_string(String *) {}; /* Return 1 if any index used by this quick select uses field which is marked in passed bitmap. */ virtual bool is_keys_used(const MY_BITMAP *fields); /* rowid of last row retrieved by this quick select. This is used only when doing ROR-index_merge selects */ unsigned char *last_rowid; /* Table record buffer used by this quick select. */ unsigned char *record; }; struct st_qsel_param; class PARAM; class SEL_ARG; /* MRR range sequence, array implementation: sequence traversal context. */ typedef struct st_quick_range_seq_ctx { QUICK_RANGE **first; QUICK_RANGE **cur; QUICK_RANGE **last; } QUICK_RANGE_SEQ_CTX; range_seq_t quick_range_seq_init(void *init_param, uint32_t n_ranges, uint32_t flags); uint32_t quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range); /* Quick select that does a range scan on a single key. The records are returned in key order. 
*/
class QUICK_RANGE_SELECT : public QUICK_SELECT_I
{
protected:
  handler *file;
  DYNAMIC_ARRAY ranges;     /* ordered array of range ptrs */

  /* Members to deal with case when this quick select is a ROR-merged scan */
  bool in_ror_merged_scan;
  MY_BITMAP column_bitmap, *save_read_set, *save_write_set;
  bool free_file;   /* TRUE <=> this->file is "owned" by this quick select */

  /* Range pointers to be used when not using MRR interface */
  QUICK_RANGE **cur_range;  /* current element in ranges  */
  QUICK_RANGE *last_range;

  /* Members needed to use the MRR interface */
  QUICK_RANGE_SEQ_CTX qr_traversal_ctx;

public:
  uint32_t mrr_flags;    /* Flags to be used with MRR interface */

protected:
  uint32_t mrr_buf_size; /* copy from session->variables.read_rnd_buff_size */
  HANDLER_BUFFER *mrr_buf_desc; /* the handler buffer */

  /* Info about index we're scanning */
  KEY_PART *key_parts;
  KEY_PART_INFO *key_part_info;

  bool dont_free; /* Used by QUICK_SELECT_DESC */

  int cmp_next(QUICK_RANGE *range);
  int cmp_prev(QUICK_RANGE *range);
  bool row_in_ranges();

public:
  MEM_ROOT alloc;

  QUICK_RANGE_SELECT(Session *session, Table *table,
                     uint32_t index_arg,
                     bool no_alloc,
                     MEM_ROOT *parent_alloc,
                     bool *create_err);
  ~QUICK_RANGE_SELECT();

  int init();
  int reset(void);
  int get_next();
  void range_end();
  int get_next_prefix(uint32_t prefix_length,
                      key_part_map keypart_map,
                      unsigned char *cur_prefix);

  /* A plain range select never reports its output as reverse-sorted. */
  bool reverse_sorted()
  {
    return false;
  }
  bool unique_key_range();
  int init_ror_merged_scan(bool reuse_handler);
  void save_last_pos();
  int get_type()
  {
    return QS_TYPE_RANGE;
  }
  void add_keys_and_lengths(String *key_names, String *used_lengths);
  void add_info_string(String *str);

private:
  /*
    Used only by QUICK_SELECT_DESC.

    The new object is filled with a byte-for-byte image of 'org' (the whole
    object, members inherited from QUICK_SELECT_I included); afterwards the
    MRR members are reset.

    NOTE(review): this is a raw memory copy of an object of a class with
    virtual functions -- confirm that this is still acceptable for the
    compilers in use.
  */
  QUICK_RANGE_SELECT(const QUICK_RANGE_SELECT& org)
    : QUICK_SELECT_I()
  {
    memmove(this, &org, sizeof(*this));
    /*
      Use default MRR implementation for reverse scans. No table engine
      currently can do an MRR scan with output in reverse index order.
    */
    mrr_buf_desc= NULL;
    mrr_flags |= HA_MRR_USE_DEFAULT_IMPL;
    mrr_buf_size= 0;
  }

  friend class TRP_ROR_INTERSECT;
  friend
  QUICK_RANGE_SELECT *get_quick_select_for_ref(Session *session, Table *table,
                                               struct st_table_ref *ref,
                                               ha_rows records);
  friend bool get_quick_keys(PARAM *param,
                             QUICK_RANGE_SELECT *quick,
                             KEY_PART *key,
                             SEL_ARG *key_tree,
                             unsigned char *min_key,
                             uint32_t min_key_flag,
                             unsigned char *max_key,
                             uint32_t max_key_flag);
  friend QUICK_RANGE_SELECT *get_quick_select(PARAM*,
                                              uint32_t idx,
                                              SEL_ARG *key_tree,
                                              uint32_t mrr_flags,
                                              uint32_t mrr_buf_size,
                                              MEM_ROOT *alloc);
  friend class QUICK_SELECT_DESC;
  friend class QUICK_INDEX_MERGE_SELECT;
  friend class QUICK_ROR_INTERSECT_SELECT;
  friend class QUICK_GROUP_MIN_MAX_SELECT;
  friend uint32_t quick_range_seq_next(range_seq_t rseq, KEY_MULTI_RANGE *range);
  friend range_seq_t quick_range_seq_init(void *init_param,
                                          uint32_t n_ranges,
                                          uint32_t flags);
  friend void select_describe(JOIN *join,
                              bool need_tmp_table,
                              bool need_order,
                              bool distinct,
                              const char *message);
};


/*
  QUICK_INDEX_MERGE_SELECT - index_merge access method quick select.

    QUICK_INDEX_MERGE_SELECT uses
     * QUICK_RANGE_SELECTs to get rows
     * Unique class to remove duplicate rows

  INDEX MERGE OPTIMIZER
    Current implementation doesn't detect all cases where index_merge could
    be used, in particular:
     * index_merge will never be used if range scan is possible (even if
       range scan is more expensive)

     * index_merge+'using index' is not supported (this the consequence of
       the above restriction)

     * If WHERE part contains complex nested AND and OR conditions, some ways
       to retrieve rows using index_merge will not be considered. The choice
       of read plan may depend on the order of conjuncts/disjuncts in WHERE
       part of the query, see comments near imerge_list_or_list and
       SEL_IMERGE::or_sel_tree_with_checks functions for details.

     * There is no "index_merge_ref" method (but index_merge on non-first
       table in join is possible with 'range checked for each record').
See comments around SEL_IMERGE class and test_quick_select for more details. ROW RETRIEVAL ALGORITHM index_merge uses Unique class for duplicates removal. index_merge takes advantage of Clustered Primary Key (CPK) if the table has one. The index_merge algorithm consists of two phases: Phase 1 (implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique): prepare() { activate 'index only'; while(retrieve next row for non-CPK scan) { if (there is a CPK scan and row will be retrieved by it) skip this row; else put its rowid into Unique; } deactivate 'index only'; } Phase 2 (implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next calls): fetch() { retrieve all rows from row pointers stored in Unique; free Unique; retrieve all rows for CPK scan; } */ class QUICK_INDEX_MERGE_SELECT : public QUICK_SELECT_I { public: QUICK_INDEX_MERGE_SELECT(Session *session, Table *table); ~QUICK_INDEX_MERGE_SELECT(); int init(); int reset(void); int get_next(); bool reverse_sorted() { return false; } bool unique_key_range() { return false; } int get_type() { return QS_TYPE_INDEX_MERGE; } void add_keys_and_lengths(String *key_names, String *used_lengths); void add_info_string(String *str); bool is_keys_used(const MY_BITMAP *fields); bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); /* range quick selects this index_merge read consists of */ List quick_selects; /* quick select that uses clustered primary key (NULL if none) */ QUICK_RANGE_SELECT* pk_quick_select; /* true if this select is currently doing a clustered PK scan */ bool doing_pk_scan; MEM_ROOT alloc; Session *session; int read_keys_and_merge(); /* used to get rows collected in Unique */ READ_RECORD read_record; }; /* Rowid-Ordered Retrieval (ROR) index intersection quick select. This quick select produces intersection of row sequences returned by several QUICK_RANGE_SELECTs it "merges". All merged QUICK_RANGE_SELECTs must return rowids in rowid order. QUICK_ROR_INTERSECT_SELECT will return rows in rowid order, too. 
All merged quick selects retrieve {rowid, covered_fields} tuples (not full table records). QUICK_ROR_INTERSECT_SELECT retrieves full records if it is not being used by QUICK_ROR_INTERSECT_SELECT and all merged quick selects together don't cover needed all fields. If one of the merged quick selects is a Clustered PK range scan, it is used only to filter rowid sequence produced by other merged quick selects. */ class QUICK_ROR_INTERSECT_SELECT : public QUICK_SELECT_I { public: QUICK_ROR_INTERSECT_SELECT(Session *session, Table *table, bool retrieve_full_rows, MEM_ROOT *parent_alloc); ~QUICK_ROR_INTERSECT_SELECT(); int init(); int reset(void); int get_next(); bool reverse_sorted() { return false; } bool unique_key_range() { return false; } int get_type() { return QS_TYPE_ROR_INTERSECT; } void add_keys_and_lengths(String *key_names, String *used_lengths); void add_info_string(String *str); bool is_keys_used(const MY_BITMAP *fields); int init_ror_merged_scan(bool reuse_handler); bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range); /* Range quick selects this intersection consists of, not including cpk_quick. */ List quick_selects; /* Merged quick select that uses Clustered PK, if there is one. This quick select is not used for row retrieval, it is used for row retrieval. */ QUICK_RANGE_SELECT *cpk_quick; MEM_ROOT alloc; /* Memory pool for this and merged quick selects data. */ Session *session; /* current thread */ bool need_to_fetch_row; /* if true, do retrieve full table records. */ /* in top-level quick select, true if merged scans where initialized */ bool scans_inited; }; /* Rowid-Ordered Retrieval index union select. This quick select produces union of row sequences returned by several quick select it "merges". All merged quick selects must return rowids in rowid order. QUICK_ROR_UNION_SELECT will return rows in rowid order, too. All merged quick selects are set not to retrieve full table records. ROR-union quick select always retrieves full records. 
*/

class QUICK_ROR_UNION_SELECT : public QUICK_SELECT_I
{
public:
  QUICK_ROR_UNION_SELECT(Session *session, Table *table);
  ~QUICK_ROR_UNION_SELECT();

  int  init();
  int  reset(void);
  int  get_next();
  bool reverse_sorted() { return false; }
  bool unique_key_range() { return false; }
  int get_type() { return QS_TYPE_ROR_UNION; }
  void add_keys_and_lengths(String *key_names, String *used_lengths);
  void add_info_string(String *str);
  bool is_keys_used(const MY_BITMAP *fields);

  bool push_quick_back(QUICK_SELECT_I *quick_sel_range);

  /*
    Merged quick selects.

    NOTE(review): the template argument was missing in the reviewed copy of
    this file; the element type is taken from push_quick_back()'s parameter.
  */
  List<QUICK_SELECT_I> quick_selects;

  QUEUE queue;    /* Priority queue for merge operation */
  MEM_ROOT alloc; /* Memory pool for this and merged quick selects data. */

  Session *session;             /* current thread */
  unsigned char *cur_rowid;     /* buffer used in get_next() */
  unsigned char *prev_rowid;    /* rowid of last row returned by get_next() */
  bool have_prev_rowid;         /* true if prev_rowid has valid data */
  uint32_t rowid_length;        /* table rowid length */
private:
  bool scans_inited;
};

/*
  Comparison callback with C linkage; judging by its name it is the
  comparator for QUICK_ROR_UNION_SELECT::queue (confirm at the definition).
*/
extern "C" int quick_ror_union_select_queue_cmp(void *arg, unsigned char *val1, unsigned char *val2);


/*
  Index scan for GROUP-BY queries with MIN/MAX aggregate functions.

  This class provides a specialized index access method for GROUP-BY queries
  of the forms:

       SELECT A_1,...,A_k, [B_1,...,B_m], [MIN(C)], [MAX(C)]
         FROM T
        WHERE [RNG(A_1,...,A_p ; where p <= k)]
         [AND EQ(B_1,...,B_m)]
         [AND PC(C)]
         [AND PA(A_i1,...,A_iq)]
       GROUP BY A_1,...,A_k;

    or

       SELECT DISTINCT A_i1,...,A_ik
         FROM T
        WHERE [RNG(A_1,...,A_p ; where p <= k)]
         [AND PA(A_i1,...,A_iq)];

  where all selected fields are parts of the same index.
  The class of queries that can be processed by this quick select is fully
  specified in the description of get_best_trp_group_min_max() in
  opt_range.cc.

  The get_next() method directly produces result tuples, thus obviating the
  need to call end_send_group() because all grouping is already done inside
  get_next().

Since one of the requirements is that all select fields are part of the same index, this class produces only index keys, and not complete records. */ class QUICK_GROUP_MIN_MAX_SELECT : public QUICK_SELECT_I { private: handler *file; /* The handler used to get data. */ JOIN *join; /* Descriptor of the current query */ KEY *index_info; /* The index chosen for data access */ unsigned char *record; /* Buffer where the next record is returned. */ unsigned char *tmp_record; /* Temporary storage for next_min(), next_max(). */ unsigned char *group_prefix; /* Key prefix consisting of the GROUP fields. */ uint32_t group_prefix_len; /* Length of the group prefix. */ uint32_t group_key_parts; /* A number of keyparts in the group prefix */ unsigned char *last_prefix; /* Prefix of the last group for detecting EOF. */ bool have_min; /* Specify whether we are computing */ bool have_max; /* a MIN, a MAX, or both. */ bool seen_first_key; /* Denotes whether the first key was retrieved.*/ KEY_PART_INFO *min_max_arg_part; /* The keypart of the only argument field */ /* of all MIN/MAX functions. */ uint32_t min_max_arg_len; /* The length of the MIN/MAX argument field */ unsigned char *key_infix; /* Infix of constants from equality predicates. */ uint32_t key_infix_len; DYNAMIC_ARRAY min_max_ranges; /* Array of range ptrs for the MIN/MAX field. */ uint32_t real_prefix_len; /* Length of key prefix extended with key_infix. */ uint32_t real_key_parts; /* A number of keyparts in the above value. */ List *min_functions; List *max_functions; List_iterator *min_functions_it; List_iterator *max_functions_it; public: /* The following two members are public to allow easy access from TRP_GROUP_MIN_MAX::make_quick() */ MEM_ROOT alloc; /* Memory pool for this and quick_prefix_select data. */ QUICK_RANGE_SELECT *quick_prefix_select;/* For retrieval of group prefixes. 
*/ private: int next_prefix(); int next_min_in_range(); int next_max_in_range(); int next_min(); int next_max(); void update_min_result(); void update_max_result(); public: QUICK_GROUP_MIN_MAX_SELECT(Table *table, JOIN *join, bool have_min, bool have_max, KEY_PART_INFO *min_max_arg_part, uint32_t group_prefix_len, uint32_t group_key_parts, uint32_t used_key_parts, KEY *index_info, uint use_index, double read_cost, ha_rows records, uint key_infix_len, unsigned char *key_infix, MEM_ROOT *parent_alloc); ~QUICK_GROUP_MIN_MAX_SELECT(); bool add_range(SEL_ARG *sel_range); void update_key_stat(); void adjust_prefix_ranges(); bool alloc_buffers(); int init(); int reset(); int get_next(); bool reverse_sorted() { return false; } bool unique_key_range() { return false; } int get_type() { return QS_TYPE_GROUP_MIN_MAX; } void add_keys_and_lengths(String *key_names, String *used_lengths); }; class QUICK_SELECT_DESC: public QUICK_RANGE_SELECT { public: QUICK_SELECT_DESC(QUICK_RANGE_SELECT *q, uint32_t used_key_parts, bool *create_err); int get_next(); bool reverse_sorted() { return 1; } int get_type() { return QS_TYPE_RANGE_DESC; } private: bool range_reads_after_key(QUICK_RANGE *range); int reset(void) { rev_it.rewind(); return QUICK_RANGE_SELECT::reset(); } List rev_ranges; List_iterator rev_it; }; class SQL_SELECT :public Sql_alloc { public: QUICK_SELECT_I *quick; // If quick-select used COND *cond; // where condition Table *head; IO_CACHE file; // Positions to used records ha_rows records; // Records in use if read from file double read_time; // Time to read rows key_map quick_keys; // Possible quick keys key_map needed_reg; // Possible quick keys after prev tables. 
table_map const_tables,read_tables; bool free_cond; SQL_SELECT(); ~SQL_SELECT(); void cleanup(); bool check_quick(Session *session, bool force_quick_range, ha_rows limit); bool skip_record(); int test_quick_select(Session *session, key_map keys, table_map prev_tables, ha_rows limit, bool force_quick_range, bool ordered_output); }; QUICK_RANGE_SELECT *get_quick_select_for_ref(Session *session, Table *table, struct st_table_ref *ref, ha_rows records); uint32_t get_index_for_order(Table *table, order_st *order, ha_rows limit); #endif