219
211
if ((subselect= select_lex->master_unit()->item))
221
bool do_semijoin= !test(session->variables.optimizer_switch &
222
OPTIMIZER_SWITCH_NO_SEMIJOIN);
223
213
if (subselect->substype() == Item_subselect::IN_SUBS)
224
214
in_subs= (Item_in_subselect*)subselect;
227
Check if we're in subquery that is a candidate for flattening into a
228
semi-join (which is done done in flatten_subqueries()). The
230
1. Subquery predicate is an IN/=ANY subq predicate
231
2. Subquery is a single SELECT (not a UNION)
232
3. Subquery does not have GROUP BY or order_st BY
233
4. Subquery does not use aggregate functions or HAVING
234
5. Subquery predicate is at the AND-top-level of ON/WHERE clause
235
6. No execution method was already chosen (by a prepared statement).
237
(*). We are not in a subquery of a single table UPDATE/DELETE that
238
doesn't have a JOIN (TODO: We should handle this at some
239
point by switching to multi-table UPDATE/DELETE)
241
(**). We're not in a confluent table-less subquery, like
245
!select_lex->master_unit()->first_select()->next_select() && // 2
246
!select_lex->group_list.elements && !order && // 3
247
!having && !select_lex->with_sum_func && // 4
248
session->session_marker && // 5
249
select_lex->outer_select()->join && // (*)
250
select_lex->master_unit()->first_select()->leaf_tables && // (**)
252
in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6
255
if (!in_subs->left_expr->fixed &&
256
in_subs->left_expr->fix_fields(session, &in_subs->left_expr))
261
Check that the right part of the subselect contains no more than one
262
column. E.g. in SELECT 1 IN (SELECT * ..) the right part is (SELECT * ...)
264
if (subselect->substype() == Item_subselect::IN_SUBS &&
265
(select_lex->item_list.elements !=
266
((Item_in_subselect*)subselect)->left_expr->cols()))
268
my_error(ER_OPERAND_COLUMNS, MYF(0), ((Item_in_subselect*)subselect)->left_expr->cols());
273
/* Register the subquery for further processing */
274
select_lex->outer_select()->join->sj_subselects.append(session->mem_root, in_subs);
275
in_subs->expr_join_nest= (TableList*)session->session_marker;
279
217
bool do_materialize= !test(session->variables.optimizer_switch &
280
218
OPTIMIZER_SWITCH_NO_MATERIALIZATION);
1765
Convert candidate subquery predicates to semi-joins
1768
JOIN::flatten_subqueries()
1771
Convert candidate subquery predicates to semi-joins.
1777
bool JOIN::flatten_subqueries()
1779
Item_in_subselect **in_subq;
1780
Item_in_subselect **in_subq_end;
1782
if (sj_subselects.elements() == 0)
1785
/* 1. Fix children subqueries */
1786
for (in_subq= sj_subselects.front(), in_subq_end= sj_subselects.back();
1787
in_subq != in_subq_end; in_subq++)
1789
JOIN *child_join= (*in_subq)->unit->first_select()->join;
1790
child_join->outer_tables = child_join->tables;
1791
if (child_join->flatten_subqueries())
1793
(*in_subq)->sj_convert_priority=
1794
(*in_subq)->is_correlated * MAX_TABLES + child_join->outer_tables;
1797
bool outer_join_disable_semi_join= false;
1799
* Temporary measure: disable semi-joins when they are together with outer
1802
* @see LP Bug #314911
1804
for (TableList *tbl= select_lex->leaf_tables; tbl; tbl=tbl->next_leaf)
1806
TableList *embedding= tbl->embedding;
1807
if (tbl->on_expr || (tbl->embedding && !(embedding->sj_on_expr &&
1808
!embedding->embedding)))
1810
in_subq= sj_subselects.front();
1811
outer_join_disable_semi_join= true;
1815
if (! outer_join_disable_semi_join)
1818
2. Pick which subqueries to convert:
1819
sort the subquery array
1820
- prefer correlated subqueries over uncorrelated;
1821
- prefer subqueries that have greater number of outer tables;
1823
sj_subselects.sort(subq_sj_candidate_cmp);
1824
// #tables-in-parent-query + #tables-in-subquery < MAX_TABLES
1825
/* Replace all subqueries to be flattened with Item_int(1) */
1826
for (in_subq= sj_subselects.front();
1827
in_subq != in_subq_end &&
1828
tables + ((*in_subq)->sj_convert_priority % MAX_TABLES) < MAX_TABLES;
1831
if (replace_where_subcondition(this, *in_subq, new Item_int(1), false))
1835
for (in_subq= sj_subselects.front();
1836
in_subq != in_subq_end &&
1837
tables + ((*in_subq)->sj_convert_priority % MAX_TABLES) < MAX_TABLES;
1840
if (convert_subq_to_sj(this, *in_subq))
1845
/* 3. Finalize those we didn't convert */
1846
for (; in_subq!= in_subq_end; in_subq++)
1848
JOIN *child_join= (*in_subq)->unit->first_select()->join;
1849
Item_subselect::trans_res res;
1850
(*in_subq)->changed= 0;
1851
(*in_subq)->fixed= 0;
1852
res= (*in_subq)->select_transformer(child_join);
1853
if (res == Item_subselect::RES_ERROR)
1856
(*in_subq)->changed= 1;
1857
(*in_subq)->fixed= 1;
1859
Item *substitute= (*in_subq)->substitution;
1860
bool do_fix_fields= !(*in_subq)->substitution->fixed;
1861
if (replace_where_subcondition(this, *in_subq, substitute, do_fix_fields))
1864
//if ((*in_subq)->fix_fields(session, (*in_subq)->ref_ptr))
1867
sj_subselects.clear();
1872
1699
Setup for execution all subqueries of a query, for which the optimizer
1873
1700
chose hash semi-join.
6529
Setup the strategies to eliminate semi-join duplicates.
6532
setup_semijoin_dups_elimination()
6533
join Join to process
6534
options Join options (needed to see if join buffering will be
6536
no_jbuf_after Another bit of information re where join buffering will
6540
Setup the strategies to eliminate semi-join duplicates. ATM there are 3
6543
1. DuplicateWeedout (use of temptable to remove duplicates based on rowids
6544
of row combinations)
6545
2. FirstMatch (pick only the 1st matching row combination of inner tables)
6546
3. InsideOut (scanning the sj-inner table in a way that groups duplicates
6547
together and picking the 1st one)
6549
The join order has "duplicate-generating ranges", and every range is
6550
served by one strategy or a combination of FirstMatch with with some
6553
"Duplicate-generating range" is defined as a range within the join order
6554
that contains all of the inner tables of a semi-join. All ranges must be
6555
disjoint, if tables of several semi-joins are interleaved, then the ranges
6556
are joined together, which is equivalent to converting
6557
SELECT ... WHERE oe1 IN (SELECT ie1 ...) AND oe2 IN (SELECT ie2 )
6559
SELECT ... WHERE (oe1, oe2) IN (SELECT ie1, ie2 ... ...)
6562
Applicability conditions are as follows:
6564
DuplicateWeedout strategy
6565
~~~~~~~~~~~~~~~~~~~~~~~~~
6567
(ot|nt)* [ it ((it|ot|nt)* (it|ot))] (nt)*
6568
+------+ +=========================+ +---+
6571
(1) - Prefix of OuterTables (those that participate in
6572
IN-equality and/or are correlated with subquery) and outer
6573
Noncorrelated Tables.
6574
(2) - The handled range. The range starts with the first sj-inner
6575
table, and covers all sj-inner and outer tables
6576
Within the range, Inner, Outer, outer Noncorrelated tables
6577
may follow in any order.
6578
(3) - The suffix of outer Noncorrelated tables.
6583
(ot|nt)* [ it ((it|nt)* it) ] (nt)*
6584
+------+ +==================+ +---+
6587
(1) - Prefix of outer and non-correlated tables
6588
(2) - The handled range, which may contain only inner and
6589
non-correlated tables.
6590
(3) - The suffix of outer Noncorrelated tables.
6595
(ot|ct|nt) [ insideout_tbl (ot|nt|it)* it ] (ot|nt)*
6596
+--------+ +===========+ +=============+ +------+
6599
(1) - Prefix that may contain any outer tables. The prefix must contain
6600
all the non-trivially correlated outer tables. (non-trivially means
6601
that the correlation is not just through the IN-equality).
6603
(2) - Inner table for which the InsideOut scan is performed.
6605
(3) - The remainder of the duplicate-generating range. It is served by
6606
application of FirstMatch strategy, with the exception that
6607
outer IN-correlated tables are considered to be non-correlated.
6609
(4) - THe suffix of outer and outer non-correlated tables.
6611
If several strategies are applicable, their relative priorities are:
6616
This function walks over the join order and sets up the strategies by
6617
setting appropriate members in join_tab structures.
6621
true Out of memory error
6623
static int setup_semijoin_dups_elimination(JOIN *join, uint64_t options, uint32_t no_jbuf_after)
6625
table_map cur_map= join->const_table_map | PSEUDO_TABLE_BITS;
6628
0 - invalid (EOF marker),
6630
2 - Temptable (maybe confluent),
6631
3 - Temptable with join buffering
6634
uint32_t start_idx; /* Left range bound */
6635
uint32_t end_idx; /* Right range bound */
6637
For Temptable strategy: Bitmap of all outer and correlated tables from
6638
all involved join nests.
6640
table_map outer_tables;
6641
} dups_ranges [MAX_TABLES];
6643
TableList *emb_insideout_nest= NULL;
6644
table_map emb_sj_map= 0; /* A bitmap of sj-nests (that is, their sj-inner
6645
tables) whose ranges we're in */
6646
table_map emb_outer_tables= 0; /* sj-outer tables for those sj-nests */
6647
table_map range_start_map= 0; /* table_map at current range start */
6648
bool dealing_with_jbuf= false; /* true <=> table within cur range uses join buf */
6653
First pass: locate the duplicate-generating ranges and pick the strategies.
6655
for (i=join->const_tables ; i < join->tables ; i++)
6657
JoinTable *tab=join->join_tab+i;
6658
Table *table=tab->table;
6659
cur_map |= table->map;
6661
if (tab->emb_sj_nest) // Encountered an sj-inner table
6665
dups_ranges[cur_range].start_idx= i;
6666
range_start_map= cur_map & ~table->map;
6668
Remember if this is a possible start of range that is covered by
6669
the InsideOut strategy (the reason that it is not covered could
6670
be that it overlaps with anther semi-join's range. we don't
6671
support InsideOut for joined ranges)
6673
if (join->best_positions[i].use_insideout_scan)
6674
emb_insideout_nest= tab->emb_sj_nest;
6677
emb_sj_map |= tab->emb_sj_nest->sj_inner_tables;
6678
emb_outer_tables |= tab->emb_sj_nest->nested_join->sj_depends_on;
6680
if (tab->emb_sj_nest != emb_insideout_nest)
6683
Two different semi-joins interleave. This cannot be handled by
6686
emb_insideout_nest= NULL;
6690
if (emb_sj_map) /* We're in duplicate-generating range */
6692
if (i != join->const_tables && !(options & SELECT_NO_JOIN_CACHE) &&
6693
tab->type == JT_ALL && tab->use_quick != 2 && !tab->first_inner &&
6694
i <= no_jbuf_after && !dealing_with_jbuf)
6697
This table uses join buffering, which makes use of FirstMatch or
6698
InsideOut strategies impossible for the current and (we assume)
6699
preceding duplicate-producing ranges.
6700
That is, for the join order:
6702
x x [ x x] x [x x x] x [x x X* x] x
6704
+-----+ +-----+ | join buffering use
6707
we'll have to remove r1 and r2 and use duplicate-elimination
6708
strategy that spans all the tables, starting from the very 1st
6711
dealing_with_jbuf= true;
6712
emb_insideout_nest= false;
6715
Absorb all preceding duplicate-eliminating ranges. Their strategies
6718
for (int prev_range= 0; prev_range < cur_range; prev_range++)
6720
dups_ranges[cur_range].outer_tables |=
6721
dups_ranges[prev_range].outer_tables;
6723
dups_ranges[0].start_idx= 0; /* Will need to start from the 1st table */
6724
dups_ranges[0].outer_tables= dups_ranges[cur_range].outer_tables;
6729
Check if we are at the end of duplicate-producing range. We are if
6731
1. It's an InsideOut range (which presumes all correlated tables are
6732
in the prefix), and all inner tables are in the join order prefix,
6734
2. It's a DuplicateElimination range (possibly covering several
6735
SJ-nests), and all inner, outer, and correlated tables of all
6736
sj-nests are in the join order prefix.
6738
bool end_of_range= false;
6739
if (emb_insideout_nest &&
6740
bitmap_covers(cur_map, emb_insideout_nest->sj_inner_tables))
6742
/* Save that this range is handled with InsideOut: */
6743
dups_ranges[cur_range].strategy= 1;
6746
else if (bitmap_covers(cur_map, emb_outer_tables | emb_sj_map))
6749
This is a complete range to be handled with either DuplicateWeedout
6752
dups_ranges[cur_range].strategy= dealing_with_jbuf? 3 : 2;
6754
This will hold tables from within the range that need to be put
6755
into the join buffer before we can use the FirstMatch on its tail.
6757
dups_ranges[cur_range].outer_tables= emb_outer_tables &
6764
dups_ranges[cur_range].end_idx= i+1;
6765
emb_sj_map= emb_outer_tables= 0;
6766
emb_insideout_nest= NULL;
6767
dealing_with_jbuf= false;
6768
dups_ranges[++cur_range].strategy= 0;
6773
Session *session= join->session;
6774
SemiJoinTable **next_sjtbl_ptr= &join->sj_tmp_tables;
6776
Second pass: setup the chosen strategies
6778
for (int j= 0; j < cur_range; j++)
6780
JoinTable *tab=join->join_tab + dups_ranges[j].start_idx;
6782
if (dups_ranges[j].strategy == 1) // InsideOut strategy
6784
tab->insideout_match_tab= join->join_tab + dups_ranges[j].end_idx - 1;
6787
else // DuplicateWeedout strategy
6789
SemiJoinTable::TAB sjtabs[MAX_TABLES];
6790
table_map weed_cur_map= join->const_table_map | PSEUDO_TABLE_BITS;
6791
uint32_t jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes)
6792
uint32_t jt_null_bits= 0; // # null bits in tuple bytes
6793
SemiJoinTable::TAB *last_tab= sjtabs;
6794
uint32_t rowid_keep_flags= JoinTable::CALL_POSITION | JoinTable::KEEP_ROWID;
6795
JoinTable *last_outer_tab= tab - 1;
6797
Walk through the range and remember
6798
- tables that need their rowids to be put into temptable
6799
- the last outer table
6801
for (; tab < join->join_tab + dups_ranges[j].end_idx; tab++)
6803
if (sj_table_is_included(join, tab))
6805
last_tab->join_tab= tab;
6806
last_tab->rowid_offset= jt_rowid_offset;
6807
jt_rowid_offset += tab->table->file->ref_length;
6808
if (tab->table->maybe_null)
6810
last_tab->null_byte= jt_null_bits / 8;
6811
last_tab->null_bit= jt_null_bits++;
6814
tab->table->prepare_for_position();
6815
tab->rowid_keep_flags= rowid_keep_flags;
6817
weed_cur_map |= tab->table->map;
6818
if (!tab->emb_sj_nest && bitmap_covers(weed_cur_map,
6819
dups_ranges[j].outer_tables))
6820
last_outer_tab= tab;
6823
if (jt_rowid_offset) /* Temptable has at least one rowid */
6825
SemiJoinTable *sjtbl;
6826
uint32_t tabs_size= (last_tab - sjtabs) * sizeof(SemiJoinTable::TAB);
6827
if (!(sjtbl= (SemiJoinTable*)session->alloc(sizeof(SemiJoinTable))) ||
6828
!(sjtbl->tabs= (SemiJoinTable::TAB*) session->alloc(tabs_size)))
6830
memcpy(sjtbl->tabs, sjtabs, tabs_size);
6831
sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs);
6832
sjtbl->rowid_len= jt_rowid_offset;
6833
sjtbl->null_bits= jt_null_bits;
6834
sjtbl->null_bytes= (jt_null_bits + 7)/8;
6836
*next_sjtbl_ptr= sjtbl;
6837
next_sjtbl_ptr= &(sjtbl->next);
6841
sjtbl->createTable(session,
6845
join->join_tab[dups_ranges[j].start_idx].flush_weedout_table= sjtbl;
6846
join->join_tab[dups_ranges[j].end_idx - 1].check_weed_out_table= sjtbl;
6848
tab= last_outer_tab + 1;
6849
jump_to= last_outer_tab;
6852
/* Create the FirstMatch tail */
6853
for (; tab < join->join_tab + dups_ranges[j].end_idx; tab++)
6855
if (tab->emb_sj_nest)
6856
tab->do_firstmatch= jump_to;
6864
static void cleanup_sj_tmp_tables(JOIN *join)
6866
for (SemiJoinTable *sj_tbl= join->sj_tmp_tables; sj_tbl;
6867
sj_tbl= sj_tbl->next)
6869
if (sj_tbl->tmp_table)
6871
sj_tbl->tmp_table->free_tmp_table(join->session);
6874
join->sj_tmp_tables= NULL;
6878
6222
Create a condition for a const reference and add this to the
6879
6223
currenct select for the table.
6913
6257
return(error ? true : false);
6917
@brief Replaces an expression destructively inside the expression tree of
6920
@note Because of current requirements for semijoin flattening, we do not
6921
need to recurse here, hence this function will only examine the top-level
6922
AND conditions. (see JOIN::prepare, comment above the line
6923
'if (do_materialize)'
6925
@param join The top-level query.
6926
@param old_cond The expression to be replaced.
6927
@param new_cond The expression to be substituted.
6928
@param do_fix_fields If true, Item::fix_fields(Session*, Item**) is called for
6930
@return <code>true</code> if there was an error, <code>false</code> if
6933
static bool replace_where_subcondition(JOIN *join, Item *old_cond,
6934
Item *new_cond, bool do_fix_fields)
6936
if (join->conds == old_cond) {
6937
join->conds= new_cond;
6939
new_cond->fix_fields(join->session, &join->conds);
6943
if (join->conds->type() == Item::COND_ITEM) {
6944
List_iterator<Item> li(*((Item_cond*)join->conds)->argument_list());
6946
while ((item= li++))
6947
if (item == old_cond)
6949
li.replace(new_cond);
6951
new_cond->fix_fields(join->session, li.ref());
6960
Pull tables out of semi-join nests, if possible
6963
pull_out_semijoin_tables()
6964
join The join where to do the semi-join flattening
6967
Try to pull tables out of semi-join nests.
6970
When this function is called, the join may have several semi-join nests
6971
(possibly within different semi-join nests), but it is guaranteed that
6972
one semi-join nest does not contain another.
6975
A table can be pulled out of the semi-join nest if
6976
- It is a constant table
6980
* Pulled out tables have JoinTable::emb_sj_nest == NULL (like the outer
6982
* Tables that were not pulled out have JoinTable::emb_sj_nest.
6983
* Semi-join nests TableList::sj_inner_tables
6985
This operation is (and should be) performed at each PS execution since
6986
tables may become/cease to be constant across PS reexecutions.
6990
1 - Out of memory error
6992
static int pull_out_semijoin_tables(JOIN *join)
6995
List_iterator<TableList> sj_list_it(join->select_lex->sj_nests);
6997
/* Try pulling out of the each of the semi-joins */
6998
while ((sj_nest= sj_list_it++))
7000
/* Action #1: Mark the constant tables to be pulled out */
7001
table_map pulled_tables= 0;
7003
List_iterator<TableList> child_li(sj_nest->nested_join->join_list);
7005
while ((tbl= child_li++))
7009
tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest;
7010
if (tbl->table->map & join->const_table_map)
7012
pulled_tables |= tbl->table->map;
7018
Action #2: Find which tables we can pull out based on
7019
update_ref_and_keys() data. Note that pulling one table out can allow
7020
us to pull out some other tables too.
7022
bool pulled_a_table;
7025
pulled_a_table= false;
7027
while ((tbl= child_li++))
7029
if (tbl->table && !(pulled_tables & tbl->table->map))
7031
if (find_eq_ref_candidate(tbl->table,
7032
sj_nest->nested_join->used_tables &
7035
pulled_a_table= true;
7036
pulled_tables |= tbl->table->map;
7040
} while (pulled_a_table);
7043
if ((sj_nest)->nested_join->used_tables == pulled_tables)
7045
(sj_nest)->sj_inner_tables= 0;
7046
while ((tbl= child_li++))
7049
tbl->table->reginfo.join_tab->emb_sj_nest= NULL;
7054
/* Record the bitmap of inner tables, mark the inner tables */
7055
table_map inner_tables=(sj_nest)->nested_join->used_tables &
7057
(sj_nest)->sj_inner_tables= inner_tables;
7058
while ((tbl= child_li++))
7062
if (inner_tables & tbl->table->map)
7063
tbl->table->reginfo.join_tab->emb_sj_nest= (sj_nest);
7065
tbl->table->reginfo.join_tab->emb_sj_nest= NULL;
7074
SemiJoinDuplicateElimination: Weed out duplicate row combinations
7077
do_sj_dups_weedout()
7081
1 The row combination is a duplicate (discard it)
7082
0 The row combination is not a duplicate (continue)
7084
static int do_sj_dups_weedout(Session *session, SemiJoinTable *sjtbl)
7087
SemiJoinTable::TAB *tab= sjtbl->tabs;
7088
SemiJoinTable::TAB *tab_end= sjtbl->tabs_end;
7089
unsigned char *ptr= sjtbl->tmp_table->record[0] + 1;
7090
unsigned char *nulls_ptr= ptr;
7092
/* Put the the rowids tuple into table->record[0]: */
7094
// 1. Store the length
7095
if (((Field_varstring*)(sjtbl->tmp_table->field[0]))->length_bytes == 1)
7097
*ptr= (unsigned char)(sjtbl->rowid_len + sjtbl->null_bytes);
7102
int2store(ptr, sjtbl->rowid_len + sjtbl->null_bytes);
7106
// 2. Zero the null bytes
7107
if (sjtbl->null_bytes)
7109
memset(ptr, 0, sjtbl->null_bytes);
7110
ptr += sjtbl->null_bytes;
7113
// 3. Put the rowids
7114
for (uint32_t i=0; tab != tab_end; tab++, i++)
7116
handler *h= tab->join_tab->table->file;
7117
if (tab->join_tab->table->maybe_null && tab->join_tab->table->null_row)
7119
/* It's a NULL-complemented row */
7120
*(nulls_ptr + tab->null_byte) |= tab->null_bit;
7121
memset(ptr + tab->rowid_offset, 0, h->ref_length);
7125
/* Copy the rowid value */
7126
if (tab->join_tab->rowid_keep_flags & JoinTable::CALL_POSITION)
7127
h->position(tab->join_tab->table->record[0]);
7128
memcpy(ptr + tab->rowid_offset, h->ref, h->ref_length);
7132
error= sjtbl->tmp_table->file->ha_write_row(sjtbl->tmp_table->record[0]);
7135
/* create_myisam_from_heap will generate error if needed */
7136
if (sjtbl->tmp_table->file->is_fatal_error(error, HA_CHECK_DUP) &&
7137
create_myisam_from_heap(session, sjtbl->tmp_table, sjtbl->start_recinfo,
7138
&sjtbl->recinfo, error, 1))
7140
//return (error == HA_ERR_FOUND_DUPP_KEY || error== HA_ERR_FOUND_DUPP_UNIQUE) ? 1: -1;
7146
6260
static void free_blobs(Field **ptr)
7148
6262
for (; *ptr ; ptr++)