/* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
 *
 * Copyright (C) 2008 Sun Microsystems
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* Copyright (C) 2000-2006 MySQL AB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/**
  @file

  Handler-calling-functions
*/
#include "drizzled/my_hash.h"
#include "drizzled/error.h"
#include "drizzled/gettext.h"
#include "drizzled/probes.h"
#include "drizzled/sql_parse.h"
#include "drizzled/optimizer/cost_vector.h"
#include "drizzled/session.h"
#include "drizzled/sql_base.h"
#include "drizzled/transaction_services.h"
#include "drizzled/lock.h"
#include "drizzled/item/int.h"
#include "drizzled/item/empty_string.h"
#include "drizzled/field/timestamp.h"
#include "drizzled/message/table.pb.h"
#include "drizzled/plugin/client.h"
#include "drizzled/internal/my_sys.h"
#include "drizzled/plugin/event_observer.h"
#ifdef USE_PRAGMA_IMPLEMENTATION
#pragma implementation                          // gcc: Class implementation
#endif

#include "mysql_priv.h"
#include "rpl_filter.h"
#include <myisampack.h>
/*
  While we have legacy_db_type, we have this array to
  check for dups and to find handlerton from legacy_db_type.
  Remove when legacy_db_type is finally gone
*/
st_plugin_int *hton2plugin[MAX_HA];

static handlerton *installed_htons[128];

#define BITMAP_STACKBUF_SIZE (128/8)

KEY_CREATE_INFO default_key_create_info= { HA_KEY_ALG_UNDEF, 0, {NullS, 0}, {NullS, 0} };
/* number of entries in handlertons[] */
ulong total_ha= 0;
/* number of storage engines (from handlertons[]) that support 2pc */
ulong total_ha_2pc= 0;
/* size of savepoint storage area (see ha_init) */
ulong savepoint_alloc_size= 0;
static const LEX_STRING sys_table_aliases[]=
{
  { C_STRING_WITH_LEN("INNOBASE") }, { C_STRING_WITH_LEN("INNODB") },
  { C_STRING_WITH_LEN("HEAP") },     { C_STRING_WITH_LEN("MEMORY") },
  { NullS, 0 }
};

const char *ha_row_type[] = {
  "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "PAGE", "?", "?", "?"
};
const char *tx_isolation_names[] =
{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
  NullS };

TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1, "",
                               tx_isolation_names, NULL};

static TYPELIB known_extensions= {0, "known_exts", NULL, NULL};
uint known_extensions_id= 0;
73
static plugin_ref ha_default_plugin(THD *thd)
75
if (thd->variables.table_plugin)
76
return thd->variables.table_plugin;
77
return my_plugin_lock(thd, &global_system_variables.table_plugin);
82
Return the default storage engine handlerton for thread
84
@param ha_default_handlerton(thd)
85
@param thd current thread
90
handlerton *ha_default_handlerton(THD *thd)
92
plugin_ref plugin= ha_default_plugin(thd);
94
handlerton *hton= plugin_data(plugin, handlerton*);
101
Return the storage engine handlerton for the supplied name
103
@param thd current thread
104
@param name name of storage engine
107
pointer to storage engine plugin handle
109
plugin_ref ha_resolve_by_name(THD *thd, const LEX_STRING *name)
111
const LEX_STRING *table_alias;
115
/* my_strnncoll is a macro and gcc doesn't do early expansion of macro */
116
if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
117
(const uchar *)name->str, name->length,
118
(const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
119
return ha_default_plugin(thd);
121
if ((plugin= my_plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN)))
123
handlerton *hton= plugin_data(plugin, handlerton *);
124
if (!(hton->flags & HTON_NOT_USER_SELECTABLE))
128
unlocking plugin immediately after locking is relatively low cost.
130
plugin_unlock(thd, plugin);
134
We check for the historical aliases.
136
for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
138
if (!my_strnncoll(&my_charset_latin1,
139
(const uchar *)name->str, name->length,
140
(const uchar *)table_alias->str, table_alias->length))
142
name= table_alias + 1;
151
plugin_ref ha_lock_engine(THD *thd, handlerton *hton)
155
st_plugin_int **plugin= hton2plugin + hton->slot;
158
return my_plugin_lock(thd, plugin);
160
return my_plugin_lock(thd, &plugin);
167
handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
171
case DB_TYPE_DEFAULT:
172
return ha_default_handlerton(thd);
174
if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT &&
175
(plugin= ha_lock_engine(thd, installed_htons[db_type])))
176
return plugin_data(plugin, handlerton*);
178
case DB_TYPE_UNKNOWN:
185
Use another database handler if the requested handler is not compiled in.
187
handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
188
bool no_substitute, bool report_error)
190
handlerton *hton= ha_resolve_by_legacy_type(thd, database_type);
191
if (ha_storage_engine_is_enabled(hton))
198
const char *engine_name= ha_resolve_storage_engine_name(hton);
199
my_error(ER_FEATURE_DISABLED,MYF(0),engine_name,engine_name);
204
switch (database_type) {
206
return ha_resolve_by_legacy_type(thd, DB_TYPE_HASH);
211
return ha_default_handlerton(thd);
215
handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
219
DBUG_ENTER("get_new_handler");
220
DBUG_PRINT("enter", ("alloc: 0x%lx", (long) alloc));
222
if (db_type && db_type->state == SHOW_OPTION_YES && db_type->create)
224
if ((file= db_type->create(db_type, share, alloc)))
229
Try the default table type
230
Here the call to current_thd() is ok as we call this function a lot of
231
times but we enter this branch very seldom.
233
DBUG_RETURN(get_new_handler(share, alloc, ha_default_handlerton(current_thd)));
238
Register handler error messages for use with my_error().
246
int ha_init_errors(void)
248
#define SETMSG(nr, msg) errmsgs[(nr) - HA_ERR_FIRST]= (msg)
249
const char **errmsgs;
251
/* Allocate a pointer array for the error message strings. */
252
/* Zerofill it to avoid uninitialized gaps. */
253
if (! (errmsgs= (const char**) my_malloc(HA_ERR_ERRORS * sizeof(char*),
254
MYF(MY_WME | MY_ZEROFILL))))
257
/* Set the dedicated error messages. */
258
SETMSG(HA_ERR_KEY_NOT_FOUND, ER(ER_KEY_NOT_FOUND));
259
SETMSG(HA_ERR_FOUND_DUPP_KEY, ER(ER_DUP_KEY));
260
SETMSG(HA_ERR_RECORD_CHANGED,         "Update which is recoverable");
261
SETMSG(HA_ERR_WRONG_INDEX, "Wrong index given to function");
262
SETMSG(HA_ERR_CRASHED, ER(ER_NOT_KEYFILE));
263
SETMSG(HA_ERR_WRONG_IN_RECORD, ER(ER_CRASHED_ON_USAGE));
264
SETMSG(HA_ERR_OUT_OF_MEM, "Table handler out of memory");
265
SETMSG(HA_ERR_NOT_A_TABLE, "Incorrect file format '%.64s'");
266
SETMSG(HA_ERR_WRONG_COMMAND, "Command not supported");
267
SETMSG(HA_ERR_OLD_FILE, ER(ER_OLD_KEYFILE));
268
SETMSG(HA_ERR_NO_ACTIVE_RECORD, "No record read in update");
269
SETMSG(HA_ERR_RECORD_DELETED,         "Internal record deleted");
270
SETMSG(HA_ERR_RECORD_FILE_FULL, ER(ER_RECORD_FILE_FULL));
271
SETMSG(HA_ERR_INDEX_FILE_FULL, "No more room in index file '%.64s'");
272
SETMSG(HA_ERR_END_OF_FILE, "End in next/prev/first/last");
273
SETMSG(HA_ERR_UNSUPPORTED, ER(ER_ILLEGAL_HA));
274
SETMSG(HA_ERR_TO_BIG_ROW, "Too big row");
275
SETMSG(HA_WRONG_CREATE_OPTION, "Wrong create option");
276
SETMSG(HA_ERR_FOUND_DUPP_UNIQUE, ER(ER_DUP_UNIQUE));
277
SETMSG(HA_ERR_UNKNOWN_CHARSET, "Can't open charset");
278
SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF, ER(ER_WRONG_MRG_TABLE));
279
SETMSG(HA_ERR_CRASHED_ON_REPAIR, ER(ER_CRASHED_ON_REPAIR));
280
SETMSG(HA_ERR_CRASHED_ON_USAGE, ER(ER_CRASHED_ON_USAGE));
281
SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT, ER(ER_LOCK_WAIT_TIMEOUT));
282
SETMSG(HA_ERR_LOCK_TABLE_FULL, ER(ER_LOCK_TABLE_FULL));
283
SETMSG(HA_ERR_READ_ONLY_TRANSACTION, ER(ER_READ_ONLY_TRANSACTION));
284
SETMSG(HA_ERR_LOCK_DEADLOCK, ER(ER_LOCK_DEADLOCK));
285
SETMSG(HA_ERR_CANNOT_ADD_FOREIGN, ER(ER_CANNOT_ADD_FOREIGN));
286
SETMSG(HA_ERR_NO_REFERENCED_ROW, ER(ER_NO_REFERENCED_ROW_2));
287
SETMSG(HA_ERR_ROW_IS_REFERENCED, ER(ER_ROW_IS_REFERENCED_2));
288
SETMSG(HA_ERR_NO_SAVEPOINT, "No savepoint with that name");
289
SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE, "Non unique key block size");
290
SETMSG(HA_ERR_NO_SUCH_TABLE, "No such table: '%.64s'");
291
SETMSG(HA_ERR_TABLE_EXIST, ER(ER_TABLE_EXISTS_ERROR));
292
SETMSG(HA_ERR_NO_CONNECTION, "Could not connect to storage engine");
293
SETMSG(HA_ERR_TABLE_DEF_CHANGED, ER(ER_TABLE_DEF_CHANGED));
294
SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY, "FK constraint would lead to duplicate key");
295
SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, ER(ER_TABLE_NEEDS_UPGRADE));
296
SETMSG(HA_ERR_TABLE_READONLY, ER(ER_OPEN_AS_READONLY));
297
SETMSG(HA_ERR_AUTOINC_READ_FAILED, ER(ER_AUTOINC_READ_FAILED));
298
SETMSG(HA_ERR_AUTOINC_ERANGE, ER(ER_WARN_DATA_OUT_OF_RANGE));
300
/* Register the error messages for use with my_error(). */
301
return my_error_register(errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
306
Unregister handler error messages.
313
static int ha_finish_errors(void)
315
const char **errmsgs;
317
/* Allocate a pointer array for the error message strings. */
318
if (! (errmsgs= my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST)))
320
my_free((uchar*) errmsgs, MYF(0));
325
int ha_finalize_handlerton(st_plugin_int *plugin)
327
handlerton *hton= (handlerton *)plugin->data;
328
DBUG_ENTER("ha_finalize_handlerton");
333
case SHOW_OPTION_DISABLED:
335
case SHOW_OPTION_YES:
336
if (installed_htons[hton->db_type] == hton)
337
installed_htons[hton->db_type]= NULL;
342
hton->panic(hton, HA_PANIC_CLOSE);
344
if (plugin->plugin->deinit)
347
Today we have no defined/special behavior for uninstalling
350
DBUG_PRINT("info", ("Deinitializing plugin: '%s'", plugin->name.str));
351
if (plugin->plugin->deinit(NULL))
353
DBUG_PRINT("warning", ("Plugin '%s' deinit function returned error.",
358
my_free((uchar*)hton, MYF(0));
364
int ha_initialize_handlerton(st_plugin_int *plugin)
367
DBUG_ENTER("ha_initialize_handlerton");
368
DBUG_PRINT("plugin", ("initialize plugin: '%s'", plugin->name.str));
370
hton= (handlerton *)my_malloc(sizeof(handlerton),
371
MYF(MY_WME | MY_ZEROFILL));
373
FIXME: the MY_ZEROFILL flag above doesn't zero all the bytes.
375
This was detected after adding get_backup_engine member to handlerton
376
structure. Apparently get_backup_engine was not NULL even though it was
379
bzero(hton, sizeof(handlerton));
380
/* Historical Requirement */
381
plugin->data= hton; // shortcut for the future
382
if (plugin->plugin->init)
384
if (plugin->plugin->init(hton))
386
sql_print_error("Plugin '%s' init function returned error.",
393
the switch below and hton->state should be removed when
394
command-line options for plugins will be implemented
396
switch (hton->state) {
399
case SHOW_OPTION_YES:
402
/* now check the db_type for conflict */
403
if (hton->db_type <= DB_TYPE_UNKNOWN ||
404
hton->db_type >= DB_TYPE_DEFAULT ||
405
installed_htons[hton->db_type])
407
int idx= (int) DB_TYPE_FIRST_DYNAMIC;
409
while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
412
if (idx == (int) DB_TYPE_DEFAULT)
414
sql_print_warning("Too many storage engines!");
417
if (hton->db_type != DB_TYPE_UNKNOWN)
418
sql_print_warning("Storage engine '%s' has conflicting typecode. "
419
"Assigning value %d.", plugin->plugin->name, idx);
420
hton->db_type= (enum legacy_db_type) idx;
422
installed_htons[hton->db_type]= hton;
423
tmp= hton->savepoint_offset;
424
hton->savepoint_offset= savepoint_alloc_size;
425
savepoint_alloc_size+= tmp;
426
hton->slot= total_ha++;
427
hton2plugin[hton->slot]=plugin;
434
hton->state= SHOW_OPTION_DISABLED;
439
This is entirely for legacy. We will create a new "disk based" hton and a
440
"memory" hton which will be configurable longterm. We should be able to
441
remove partition and myisammrg.
443
switch (hton->db_type) {
462
DBUG_ENTER("ha_init");
464
DBUG_ASSERT(total_ha < MAX_HA);
466
Check if there is a transaction-capable storage engine besides the
467
binary log (which is considered a transaction-capable storage engine in
470
opt_using_transactions= total_ha>(ulong)opt_bin_log;
471
savepoint_alloc_size+= sizeof(SAVEPOINT);
478
DBUG_ENTER("ha_end");
482
This should eventually be based on the graceful shutdown flag.
So if the flag is equal to HA_PANIC_CLOSE, then deallocate
486
if (ha_finish_errors())
492
static my_bool dropdb_handlerton(THD *unused1, plugin_ref plugin,
495
handlerton *hton= plugin_data(plugin, handlerton *);
496
if (hton->state == SHOW_OPTION_YES && hton->drop_database)
497
hton->drop_database(hton, (char *)path);
502
void ha_drop_database(char* path)
504
plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
508
static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
511
handlerton *hton= plugin_data(plugin, handlerton *);
513
there's no need to rollback here as all transactions must
514
be rolled back already
516
if (hton->state == SHOW_OPTION_YES && hton->close_connection &&
517
thd_get_ha_data(thd, hton))
518
hton->close_connection(hton, thd);
525
don't bother to rollback here, it's done already
527
void ha_close_connection(THD* thd)
529
plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
532
/* ========================================================================
533
======================= TRANSACTIONS ===================================*/
536
Transaction handling in the server
537
==================================
539
In each client connection, MySQL maintains two transactional
541
- a statement transaction,
542
- a standard, also called normal transaction.
546
"Statement transaction" is a non-standard term that comes
from the times when MySQL supported the BerkeleyDB storage engine.

First of all, it should be said that the BerkeleyDB auto-commit
mode auto-commits operations that are atomic to the storage
engine itself, such as a write of a record, but are too
high-granular to be atomic from the application perspective
(MySQL). One SQL statement could involve many BerkeleyDB
auto-committed operations, and thus BerkeleyDB auto-commit was of
little use to MySQL.
557
Secondly, instead of SQL standard savepoints, BerkeleyDB
558
provided the concept of "nested transactions". In a nutshell,
559
transactions could be arbitrarily nested, but when the parent
560
transaction was committed or aborted, all its child (nested)
transactions were committed or aborted as well.
Commit of a nested transaction, in turn, made its changes
visible, but not durable: it destroyed the nested transaction, and
all its changes became available to the parent and to the
currently active nested transactions of this parent.
567
So the mechanism of nested transactions was employed to
568
provide "all or nothing" guarantee of SQL statements
569
required by the standard.
570
A nested transaction would be created at start of each SQL
571
statement, and destroyed (committed or aborted) at statement
572
end. Such nested transaction was internally referred to as
573
a "statement transaction" and gave birth to the term.
575
<Historical note ends>
577
Since then a statement transaction is started for each statement
578
that accesses transactional tables or uses the binary log. If
579
the statement succeeds, the statement transaction is committed.
580
If the statement fails, the transaction is rolled back. Commits
581
of statement transactions are not durable -- each such
582
transaction is nested in the normal transaction, and if the
583
normal transaction is rolled back, the effects of all enclosed
584
statement transactions are undone as well. Technically,
585
a statement transaction can be viewed as a savepoint which is
586
maintained automatically in order to make effects of one
589
The normal transaction is started by the user and is ended
590
usually upon a user request as well. The normal transaction
591
encloses transactions of all statements issued between
592
its beginning and its end.
593
In autocommit mode, the normal transaction is equivalent
594
to the statement transaction.
596
Since MySQL supports PSEA (pluggable storage engine
597
architecture), more than one transactional engine can be
598
active at a time. Hence transactions, from the server
599
point of view, are always distributed. In particular,
600
transactional state is maintained independently for each
601
engine. In order to commit a transaction the two phase
602
commit protocol is employed.
604
Not all statements are executed in context of a transaction.
605
Administrative and status information statements do not modify
606
engine data, and thus do not start a statement transaction and
607
also have no effect on the normal transaction. Examples of such
608
statements are SHOW STATUS and RESET SLAVE.
610
Similarly DDL statements are not transactional,
611
and therefore a transaction is [almost] never started for a DDL
612
statement. The difference between a DDL statement and a purely
613
administrative statement though is that a DDL statement always
614
commits the current transaction before proceeding, if there is
617
At last, SQL statements that work with non-transactional
618
engines also have no effect on the transaction state of the
619
connection. Even though they are written to the binary log,
620
and the binary log is, overall, transactional, the writes
621
are done in "write-through" mode, directly to the binlog
622
file, followed with a OS cache sync, in other words,
623
bypassing the binlog undo log (translog).
624
They do not commit the current normal transaction.
625
A failure of a statement that uses non-transactional tables
626
would cause a rollback of the statement transaction, but
627
in case only non-transactional tables are used,
no statement transaction is started.
633
The server stores its transaction-related data in
634
thd->transaction. This structure has two members of type
635
THD_TRANS. These members correspond to the statement and
636
normal transactions respectively:
638
- thd->transaction.stmt contains a list of engines
639
that are participating in the given statement
640
- thd->transaction.all contains a list of engines that
641
have participated in any of the statement transactions started
642
within the context of the normal transaction.
643
Each element of the list contains a pointer to the storage
644
engine, engine-specific transactional data, and engine-specific
647
In autocommit mode thd->transaction.all is empty.
648
Instead, data of thd->transaction.stmt is
649
used to commit/rollback the normal transaction.
651
The list of registered engines has a few important properties:
652
- no engine is registered in the list twice
653
- engines are present in the list in reverse temporal order --
654
new participants are always added to the beginning of the list.
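
  Iterating over one of these lists looks the same everywhere in this
  file; as a minimal sketch (mirroring what ha_commit_one_phase() does
  further down):

    for (Ha_trx_info *ha_info= trans->ha_list; ha_info; ha_info= ha_info->next())
    {
      handlerton *ht= ha_info->ht();
      /* per-engine work goes here: ht->commit(), ht->rollback(), ... */
    }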
656
Transaction life cycle
657
----------------------
659
When a new connection is established, thd->transaction
660
members are initialized to an empty state.
661
If a statement uses any tables, all affected engines
662
are registered in the statement engine list. In
663
non-autocommit mode, the same engines are registered in
664
the normal transaction list.
665
At the end of the statement, the server issues a commit
666
or a roll back for all engines in the statement list.
667
At this point transaction flags of an engine, if any, are
668
propagated from the statement list to the list of the normal
670
When commit/rollback is finished, the statement list is
671
cleared. It will be filled in again by the next statement,
672
and emptied again at the next statement's end.
674
The normal transaction is committed in a similar way
675
(by going over all engines in thd->transaction.all list)
676
but at different times:
677
- upon COMMIT SQL statement is issued by the user
678
- implicitly, by the server, at the beginning of a DDL statement
679
or SET AUTOCOMMIT={0|1} statement.
681
The normal transaction can be rolled back as well:
682
- if the user has requested so, by issuing ROLLBACK SQL
684
- if one of the storage engines requested a rollback
685
by setting thd->transaction_rollback_request. This may
686
happen in case, e.g., when the transaction in the engine was
687
chosen a victim of the internal deadlock resolution algorithm
688
and rolled back internally. When such a situation happens, there
689
is little the server can do and the only option is to rollback
690
transactions in all other participating engines. In this case
691
the rollback is accompanied by an error sent to the user.
693
As follows from the use cases above, the normal transaction
694
is never committed when there is an outstanding statement
695
transaction. In most cases there is no conflict, since
696
commits of the normal transaction are issued by a stand-alone
697
administrative or DDL statement, thus no outstanding statement
698
transaction of the previous statement exists. Besides,
699
all statements that manipulate the normal transaction
700
are prohibited in stored functions and triggers, therefore
701
no conflicting situation can occur in a sub-statement either.
702
The remaining rare cases when the server explicitly has
703
to commit the statement transaction prior to committing the normal
704
one cover error-handling scenarios (see for example
707
When committing a statement or a normal transaction, the server
708
either uses the two-phase commit protocol, or issues a commit
709
in each engine independently. The two-phase commit protocol
711
- all participating engines support two-phase commit (provide
712
handlerton::prepare PSEA API call) and
713
- transactions in at least two engines modify data (i.e. are
716
Note that the two phase commit is used for
717
statement transactions, even though they are not durable anyway.
718
This is done to ensure logical consistency of data in a multiple-
720
For example, imagine that some day MySQL supports unique
721
constraint checks deferred till the end of statement. In such
722
case a commit in one of the engines may yield ER_DUP_KEY,
723
and MySQL should be able to gracefully abort statement
724
transactions of other participants.
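
  In code terms the decision boils down to the check made in
  ha_commit_trans() further down in this file:

    bool must_2pc= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
    if (!trans->no_2pc && must_2pc)
    {
      /* every read-write participant gets a handlerton::prepare() call
         before the commit; otherwise commit_one_phase() is issued directly */
    }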
726
After the normal transaction has been committed,
727
thd->transaction.all list is cleared.
729
When a connection is closed, the current normal transaction, if
732
Roles and responsibilities
733
--------------------------
735
The server has no way to know that an engine participates in
736
the statement and a transaction has been started
737
in it unless the engine says so. Thus, in order to be
738
a part of a transaction, the engine must "register" itself.
739
This is done by invoking trans_register_ha() server call.
740
Normally the engine registers itself whenever handler::external_lock()
741
is called. trans_register_ha() can be invoked many times: if
742
an engine is already registered, the call does nothing.
743
In case autocommit is not set, the engine must register itself
744
twice -- both in the statement list and in the normal transaction
746
In which list to register is a parameter of trans_register_ha().
748
Note, that although the registration interface in itself is
749
fairly clear, the current usage practice often leads to undesired
750
effects. E.g. since a call to trans_register_ha() in most engines
751
is embedded into implementation of handler::external_lock(), some
752
DDL statements start a transaction (at least from the server
753
point of view) even though they are not expected to. E.g.
754
CREATE TABLE does not start a transaction, since
755
handler::external_lock() is never called during CREATE TABLE. But
756
CREATE TABLE ... SELECT does, since handler::external_lock() is
757
called for the table that is being selected from. This has no
758
practical effects currently, but must be kept in mind
761
Once an engine is registered, the server will do the rest
764
During statement execution, whenever any of data-modifying
765
PSEA API methods is used, e.g. handler::write_row() or
766
handler::update_row(), the read-write flag is raised in the
767
statement transaction for the involved engine.
768
Currently all PSEA calls are "traced", and the data cannot be
changed in any way other than by issuing a PSEA call. Important:
770
unless this invariant is preserved the server will not know that
771
a transaction in a given engine is read-write and will not
772
involve the two-phase commit protocol!
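
  For illustration only, a hypothetical engine (the name ha_example and
  its example_hton pointer are made up for this sketch) would typically
  register itself from handler::external_lock() roughly like this:

    int ha_example::external_lock(THD *thd, int lock_type)
    {
      if (lock_type != F_UNLCK)
      {
        /* always register in the statement transaction ... */
        trans_register_ha(thd, FALSE, example_hton);
        /* ... and also in the normal transaction when not in autocommit */
        if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
          trans_register_ha(thd, TRUE, example_hton);
      }
      return 0;
    }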
774
At the end of a statement, the server call
ha_autocommit_or_rollback() is invoked. This call in turn
invokes handlerton::prepare() for every involved engine.
Prepare is followed by a call to handlerton::commit_one_phase().
If a one-phase commit will suffice, handlerton::prepare() is not
invoked and the server only calls handlerton::commit_one_phase().
780
At statement commit, the statement-related read-write engine
781
flag is propagated to the corresponding flag in the normal
782
transaction. When the commit is complete, the list of registered
785
Rollback is handled in a similar fashion.
787
Additional notes on DDL and the normal transaction.
788
---------------------------------------------------
790
DDLs and operations with non-transactional engines
791
do not "register" in thd->transaction lists, and thus do not
792
modify the transaction state. Besides, each DDL in
793
MySQL is prefixed with an implicit normal transaction commit
794
(a call to end_active_trans()), and thus leaves nothing
796
However, as it has been pointed out with CREATE TABLE .. SELECT,
797
some DDL statements can start a *new* transaction.
799
Behaviour of the server in this case is currently badly defined.
801
DDL statements use a form of "semantic" logging
802
to maintain atomicity: if CREATE TABLE .. SELECT failed,
803
the newly created table is deleted.
804
In addition, some DDL statements issue interim transaction
805
commits: e.g. ALTER TABLE issues a commit after data is copied
806
from the original table to the internal temporary table. Other
807
statements, e.g. CREATE TABLE ... SELECT do not always commit
809
And finally there is a group of DDL statements such as
810
RENAME/DROP TABLE that doesn't start a new transaction
813
This diversity makes it hard to say what will happen if
814
by chance a stored function is invoked during a DDL --
815
whether any modifications it makes will be committed or not
816
is not clear. Fortunately, the SQL grammar of only a few DDLs allows
invocation of a stored function.
819
A consistent behaviour is perhaps to always commit the normal
820
transaction after all DDLs, just like the statement transaction
821
is always committed at the end of all statements.
825
Register a storage engine for a transaction.
827
Every storage engine MUST call this function when it starts
828
a transaction or a statement (that is it must be called both for the
829
"beginning of transaction" and "beginning of statement").
830
Only storage engines registered for the transaction/statement
831
will know when to commit/rollback it.
834
trans_register_ha is idempotent - storage engine may register many
835
times per transaction.
838
void trans_register_ha(THD *thd, bool all, handlerton *ht_arg)
841
Ha_trx_info *ha_info;
842
DBUG_ENTER("trans_register_ha");
843
DBUG_PRINT("enter",("%s", all ? "all" : "stmt"));
847
trans= &thd->transaction.all;
848
thd->server_status|= SERVER_STATUS_IN_TRANS;
851
trans= &thd->transaction.stmt;
853
ha_info= thd->ha_data[ht_arg->slot].ha_info + static_cast<unsigned>(all);
855
if (ha_info->is_started())
856
DBUG_VOID_RETURN; /* already registered, return */
858
ha_info->register_ha(trans, ht_arg);
860
trans->no_2pc|=(ht_arg->prepare==0);
861
if (thd->transaction.xid_state.xid.is_null())
862
thd->transaction.xid_state.xid.set(thd->query_id);
871
1 error, transaction was rolled back
873
int ha_prepare(THD *thd)
876
THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
877
Ha_trx_info *ha_info= trans->ha_list;
878
DBUG_ENTER("ha_prepare");
881
for (; ha_info; ha_info= ha_info->next())
884
handlerton *ht= ha_info->ht();
885
status_var_increment(thd->status_var.ha_prepare_count);
888
if ((err= ht->prepare(ht, thd, all)))
890
my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
891
ha_rollback_trans(thd, all);
898
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
899
ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
900
ha_resolve_storage_engine_name(ht));
908
Check if we can skip the two-phase commit.
910
A helper function to evaluate if two-phase commit is mandatory.
911
As a side effect, propagates the read-only/read-write flags
912
of the statement transaction to its enclosing normal transaction.
914
@retval TRUE we must run a two-phase commit. Returned
915
if we have at least two engines with read-write changes.
916
@retval FALSE Don't need two-phase commit. Even if we have two
917
transactional engines, we can run two independent
918
commits if changes in one of the engines are read-only.
923
ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
926
/* The number of storage engines that have actual changes. */
927
unsigned rw_ha_count= 0;
928
Ha_trx_info *ha_info;
930
for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
932
if (ha_info->is_trx_read_write())
937
Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
938
DBUG_ASSERT(ha_info != ha_info_all);
940
Merge read-only/read-write information about statement
941
transaction to its enclosing normal transaction. Do this
942
only if in a real transaction -- that is, if we know
943
that ha_info_all is registered in thd->transaction.all.
944
Since otherwise we only clutter the normal transaction flags.
946
if (ha_info_all->is_started()) /* FALSE if autocommit. */
947
ha_info_all->coalesce_trx_with(ha_info);
949
else if (rw_ha_count > 1)
952
It is a normal transaction, so we don't need to merge read/write
953
information up, and the need for two-phase commit has been
954
already established. Break the loop prematurely.
959
return rw_ha_count > 1;
967
1 transaction was rolled back
969
2 error during commit, data may be inconsistent
972
Since we don't support nested statement transactions in 5.0,
973
we can't commit or rollback stmt transactions while we are inside
974
stored functions or triggers. So we simply do nothing now.
975
TODO: This should be fixed in later ( >= 5.1) releases.
977
int ha_commit_trans(THD *thd, bool all)
979
int error= 0, cookie= 0;
981
'all' means that this is either an explicit commit issued by
982
user, or an implicit commit issued by a DDL.
984
THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
985
bool is_real_trans= all || thd->transaction.all.ha_list == 0;
986
Ha_trx_info *ha_info= trans->ha_list;
987
my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
988
DBUG_ENTER("ha_commit_trans");
991
We must not commit the normal transaction if a statement
992
transaction is pending. Otherwise statement transaction
993
flags will not get propagated to its normal transaction's
996
DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
997
trans == &thd->transaction.stmt);
999
if (thd->in_sub_stmt)
1002
Since we don't support nested statement transactions in 5.0,
1003
we can't commit or rollback stmt transactions while we are inside
1004
stored functions or triggers. So we simply do nothing now.
1005
TODO: This should be fixed in later ( >= 5.1) releases.
1010
We assume that all statements which commit or rollback main transaction
1011
are prohibited inside of stored functions or triggers. So they should
1012
bail out with error even before ha_commit_trans() call. To be 100% safe
1013
let us throw error in non-debug builds.
1016
my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1023
if (is_real_trans && wait_if_global_read_lock(thd, 0, 0))
1025
ha_rollback_trans(thd, all);
1031
&& ! thd->slave_thread
1034
my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
1035
ha_rollback_trans(thd, all);
1040
must_2pc= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
1042
if (!trans->no_2pc && must_2pc)
1044
for (; ha_info && !error; ha_info= ha_info->next())
1047
handlerton *ht= ha_info->ht();
1049
Do not call two-phase commit if this particular
1050
transaction is read-only. This allows for simpler
1051
implementation in engines that are always read-only.
1053
if (! ha_info->is_trx_read_write())
1056
Sic: we know that prepare() is not NULL since otherwise
1057
trans->no_2pc would have been set.
1059
if ((err= ht->prepare(ht, thd, all)))
1061
my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1064
status_var_increment(thd->status_var.ha_prepare_count);
1066
DBUG_EXECUTE_IF("crash_commit_after_prepare", abort(););
1067
if (error || (is_real_trans && xid &&
1068
(error= !(cookie= tc_log->log_xid(thd, xid)))))
1070
ha_rollback_trans(thd, all);
1074
DBUG_EXECUTE_IF("crash_commit_after_log", abort(););
1076
error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0;
1077
DBUG_EXECUTE_IF("crash_commit_before_unlog", abort(););
1079
tc_log->unlog(cookie, xid);
1080
DBUG_EXECUTE_IF("crash_commit_after", abort(););
1083
start_waiting_global_read_lock(thd);
1090
This function does not care about global read lock. A caller should.
1092
int ha_commit_one_phase(THD *thd, bool all)
1095
THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1096
bool is_real_trans=all || thd->transaction.all.ha_list == 0;
1097
Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1098
DBUG_ENTER("ha_commit_one_phase");
1101
for (; ha_info; ha_info= ha_info_next)
1104
handlerton *ht= ha_info->ht();
1105
if ((err= ht->commit(ht, thd, all)))
1107
my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1110
status_var_increment(thd->status_var.ha_commit_count);
1111
ha_info_next= ha_info->next();
1112
ha_info->reset(); /* keep it conveniently zero-filled */
1117
thd->transaction.xid_state.xid.null();
1120
thd->variables.tx_isolation=thd->session_tx_isolation;
1121
thd->transaction.cleanup();
1128
int ha_rollback_trans(THD *thd, bool all)
1131
THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1132
Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1133
bool is_real_trans=all || thd->transaction.all.ha_list == 0;
1134
DBUG_ENTER("ha_rollback_trans");
1137
We must not rollback the normal transaction if a statement
1138
transaction is pending.
1140
DBUG_ASSERT(thd->transaction.stmt.ha_list == NULL ||
1141
trans == &thd->transaction.stmt);
1143
if (thd->in_sub_stmt)
1146
If we are inside stored function or trigger we should not commit or
1147
rollback current statement transaction. See comment in ha_commit_trans()
1148
call for more information.
1153
my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
1158
for (; ha_info; ha_info= ha_info_next)
1161
handlerton *ht= ha_info->ht();
1162
if ((err= ht->rollback(ht, thd, all)))
1164
my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
1167
status_var_increment(thd->status_var.ha_rollback_count);
1168
ha_info_next= ha_info->next();
1169
ha_info->reset(); /* keep it conveniently zero-filled */
1174
thd->transaction.xid_state.xid.null();
1177
thd->variables.tx_isolation=thd->session_tx_isolation;
1178
thd->transaction.cleanup();
1182
thd->transaction_rollback_request= FALSE;
1185
If a non-transactional table was updated, warn; don't warn if this is a
1186
slave thread (because when a slave thread executes a ROLLBACK, it has
1187
been read from the binary log, so it's 100% sure and normal to produce
1188
error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the
1189
slave SQL thread, it would not stop the thread but just be printed in
1190
the error log; but we don't want users to wonder why they have this
1191
message in the error log, so we don't send it.
1193
if (is_real_trans && thd->transaction.all.modified_non_trans_table &&
1194
!thd->slave_thread && thd->killed != THD::KILL_CONNECTION)
1195
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
1196
ER_WARNING_NOT_COMPLETE_ROLLBACK,
1197
ER(ER_WARNING_NOT_COMPLETE_ROLLBACK));
1202
This is used to commit or rollback a single statement depending on
1206
Note that if the autocommit is on, then the following call inside
1207
InnoDB will commit or rollback the whole transaction (= the statement). The
1208
autocommit mechanism built into InnoDB is based on counting locks, but if
1209
the user has used LOCK TABLES then that mechanism does not know to do the
1212
int ha_autocommit_or_rollback(THD *thd, int error)
1214
DBUG_ENTER("ha_autocommit_or_rollback");
1215
if (thd->transaction.stmt.ha_list)
1219
if (ha_commit_trans(thd, 0))
1224
(void) ha_rollback_trans(thd, 0);
1225
if (thd->transaction_rollback_request && !thd->in_sub_stmt)
1226
(void) ha_rollback(thd);
1229
thd->variables.tx_isolation=thd->session_tx_isolation;
1240
static my_bool xacommit_handlerton(THD *unused1, plugin_ref plugin,
1243
handlerton *hton= plugin_data(plugin, handlerton *);
1244
if (hton->state == SHOW_OPTION_YES && hton->recover)
1246
hton->commit_by_xid(hton, ((struct xahton_st *)arg)->xid);
1247
((struct xahton_st *)arg)->result= 0;
1252
static my_bool xarollback_handlerton(THD *unused1, plugin_ref plugin,
1255
handlerton *hton= plugin_data(plugin, handlerton *);
1256
if (hton->state == SHOW_OPTION_YES && hton->recover)
1258
hton->rollback_by_xid(hton, ((struct xahton_st *)arg)->xid);
1259
((struct xahton_st *)arg)->result= 0;
1265
int ha_commit_or_rollback_by_xid(XID *xid, bool commit)
1267
struct xahton_st xaop;
1271
plugin_foreach(NULL, commit ? xacommit_handlerton : xarollback_handlerton,
1272
MYSQL_STORAGE_ENGINE_PLUGIN, &xaop);
1281
This does not need to be multi-byte safe or anything
1283
static char* xid_to_str(char *buf, XID *xid)
1288
for (i=0; i < xid->gtrid_length+xid->bqual_length; i++)
1290
uchar c=(uchar)xid->data[i];
1291
/* is_next_dig is set if next character is a number */
1292
bool is_next_dig= FALSE;
1293
if (i < XIDDATASIZE)
1295
char ch= xid->data[i+1];
1296
is_next_dig= (ch >= '0' && ch <='9');
1298
if (i == xid->gtrid_length)
1301
if (xid->bqual_length)
1307
if (c < 32 || c > 126)
1311
If next character is a number, write current character with
1312
3 octal numbers to ensure that the next number is not seen
1313
as part of the octal number
1315
if (c > 077 || is_next_dig)
1316
*s++=_dig_vec_lower[c >> 6];
1317
if (c > 007 || is_next_dig)
1318
*s++=_dig_vec_lower[(c >> 3) & 7];
1319
*s++=_dig_vec_lower[c & 7];
1323
if (c == '\'' || c == '\\')
1335
recover() step of xa.
1338
there are three modes of operation:
1339
- automatic recover after a crash
1340
in this case commit_list != 0, tc_heuristic_recover==0
1341
all xids from commit_list are committed, others are rolled back
1342
- manual (heuristic) recover
1343
in this case commit_list==0, tc_heuristic_recover != 0
1344
DBA has explicitly specified that all prepared transactions should
1345
be committed (or rolled back).
1346
- no recovery (MySQL did not detect a crash)
1347
in this case commit_list==0, tc_heuristic_recover == 0
1348
there should be no prepared transactions in this case.
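
  As a hedged illustration of the manual mode, the DBA restarts the
  server with the heuristic-recover switch mentioned in the error text
  below, e.g.

    mysqld --tc-heuristic-recover=COMMIT

  (or =ROLLBACK), after which every XID still in the prepared state is
  resolved accordingly.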
1352
int len, found_foreign_xids, found_my_xids;
1358
static my_bool xarecover_handlerton(THD *unused, plugin_ref plugin,
1361
handlerton *hton= plugin_data(plugin, handlerton *);
1362
struct xarecover_st *info= (struct xarecover_st *) arg;
1365
if (hton->state == SHOW_OPTION_YES && hton->recover)
1367
while ((got= hton->recover(hton, info->list, info->len)) > 0 )
1369
sql_print_information("Found %d prepared transaction(s) in %s",
1370
got, ha_resolve_storage_engine_name(hton));
1371
for (int i=0; i < got; i ++)
1373
my_xid x=info->list[i].get_my_xid();
1374
if (!x) // not "mine" - that is generated by external TM
1377
char buf[XIDDATASIZE*4+6]; // see xid_to_str
1378
sql_print_information("ignore xid %s", xid_to_str(buf, info->list+i));
1380
xid_cache_insert(info->list+i, XA_PREPARED);
1381
info->found_foreign_xids++;
1386
info->found_my_xids++;
1390
if (info->commit_list ?
1391
hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0 :
1392
tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)
1395
char buf[XIDDATASIZE*4+6]; // see xid_to_str
1396
sql_print_information("commit xid %s", xid_to_str(buf, info->list+i));
1398
hton->commit_by_xid(hton, info->list+i);
1403
char buf[XIDDATASIZE*4+6]; // see xid_to_str
1404
sql_print_information("rollback xid %s",
1405
xid_to_str(buf, info->list+i));
1407
hton->rollback_by_xid(hton, info->list+i);
1410
if (got < info->len)
1417
int ha_recover(HASH *commit_list)
1419
struct xarecover_st info;
1420
DBUG_ENTER("ha_recover");
1421
info.found_foreign_xids= info.found_my_xids= 0;
1422
info.commit_list= commit_list;
1423
info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
1426
/* commit_list and tc_heuristic_recover cannot be set both */
1427
DBUG_ASSERT(info.commit_list==0 || tc_heuristic_recover==0);
1428
/* if either is set, total_ha_2pc must be set too */
1429
DBUG_ASSERT(info.dry_run || total_ha_2pc>(ulong)opt_bin_log);
1431
if (total_ha_2pc <= (ulong)opt_bin_log)
1434
if (info.commit_list)
1435
sql_print_information("Starting crash recovery...");
1438
#ifndef WILL_BE_DELETED_LATER
1441
for now, only InnoDB supports 2pc. It means we can always safely
1442
rollback all pending transactions, without risking inconsistent data
1445
DBUG_ASSERT(total_ha_2pc == (ulong) opt_bin_log+1); // only InnoDB and binlog
1446
tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK
1451
for (info.len= MAX_XID_LIST_SIZE ;
1452
info.list==0 && info.len > MIN_XID_LIST_SIZE; info.len/=2)
1454
info.list=(XID *)my_malloc(info.len*sizeof(XID), MYF(0));
1458
sql_print_error(ER(ER_OUTOFMEMORY), info.len*sizeof(XID));
1462
plugin_foreach(NULL, xarecover_handlerton,
1463
MYSQL_STORAGE_ENGINE_PLUGIN, &info);
1465
my_free((uchar*)info.list, MYF(0));
1466
if (info.found_foreign_xids)
1467
sql_print_warning("Found %d prepared XA transactions",
1468
info.found_foreign_xids);
1469
if (info.dry_run && info.found_my_xids)
1471
sql_print_error("Found %d prepared transactions! It means that mysqld was "
1472
"not shut down properly last time and critical recovery "
1473
"information (last binlog or %s file) was manually deleted "
1474
"after a crash. You have to start mysqld with "
1475
"--tc-heuristic-recover switch to commit or rollback "
1476
"pending transactions.",
1477
info.found_my_xids, opt_tc_log_file);
1480
if (info.commit_list)
1481
sql_print_information("Crash recovery finished.");
1486
return the list of XID's to a client, the same way SHOW commands do.
1489
I didn't find in XA specs that an RM cannot return the same XID twice,
1490
so mysql_xa_recover does not filter XID's to ensure uniqueness.
1491
It can be easily fixed later, if necessary.
1493
bool mysql_xa_recover(THD *thd)
1495
List<Item> field_list;
1496
Protocol *protocol= thd->protocol;
1499
DBUG_ENTER("mysql_xa_recover");
1501
field_list.push_back(new Item_int("formatID", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1502
field_list.push_back(new Item_int("gtrid_length", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1503
field_list.push_back(new Item_int("bqual_length", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1504
field_list.push_back(new Item_empty_string("data",XIDDATASIZE));
1506
if (protocol->send_fields(&field_list,
1507
Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
1510
pthread_mutex_lock(&LOCK_xid_cache);
1511
while ((xs= (XID_STATE*)hash_element(&xid_cache, i++)))
1513
if (xs->xa_state==XA_PREPARED)
1515
protocol->prepare_for_resend();
1516
protocol->store_longlong((longlong)xs->xid.formatID, FALSE);
1517
protocol->store_longlong((longlong)xs->xid.gtrid_length, FALSE);
1518
protocol->store_longlong((longlong)xs->xid.bqual_length, FALSE);
1519
protocol->store(xs->xid.data, xs->xid.gtrid_length+xs->xid.bqual_length,
1521
if (protocol->write())
1523
pthread_mutex_unlock(&LOCK_xid_cache);
1529
pthread_mutex_unlock(&LOCK_xid_cache);
1536
This function should be called when MySQL sends rows of a SELECT result set
1537
or the EOF mark to the client. It releases a possible adaptive hash index
1538
S-latch held by thd in InnoDB and also releases a possible InnoDB query
1539
FIFO ticket to enter InnoDB. To save CPU time, InnoDB allows a thd to
1540
keep them over several calls of the InnoDB handler interface when a join
1541
is executed. But when we let the control to pass to the client they have
1542
to be released because if the application program uses mysql_use_result(),
1543
it may deadlock on the S-latch if the application on another connection
1544
performs another SQL query. In MySQL-4.1 this is even more important because
1545
there a connection can have several SELECT queries open at the same time.
1547
@param thd the thread handle of the current connection
1552
static my_bool release_temporary_latches(THD *thd, plugin_ref plugin,
1555
handlerton *hton= plugin_data(plugin, handlerton *);
1557
if (hton->state == SHOW_OPTION_YES && hton->release_temporary_latches)
1558
hton->release_temporary_latches(hton, thd);
1564
int ha_release_temporary_latches(THD *thd)
1566
plugin_foreach(thd, release_temporary_latches, MYSQL_STORAGE_ENGINE_PLUGIN,
1572
int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
1575
THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
1576
&thd->transaction.all);
1577
Ha_trx_info *ha_info, *ha_info_next;
1579
DBUG_ENTER("ha_rollback_to_savepoint");
1583
rolling back to savepoint in all storage engines that were part of the
1584
transaction when the savepoint was set
1586
for (ha_info= sv->ha_list; ha_info; ha_info= ha_info->next())
1589
handlerton *ht= ha_info->ht();
1591
DBUG_ASSERT(ht->savepoint_set != 0);
1592
if ((err= ht->savepoint_rollback(ht, thd,
1593
(uchar *)(sv+1)+ht->savepoint_offset)))
1595
my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
1598
status_var_increment(thd->status_var.ha_savepoint_rollback_count);
1599
trans->no_2pc|= ht->prepare == 0;
1602
rolling back the transaction in all storage engines that were not part of
1603
the transaction when the savepoint was set
1605
for (ha_info= trans->ha_list; ha_info != sv->ha_list;
1606
ha_info= ha_info_next)
1609
handlerton *ht= ha_info->ht();
1610
if ((err= ht->rollback(ht, thd, !thd->in_sub_stmt)))
1612
my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
1615
status_var_increment(thd->status_var.ha_rollback_count);
1616
ha_info_next= ha_info->next();
1617
ha_info->reset(); /* keep it conveniently zero-filled */
1619
trans->ha_list= sv->ha_list;
1625
according to the sql standard (ISO/IEC 9075-2:2003)
1626
section "4.33.4 SQL-statements and transaction states",
1627
SAVEPOINT is *not* transaction-initiating SQL-statement
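
  Each engine's private savepoint data lives directly after the SAVEPOINT
  struct itself; an engine's slice is addressed with the same expression
  that ha_savepoint() and ha_rollback_to_savepoint() use:

    uchar *engine_sv_data= (uchar *)(sv + 1) + ht->savepoint_offset;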
1629
int ha_savepoint(THD *thd, SAVEPOINT *sv)
1632
THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
1633
&thd->transaction.all);
1634
Ha_trx_info *ha_info= trans->ha_list;
1635
DBUG_ENTER("ha_savepoint");
1636
for (; ha_info; ha_info= ha_info->next())
1639
handlerton *ht= ha_info->ht();
1641
if (! ht->savepoint_set)
1643
my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
1647
if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
1649
my_error(ER_GET_ERRNO, MYF(0), err);
1652
status_var_increment(thd->status_var.ha_savepoint_count);
1655
Remember the list of registered storage engines. All new
1656
engines are prepended to the beginning of the list.
1658
sv->ha_list= trans->ha_list;
1662
int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
1665
Ha_trx_info *ha_info= sv->ha_list;
1666
DBUG_ENTER("ha_release_savepoint");
1668
for (; ha_info; ha_info= ha_info->next())
1671
handlerton *ht= ha_info->ht();
1672
/* Savepoint life time is enclosed into transaction life time. */
1674
if (!ht->savepoint_release)
1676
if ((err= ht->savepoint_release(ht, thd,
1677
(uchar *)(sv+1) + ht->savepoint_offset)))
1679
my_error(ER_GET_ERRNO, MYF(0), err);
1687
static my_bool snapshot_handlerton(THD *thd, plugin_ref plugin,
1690
handlerton *hton= plugin_data(plugin, handlerton *);
1691
if (hton->state == SHOW_OPTION_YES &&
1692
hton->start_consistent_snapshot)
1694
hton->start_consistent_snapshot(hton, thd);
1695
*((bool *)arg)= false;
1700
int ha_start_consistent_snapshot(THD *thd)
1704
plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);
1707
Same idea as when one wants to CREATE TABLE in one engine which does not
1711
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
1712
"This MySQL server does not support any "
1713
"consistent-read capable storage engine");
1718
static my_bool flush_handlerton(THD *thd, plugin_ref plugin,
1721
handlerton *hton= plugin_data(plugin, handlerton *);
1722
if (hton->state == SHOW_OPTION_YES && hton->flush_logs &&
1723
hton->flush_logs(hton))
1729
bool ha_flush_logs(handlerton *db_type)
1731
if (db_type == NULL)
1733
if (plugin_foreach(NULL, flush_handlerton,
1734
MYSQL_STORAGE_ENGINE_PLUGIN, 0))
1739
if (db_type->state != SHOW_OPTION_YES ||
1740
(db_type->flush_logs && db_type->flush_logs(db_type)))
1746
static const char *check_lowercase_names(handler *file, const char *path,
1749
if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
1752
/* Ensure that table handler get path in lower case */
1753
if (tmp_path != path)
1754
strmov(tmp_path, path);
1757
we should only lowercase the database/table part,
so start the conversion after the home directory
1760
my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
1766
An interceptor to hijack the text of the error message without
1767
setting an error in the thread. We need the text to present it
1768
in the form of a warning to the user.
1771
struct Ha_delete_table_error_handler: public Internal_error_handler
1774
virtual bool handle_error(uint sql_errno,
1775
const char *message,
1776
MYSQL_ERROR::enum_warning_level level,
1778
char buff[MYSQL_ERRMSG_SIZE];
1783
Ha_delete_table_error_handler::
1784
handle_error(uint sql_errno,
1785
const char *message,
1786
MYSQL_ERROR::enum_warning_level level,
1789
/* Grab the error message */
1790
strmake(buff, message, sizeof(buff)-1);
1796
This should return ENOENT if the file doesn't exist.
1797
The .frm file will be deleted only if we return 0 or ENOENT
1799
int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
1800
const char *db, const char *alias, bool generate_warning)
1803
char tmp_path[FN_REFLEN];
1806
TABLE_SHARE dummy_share;
1807
DBUG_ENTER("ha_delete_table");
1809
bzero((char*) &dummy_table, sizeof(dummy_table));
1810
bzero((char*) &dummy_share, sizeof(dummy_share));
1811
dummy_table.s= &dummy_share;
1813
/* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
1814
if (table_type == NULL ||
1815
! (file=get_new_handler((TABLE_SHARE*)0, thd->mem_root, table_type)))
1816
DBUG_RETURN(ENOENT);
1818
path= check_lowercase_names(file, path, tmp_path);
1819
if ((error= file->ha_delete_table(path)) && generate_warning)
1822
Because file->print_error() use my_error() to generate the error message
1823
we use an internal error handler to intercept it and store the text
1824
in a temporary buffer. Later the message will be presented to user
1827
Ha_delete_table_error_handler ha_delete_table_error_handler;
1829
/* Fill up structures that print_error may need */
1830
dummy_share.path.str= (char*) path;
1831
dummy_share.path.length= strlen(path);
1832
dummy_share.db.str= (char*) db;
1833
dummy_share.db.length= strlen(db);
1834
dummy_share.table_name.str= (char*) alias;
1835
dummy_share.table_name.length= strlen(alias);
1836
dummy_table.alias= alias;
1838
file->change_table_ptr(&dummy_table, &dummy_share);
1840
thd->push_internal_handler(&ha_delete_table_error_handler);
1841
file->print_error(error, 0);
1843
thd->pop_internal_handler();
1846
XXX: should we convert *all* errors to warnings here?
1847
What if the error is fatal?
1849
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, error,
1850
ha_delete_table_error_handler.buff);
/****************************************************************************
** General Cursor / handler functions
****************************************************************************/
56
Cursor::Cursor(plugin::StorageEngine &engine_arg,
60
estimation_rows_to_insert(0),
62
key_used_on_scan(MAX_KEY), active_index(MAX_KEY),
63
ref_length(sizeof(internal::my_off_t)),
66
next_insert_id(0), insert_id_for_cur_row(0)
71
assert(locked == false);
72
/* TODO: assert(inited == NONE); */
77
* @note this only used in
78
* optimizer::QuickRangeSelect::init_ror_merged_scan(bool reuse_handler) as
79
* of the writing of this comment. -Brian
81
Cursor *Cursor::clone(memory::Root *mem_root)
83
Cursor *new_handler= getTable()->getMutableShare()->db_type()->getCursor(*getTable());
1859
handler *handler::clone(MEM_ROOT *mem_root)
1861
handler *new_handler= get_new_handler(table->s, mem_root, table->s->db_type());
86
Allocate Cursor->ref here because otherwise ha_open will allocate it
87
on this->table->mem_root and we will not be able to reclaim that memory
88
when the clone Cursor object is destroyed.
1863
Allocate handler->ref here because otherwise ha_open will allocate it
1864
on this->table->mem_root and we will not be able to reclaim that memory
1865
when the clone handler object is destroyed.
90
if (!(new_handler->ref= (unsigned char*) mem_root->alloc_root(ALIGN_SIZE(ref_length)*2)))
1867
if (!(new_handler->ref= (uchar*) alloc_root(mem_root, ALIGN_SIZE(ref_length)*2)))
93
TableIdentifier identifier(getTable()->getShare()->getSchemaName(),
94
getTable()->getShare()->getTableName(),
95
getTable()->getShare()->getType());
97
if (new_handler && !new_handler->ha_open(identifier,
98
getTable()->getDBStat(),
1869
if (new_handler && !new_handler->ha_open(table,
1870
table->s->normalized_path.str,
99
1872
HA_OPEN_IGNORE_IF_LOCKED))
100
1873
return new_handler;
106
given a buffer with a key value, and a map of keyparts
107
that are present in this value, returns the length of the value
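
  A usage sketch, assuming the make_prev_keypart_map() helper from the
  MySQL sources is available and "cursor" stands for any Cursor object:
  to get the stored length of the first two parts of key 0, pass a map
  with the two lowest bits set:

    key_part_map map= make_prev_keypart_map(2);           /* binary 011 */
    uint32_t prefix_len= cursor->calculate_key_len(0, map);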
1879
void handler::ha_statistic_increment(ulong SSV::*offset) const
1881
status_var_increment(table->in_use->status_var.*offset);
1884
void **handler::ha_data(THD *thd) const
1886
return thd_ha_data(thd, ht);
1889
THD *handler::ha_thd(void) const
1891
DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd);
1892
return (table && table->in_use) ? table->in_use : current_thd;
1897
Get tablespace name from handler
1898
Returns the tablespace name associated
1899
with the table or NULL if not defined
109
uint32_t Cursor::calculate_key_len(uint32_t key_position, key_part_map keypart_map_arg)
111
/* works only with key prefixes */
112
assert(((keypart_map_arg + 1) & keypart_map_arg) == 0);
114
const KeyPartInfo *key_part_found= getTable()->getShare()->getKeyInfo(key_position).key_part;
115
const KeyPartInfo *end_key_part_found= key_part_found + getTable()->getShare()->getKeyInfo(key_position).key_parts;
118
while (key_part_found < end_key_part_found && keypart_map_arg)
120
length+= key_part_found->store_length;
121
keypart_map_arg >>= 1;
127
int Cursor::startIndexScan(uint32_t idx, bool sorted)
130
assert(inited == NONE);
131
if (!(result= doStartIndexScan(idx, sorted)))
137
int Cursor::endIndexScan()
139
assert(inited==INDEX);
142
return(doEndIndexScan());
145
int Cursor::startTableScan(bool scan)
148
assert(inited==NONE || (inited==RND && scan));
149
inited= (result= doStartTableScan(scan)) ? NONE: RND;
154
int Cursor::endTableScan()
158
return(doEndTableScan());
161
int Cursor::ha_index_or_rnd_end()
163
return inited == INDEX ? endIndexScan() : inited == RND ? endTableScan() : 0;
166
void Cursor::ha_start_bulk_insert(ha_rows rows)
168
estimation_rows_to_insert= rows;
169
start_bulk_insert(rows);
172
int Cursor::ha_end_bulk_insert()
174
estimation_rows_to_insert= 0;
175
return end_bulk_insert();
178
const key_map *Cursor::keys_to_use_for_scanning()
180
return &key_map_empty;
183
bool Cursor::has_transactions()
185
return (getTable()->getShare()->db_type()->check_flag(HTON_BIT_DOES_TRANSACTIONS));
188
void Cursor::ha_statistic_increment(uint64_t system_status_var::*offset) const
190
(getTable()->in_use->status_var.*offset)++;
193
void **Cursor::ha_data(Session *session) const
195
return session->getEngineData(getEngine());
198
bool Cursor::is_fatal_error(int error, uint32_t flags)
201
((flags & HA_CHECK_DUP_KEY) &&
202
(error == HA_ERR_FOUND_DUPP_KEY ||
203
error == HA_ERR_FOUND_DUPP_UNIQUE)))
209
ha_rows Cursor::records() { return stats.records; }
210
uint64_t Cursor::tableSize() { return stats.index_file_length + stats.data_file_length; }
211
uint64_t Cursor::rowSize() { return getTable()->getRecordLength() + getTable()->sizeFields(); }
213
int Cursor::doOpen(const TableIdentifier &identifier, int mode, uint32_t test_if_locked)
215
return open(identifier.getPath().c_str(), mode, test_if_locked);
1902
char* handler::get_tablespace_name()
1904
return table->s->tablespace;
219
Open database-Cursor.
1908
Open database-handler.
221
1910
Try O_RDONLY if the table cannot be opened as O_RDWR.
Don't wait for locks if HA_OPEN_WAIT_IF_LOCKED is not set.
224
int Cursor::ha_open(const TableIdentifier &identifier,
1913
int handler::ha_open(TABLE *table_arg, const char *name, int mode,
230
if ((error= doOpen(identifier, mode, test_if_locked)))
1917
DBUG_ENTER("handler::ha_open");
1919
("name: %s db_type: %d db_stat: %d mode: %d lock_test: %d",
1920
name, ht->db_type, table_arg->db_stat, mode,
1924
DBUG_ASSERT(table->s == table_share);
1925
DBUG_ASSERT(alloc_root_inited(&table->mem_root));
1927
if ((error=open(name,mode,test_if_locked)))
232
1929
if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
233
(getTable()->db_stat & HA_TRY_READ_ONLY))
1930
(table->db_stat & HA_TRY_READ_ONLY))
235
getTable()->db_stat|=HA_READ_ONLY;
236
error= doOpen(identifier, O_RDONLY,test_if_locked);
1932
table->db_stat|=HA_READ_ONLY;
1933
error=open(name,O_RDONLY,test_if_locked);
241
errno= error; /* Safeguard */
1938
my_errno= error; /* Safeguard */
1939
DBUG_PRINT("error",("error: %d errno: %d",error,errno));
245
if (getTable()->getShare()->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
246
getTable()->db_stat|=HA_READ_ONLY;
1943
if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
1944
table->db_stat|=HA_READ_ONLY;
247
1945
(void) extra(HA_EXTRA_NO_READCHECK); // Not needed in SQL
249
/* ref is already allocated for us if we're called from Cursor::clone() */
250
if (!ref && !(ref= (unsigned char*) getTable()->alloc_root(ALIGN_SIZE(ref_length)*2)))
1947
/* ref is already allocated for us if we're called from handler::clone() */
1948
if (!ref && !(ref= (uchar*) alloc_root(&table->mem_root,
1949
ALIGN_SIZE(ref_length)*2)))
253
1952
error=HA_ERR_OUT_OF_MEM;
256
1955
dup_ref=ref+ALIGN_SIZE(ref_length);
1956
cached_table_flags= table_flags();
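/*
  Illustrative sketch (not part of the original source): the read-only
  fallback pattern used above, shown with a plain POSIX open(). If a
  read-write open fails with EACCES or EROFS, the file is re-opened
  read-only and a flag records the degraded mode. The function name and
  the read_only flag are hypothetical.
*/
#if 0
#include <cerrno>
#include <fcntl.h>

/* Returns a file descriptor or -1; sets *read_only when the fallback was used. */
static int open_rw_with_ro_fallback(const char *path, bool *read_only)
{
  *read_only= false;
  int fd= ::open(path, O_RDWR);
  if (fd < 0 && (errno == EACCES || errno == EROFS))
  {
    fd= ::open(path, O_RDONLY);
    if (fd >= 0)
      *read_only= true;                 /* remember we lost write access */
  }
  return fd;
}
#endif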
1962
Use this method to find the random position of a record, as the plain
position() call does not work for random positioning with some handlers.
1968
int handler::rnd_pos_by_record(uchar *record)
1971
DBUG_ENTER("handler::rnd_pos_by_record");
1974
if (inited && (error= ha_index_end()))
1976
if ((error= ha_rnd_init(FALSE)))
1979
DBUG_RETURN(rnd_pos(record, ref));
774
3038
Discard or import tablespace: public interface.
776
@sa Cursor::discard_or_import_tablespace()
3040
@sa handler::discard_or_import_tablespace()
780
Cursor::ha_discard_or_import_tablespace(bool discard)
3044
handler::ha_discard_or_import_tablespace(my_bool discard)
782
setTransactionReadWrite();
3046
mark_trx_read_write();
784
3048
return discard_or_import_tablespace(discard);
3053
Prepare for alter: public interface.
3055
Called to prepare an *online* ALTER.
3057
@sa handler::prepare_for_alter()
3061
handler::ha_prepare_for_alter()
3063
mark_trx_read_write();
3065
prepare_for_alter();
3070
Rename table: public interface.
3072
@sa handler::rename_table()
3076
handler::ha_rename_table(const char *from, const char *to)
3078
mark_trx_read_write();
3080
return rename_table(from, to);
3085
Delete table: public interface.
3087
@sa handler::delete_table()
3091
handler::ha_delete_table(const char *name)
3093
mark_trx_read_write();
3095
return delete_table(name);
788
3100
Drop table in the engine: public interface.
790
@sa Cursor::drop_table()
3102
@sa handler::drop_table()
794
Cursor::closeMarkForDelete(const char *name)
3106
handler::ha_drop_table(const char *name)
796
setTransactionReadWrite();
3108
mark_trx_read_write();
798
3110
return drop_table(name);
801
int Cursor::index_next_same(unsigned char *buf, const unsigned char *key, uint32_t keylen)
3115
Create a table in the engine: public interface.
3117
@sa handler::create()
3121
handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info)
3123
mark_trx_read_write();
3125
return create(name, form, info);
3130
Create handler files for CREATE TABLE: public interface.
3132
@sa handler::create_handler_files()
3136
handler::ha_create_handler_files(const char *name, const char *old_name,
3137
int action_flag, HA_CREATE_INFO *info)
3139
mark_trx_read_write();
3141
return create_handler_files(name, old_name, action_flag, info);
3146
Tell the storage engine that it is allowed to "disable transactions" in the
handler. It is a hint that ACID is not required - it is used in NDB for
ALTER TABLE, for example, when data are copied to a temporary table.
A storage engine may treat this hint any way it likes. NDB, for example,
starts to commit every now and then automatically.
This hint can be safely ignored.
3153
int ha_enable_transaction(THD *thd, bool on)
3156
DBUG_ENTER("ha_enable_transaction");
3157
DBUG_PRINT("enter", ("on: %d", (int) on));
3159
if ((thd->transaction.on= on))
3162
Now all storage engines should have transaction handling enabled.
But some may have it enabled all the time - "disabling" transactions
is an optimization hint that the storage engine is free to ignore.
So, let's commit an open transaction (if any) now.
3167
if (!(error= ha_commit_trans(thd, 0)))
3168
error= end_trans(thd, COMMIT);
3173
int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
3176
DBUG_ENTER("index_next_same");
804
3177
if (!(error=index_next(buf)))
806
ptrdiff_t ptrdiff= buf - getTable()->getInsertRecord();
807
unsigned char *save_record_0= NULL;
808
KeyInfo *key_info= NULL;
809
KeyPartInfo *key_part;
810
KeyPartInfo *key_part_end= NULL;
3179
my_ptrdiff_t ptrdiff= buf - table->record[0];
3180
uchar *save_record_0= NULL;
3181
KEY *key_info= NULL;
3182
KEY_PART_INFO *key_part;
3183
KEY_PART_INFO *key_part_end= NULL;
813
key_cmp_if_same() compares table->getInsertRecord() against 'key'.
814
In parts it uses table->getInsertRecord() directly, in parts it uses
815
field objects with their local pointers into table->getInsertRecord().
816
If 'buf' is distinct from table->getInsertRecord(), we need to move
817
all record references. This is table->getInsertRecord() itself and
3186
key_cmp_if_same() compares table->record[0] against 'key'.
3187
In parts it uses table->record[0] directly, in parts it uses
3188
field objects with their local pointers into table->record[0].
3189
If 'buf' is distinct from table->record[0], we need to move
3190
all record references. This is table->record[0] itself and
818
3191
the field pointers of the fields used in this key.
822
save_record_0= getTable()->getInsertRecord();
823
getTable()->record[0]= buf;
824
key_info= getTable()->key_info + active_index;
3195
save_record_0= table->record[0];
3196
table->record[0]= buf;
3197
key_info= table->key_info + active_index;
825
3198
key_part= key_info->key_part;
826
3199
key_part_end= key_part + key_info->key_parts;
827
3200
for (; key_part < key_part_end; key_part++)
829
assert(key_part->field);
3202
DBUG_ASSERT(key_part->field);
830
3203
key_part->field->move_field_offset(ptrdiff);
834
if (key_cmp_if_same(getTable(), key, active_index, keylen))
3207
if (key_cmp_if_same(table, key, active_index, keylen))
836
getTable()->status=STATUS_NOT_FOUND;
3209
table->status=STATUS_NOT_FOUND;
837
3210
error=HA_ERR_END_OF_FILE;
840
3213
/* Move back if necessary. */
843
getTable()->record[0]= save_record_0;
3216
table->record[0]= save_record_0;
844
3217
for (key_part= key_info->key_part; key_part < key_part_end; key_part++)
845
3218
key_part->field->move_field_offset(-ptrdiff);
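/*
  Illustrative sketch (not part of the original source): why the field
  pointers are shifted by ptrdiff above. Each field object keeps a raw
  pointer into the default record buffer; to evaluate the same fields
  against a different buffer, every pointer is temporarily moved by the
  distance between the two buffers and moved back afterwards. The
  MiniField type is hypothetical; the pointer subtraction between the two
  buffers mirrors the pattern used in the function above.
*/
#if 0
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <cstdio>

struct MiniField
{
  unsigned char *ptr;                         /* points into a record buffer */
  void move_field_offset(std::ptrdiff_t d) { ptr+= d; }
  uint32_t read_u32() const { uint32_t v; memcpy(&v, ptr, 4); return v; }
};

int main()
{
  unsigned char record0[8]= {0}, other_buf[8]= {0};
  uint32_t a= 1, b= 2;
  memcpy(record0 + 4, &a, 4);
  memcpy(other_buf + 4, &b, 4);

  MiniField f= { record0 + 4 };               /* field lives at offset 4 */
  std::ptrdiff_t ptrdiff= other_buf - record0;

  f.move_field_offset(ptrdiff);               /* evaluate against other_buf */
  printf("%u\n", f.read_u32());               /* prints 2 */
  f.move_field_offset(-ptrdiff);              /* restore the original pointer */
  printf("%u\n", f.read_u32());               /* prints 1 */
  return 0;
}
#endif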
852
3225
/****************************************************************************
** Some general functions that aren't in the Cursor class
** Some general functions that aren't in the handler class
****************************************************************************/
3230
Initiates table-file and calls appropriate database-creator.
3237
int ha_create_table(THD *thd, const char *path,
3238
const char *db, const char *table_name,
3239
HA_CREATE_INFO *create_info,
3240
bool update_create_info)
3244
char name_buff[FN_REFLEN];
3247
DBUG_ENTER("ha_create_table");
3249
init_tmp_table_share(thd, &share, db, 0, table_name, path);
3250
if (open_table_def(thd, &share, 0) ||
3251
open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table,
3255
if (update_create_info)
3256
update_create_info_from_table(create_info, &table);
3258
name= check_lowercase_names(table.file, share.path.str, name_buff);
3260
error= table.file->ha_create(name, &table, create_info);
3261
VOID(closefrm(&table, 0));
3264
strxmov(name_buff, db, ".", table_name, NullS);
3265
my_error(ER_CANT_CREATE_TABLE, MYF(ME_BELL+ME_WAITTANG), name_buff, error);
3268
free_table_share(&share);
3269
DBUG_RETURN(error != 0);
3273
Try to discover table from engine.
3276
If found, write the frm file to disk.
3279
-1 Table did not exist
3283
> 0 Error, table existed but could not be created
3285
int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
3290
char path[FN_REFLEN];
3291
HA_CREATE_INFO create_info;
3294
DBUG_ENTER("ha_create_table_from_engine");
3295
DBUG_PRINT("enter", ("name '%s'.'%s'", db, name));
3297
bzero((uchar*) &create_info,sizeof(create_info));
3298
if ((error= ha_discover(thd, db, name, &frmblob, &frmlen)))
3300
/* Table could not be discovered and thus not created */
3305
Table exists in handler and could be discovered
3306
frmblob and frmlen are set, write the frm to disk
3309
build_table_filename(path, FN_REFLEN-1, db, name, "", 0);
3310
// Save the frm file
3311
error= writefrm(path, frmblob, frmlen);
3312
my_free(frmblob, MYF(0));
3316
init_tmp_table_share(thd, &share, db, 0, name, path);
3317
if (open_table_def(thd, &share, 0))
3321
if (open_table_from_share(thd, &share, "" ,0, 0, 0, &table, OTM_OPEN))
3323
free_table_share(&share);
3327
update_create_info_from_table(&create_info, &table);
3328
create_info.table_options|= HA_OPTION_CREATE_FROM_ENGINE;
3330
check_lowercase_names(table.file, path, path);
3331
error=table.file->ha_create(path, &table, &create_info);
3332
VOID(closefrm(&table, 1));
3334
DBUG_RETURN(error != 0);
3337
void st_ha_check_opt::init()
3339
flags= sql_flags= 0;
3340
sort_buffer_size = current_thd->variables.myisam_sort_buff_size;
3344
/*****************************************************************************
3347
This code is only relevant for ISAM/MyISAM tables
3349
key_cache->cache may be 0 only in the case where a key cache is not
3350
initialized or when we where not able to init the key cache in a previous
3351
call to ha_init_key_cache() (probably out of memory)
3352
*****************************************************************************/
3355
Init a key cache if it has not been initialized before.
3357
int ha_init_key_cache(const char *name, KEY_CACHE *key_cache)
3359
DBUG_ENTER("ha_init_key_cache");
3361
if (!key_cache->key_cache_inited)
3363
pthread_mutex_lock(&LOCK_global_system_variables);
3364
ulong tmp_buff_size= (ulong) key_cache->param_buff_size;
3365
uint tmp_block_size= (uint) key_cache->param_block_size;
3366
uint division_limit= key_cache->param_division_limit;
3367
uint age_threshold= key_cache->param_age_threshold;
3368
pthread_mutex_unlock(&LOCK_global_system_variables);
3369
DBUG_RETURN(!init_key_cache(key_cache,
3372
division_limit, age_threshold));
3381
int ha_resize_key_cache(KEY_CACHE *key_cache)
3383
DBUG_ENTER("ha_resize_key_cache");
3385
if (key_cache->key_cache_inited)
3387
pthread_mutex_lock(&LOCK_global_system_variables);
3388
long tmp_buff_size= (long) key_cache->param_buff_size;
3389
long tmp_block_size= (long) key_cache->param_block_size;
3390
uint division_limit= key_cache->param_division_limit;
3391
uint age_threshold= key_cache->param_age_threshold;
3392
pthread_mutex_unlock(&LOCK_global_system_variables);
3393
DBUG_RETURN(!resize_key_cache(key_cache, tmp_block_size,
3395
division_limit, age_threshold));
3402
Change parameters for key cache (like size)
3404
int ha_change_key_cache_param(KEY_CACHE *key_cache)
3406
if (key_cache->key_cache_inited)
3408
pthread_mutex_lock(&LOCK_global_system_variables);
3409
uint division_limit= key_cache->param_division_limit;
3410
uint age_threshold= key_cache->param_age_threshold;
3411
pthread_mutex_unlock(&LOCK_global_system_variables);
3412
change_key_cache_param(key_cache, division_limit, age_threshold);
3418
Free memory allocated by a key cache.
3420
int ha_end_key_cache(KEY_CACHE *key_cache)
3422
end_key_cache(key_cache, 1); // Can never fail
3427
Move all tables from one key cache to another one.
3429
int ha_change_key_cache(KEY_CACHE *old_key_cache,
3430
KEY_CACHE *new_key_cache)
3432
mi_change_key_cache(old_key_cache, new_key_cache);
3438
Try to discover one table from handler(s).
3441
-1 Table did not exist
3443
0 OK. In this case *frmblob and *frmlen are set
3445
>0 error. frmblob and frmlen may not be set
3447
struct st_discover_args
3455
static my_bool discover_handlerton(THD *thd, plugin_ref plugin,
3458
st_discover_args *vargs= (st_discover_args *)arg;
3459
handlerton *hton= plugin_data(plugin, handlerton *);
3460
if (hton->state == SHOW_OPTION_YES && hton->discover &&
3461
(!(hton->discover(hton, thd, vargs->db, vargs->name,
3469
int ha_discover(THD *thd, const char *db, const char *name,
3470
uchar **frmblob, size_t *frmlen)
3472
int error= -1; // Table does not exist in any handler
3473
DBUG_ENTER("ha_discover");
3474
DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
3475
st_discover_args args= {db, name, frmblob, frmlen};
3477
if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */
3480
if (plugin_foreach(thd, discover_handlerton,
3481
MYSQL_STORAGE_ENGINE_PLUGIN, &args))
3485
status_var_increment(thd->status_var.ha_discover_count);
3491
Call this function in order to give the handler the possibility
to ask the engine if there are any new tables that should be written to disk
or any dropped tables that need to be removed from disk
3495
struct st_find_files_args
3501
List<LEX_STRING> *files;
3505
Ask handler if the table exists in engine.
3507
HA_ERR_NO_SUCH_TABLE Table does not exist
3509
HA_ERR_TABLE_EXIST Table exists
3513
struct st_table_exists_in_engine_args
3520
static my_bool table_exists_in_engine_handlerton(THD *thd, plugin_ref plugin,
3523
st_table_exists_in_engine_args *vargs= (st_table_exists_in_engine_args *)arg;
3524
handlerton *hton= plugin_data(plugin, handlerton *);
3526
int err= HA_ERR_NO_SUCH_TABLE;
3528
if (hton->state == SHOW_OPTION_YES && hton->table_exists_in_engine)
3529
err = hton->table_exists_in_engine(hton, thd, vargs->db, vargs->name);
3532
if (vargs->err == HA_ERR_TABLE_EXIST)
3538
int ha_table_exists_in_engine(THD* thd, const char* db, const char* name)
3540
DBUG_ENTER("ha_table_exists_in_engine");
3541
DBUG_PRINT("enter", ("db: %s, name: %s", db, name));
3542
st_table_exists_in_engine_args args= {db, name, HA_ERR_NO_SUCH_TABLE};
3543
plugin_foreach(thd, table_exists_in_engine_handlerton,
3544
MYSQL_STORAGE_ENGINE_PLUGIN, &args);
3545
DBUG_PRINT("exit", ("error: %d", args.err));
3546
DBUG_RETURN(args.err);
857
3550
Calculate cost of 'index only' scan for given index and number of records
859
3552
@param keynr Index number
1142
3841
while ((result == HA_ERR_END_OF_FILE) && !range_res);
1144
3843
*range_info= mrr_cur_range.ptr;
3844
DBUG_PRINT("exit",("handler::multi_range_read_next result %d", result));
3845
DBUG_RETURN(result);
3849
/* **************************************************************************
3850
* DS-MRR implementation
3851
***************************************************************************/
3854
DS-MRR: Initialize and start MRR scan
3856
Initialize and start the MRR scan. Depending on the mode parameter, this
3857
may use default or DS-MRR implementation.
3859
@param h Table handler to be used
3860
@param key Index to be used
3861
@param seq_funcs Interval sequence enumeration functions
3862
@param seq_init_param Interval sequence enumeration parameter
3863
@param n_ranges Number of ranges in the sequence.
3864
@param mode HA_MRR_* modes to use
3865
@param buf INOUT Buffer to use
3867
@retval 0 Ok, Scan started.
3871
int DsMrr_impl::dsmrr_init(handler *h, KEY *key,
3872
RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
3873
uint n_ranges, uint mode, HANDLER_BUFFER *buf)
3877
Item *pushed_cond= NULL;
3879
DBUG_ENTER("DsMrr_impl::dsmrr_init");
3880
keyno= h->active_index;
3881
DBUG_ASSERT(h2 == NULL);
3882
if (mode & HA_MRR_USE_DEFAULT_IMPL || mode & HA_MRR_SORTED)
3884
use_default_impl= TRUE;
3885
DBUG_RETURN(h->handler::multi_range_read_init(seq_funcs, seq_init_param,
3886
n_ranges, mode, buf));
3888
rowids_buf= buf->buffer;
3889
//psergey-todo: don't add key_length as it is not needed anymore
3890
rowids_buf += key->key_length + h->ref_length;
3892
is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
3893
rowids_buf_end= buf->buffer_end;
3895
elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
3896
rowids_buf_last= rowids_buf +
3897
((rowids_buf_end - rowids_buf)/ elem_size)*
3899
rowids_buf_end= rowids_buf_last;
3901
/* Create a separate handler object to do rndpos() calls. */
3902
THD *thd= current_thd;
3903
if (!(new_h2= h->clone(thd->mem_root)) ||
3904
new_h2->ha_external_lock(thd, F_RDLCK))
3910
if (keyno == h->pushed_idx_cond_keyno)
3911
pushed_cond= h->pushed_idx_cond;
3912
if (h->ha_index_end())
3919
table->prepare_for_position();
3920
new_h2->extra(HA_EXTRA_KEYREAD);
3922
if (h2->ha_index_init(keyno, FALSE) ||
3923
h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
3926
use_default_impl= FALSE;
3929
h2->idx_cond_push(keyno, pushed_cond);
3930
if (dsmrr_fill_buffer(new_h2))
3934
If the above call has scanned through all intervals in *seq, then
3935
adjust *buf to indicate that the remaining buffer space will not be used.
3938
buf->end_of_used_area= rowids_buf_last;
3940
if (h->ha_rnd_init(FALSE))
3945
h2->ha_index_or_rnd_end();
3946
h2->ha_external_lock(thd, F_UNLCK);
3953
void DsMrr_impl::dsmrr_close()
3955
DBUG_ENTER("DsMrr_impl::dsmrr_close");
3958
h2->ha_external_lock(current_thd, F_UNLCK);
3963
use_default_impl= TRUE;
3968
static int rowid_cmp(void *h, uchar *a, uchar *b)
3970
return ((handler*)h)->cmp_ref(a, b);
3975
DS-MRR: Fill the buffer with rowids and sort it by rowid

{This is an internal function of the DiskSweep MRR implementation}
Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into the
buffer. When the buffer is full or the scan is completed, sort the buffer by
rowid.

The function assumes that the rowids buffer is empty when it is invoked.
3984
@param h Table handler
3986
@retval 0 OK, the next portion of rowids is in the buffer,
3991
int DsMrr_impl::dsmrr_fill_buffer(handler *unused)
3995
DBUG_ENTER("DsMrr_impl::dsmrr_fill_buffer");
3997
rowids_buf_cur= rowids_buf;
3998
while ((rowids_buf_cur < rowids_buf_end) &&
3999
!(res= h2->handler::multi_range_read_next(&range_info)))
4001
/* Put rowid, or {rowid, range_id} pair into the buffer */
4002
h2->position(table->record[0]);
4003
memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
4004
rowids_buf_cur += h->ref_length;
4008
memcpy(rowids_buf_cur, &range_info, sizeof(void*));
4009
rowids_buf_cur += sizeof(void*);
4013
if (res && res != HA_ERR_END_OF_FILE)
4015
dsmrr_eof= test(res == HA_ERR_END_OF_FILE);
4017
/* Sort the buffer contents by rowid */
4018
uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
4019
uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
4021
my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
4023
rowids_buf_last= rowids_buf_cur;
4024
rowids_buf_cur= rowids_buf;
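/*
  Illustrative sketch (not part of the original source): the buffer-fill-
  and-sort step of a DS-MRR style scan, reduced to its essentials.
  Fixed-size row ids are appended to a flat byte buffer and the buffer is
  then sorted so that the later rnd_pos() sweep touches the data file in
  order. The 8-byte row-id size and the sample values are hypothetical.
*/
#if 0
#include <cstdlib>
#include <cstring>
#include <cstdio>

static const size_t REF_LENGTH= 8;           /* size of one stored rowid */

static int rowid_cmp(const void *a, const void *b)
{
  return memcmp(a, b, REF_LENGTH);
}

int main()
{
  unsigned char buf[4 * REF_LENGTH];
  unsigned char *cur= buf;

  const char *ids[]= { "00000042", "00000007", "00000019", "00000001" };
  for (int i= 0; i < 4; i++)
  {
    memcpy(cur, ids[i], REF_LENGTH);         /* stands in for position() + copy */
    cur+= REF_LENGTH;
  }

  size_t n_rowids= (cur - buf) / REF_LENGTH;
  qsort(buf, n_rowids, REF_LENGTH, rowid_cmp);

  for (size_t i= 0; i < n_rowids; i++)       /* rowids now in ascending order */
    printf("%.8s\n", (const char *)(buf + i * REF_LENGTH));
  return 0;
}
#endif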
4030
DS-MRR implementation: multi_range_read_next() function
4033
int DsMrr_impl::dsmrr_next(handler *h, char **range_info)
4037
if (use_default_impl)
4038
return h->handler::multi_range_read_next(range_info);
4040
if (rowids_buf_cur == rowids_buf_last)
4044
res= HA_ERR_END_OF_FILE;
4047
res= dsmrr_fill_buffer(h);
4052
/* Return EOF if there are no rowids in the buffer after re-fill attempt */
4053
if (rowids_buf_cur == rowids_buf_last)
4055
res= HA_ERR_END_OF_FILE;
4059
res= h->rnd_pos(table->record[0], rowids_buf_cur);
4060
rowids_buf_cur += h->ref_length;
4063
memcpy(range_info, rowids_buf_cur, sizeof(void*));
4064
rowids_buf_cur += sizeof(void*);
4075
DS-MRR implementation: multi_range_read_info() function
4077
int DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, uint *bufsz,
4078
uint *flags, COST_VECT *cost)
4081
uint def_flags= *flags;
4082
uint def_bufsz= *bufsz;
4084
/* Get cost/flags/mem_usage of default MRR implementation */
4085
res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
4089
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
4090
choose_mrr_impl(keyno, rows, &def_flags, &def_bufsz, cost))
4092
/* Default implementation is chosen */
DBUG_PRINT("info", ("Default MRR implementation chosen"));
DBUG_PRINT("info", ("DS-MRR implementation chosen"));
4106
DS-MRR Implementation: multi_range_read_info_const() function
4109
ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
4110
void *seq_init_param, uint n_ranges,
4111
uint *bufsz, uint *flags, COST_VECT *cost)
4114
uint def_flags= *flags;
4115
uint def_bufsz= *bufsz;
4116
/* Get cost/flags/mem_usage of default MRR implementation */
4117
rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
4118
n_ranges, &def_bufsz,
4120
if (rows == HA_POS_ERROR)
4122
/* Default implementation can't perform MRR scan => we can't either */
4127
If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
4128
use the default MRR implementation (we need it for UPDATE/DELETE).
4129
Otherwise, make a choice based on cost and @@optimizer_use_mrr.
4131
if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
4132
choose_mrr_impl(keyno, rows, flags, bufsz, cost))
4134
DBUG_PRINT("info", ("Default MRR implementation choosen"));
4140
*flags &= ~HA_MRR_USE_DEFAULT_IMPL;
4141
DBUG_PRINT("info", ("DS-MRR implementation choosen"));
4148
Check if key has partially-covered columns
4150
We can't use DS-MRR to perform range scans when the ranges are over
partially-covered keys, because we won't have full key part values
(we'll only have their prefixes from the index) and so will not be able
to check whether we've reached the end of the range.
4155
@param keyno Key to check
4158
Allow use of DS-MRR in cases where the index has partially-covered
4159
components but they are not used for scanning.
4165
bool DsMrr_impl::key_uses_partial_cols(uint keyno)
4167
KEY_PART_INFO *kp= table->key_info[keyno].key_part;
4168
KEY_PART_INFO *kp_end= kp + table->key_info[keyno].key_parts;
4169
for (; kp != kp_end; kp++)
4171
if (!kp->field->part_of_key.is_set(keyno))
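/*
  Illustrative sketch (not part of the original source): the shape of the
  partial-coverage test above. For every part of the candidate key we
  check whether the underlying column is fully stored in the index; one
  prefix-only column is enough to disqualify the key for DS-MRR. The
  KeyPartDesc type is hypothetical.
*/
#if 0
#include <vector>

struct KeyPartDesc
{
  bool field_fully_in_key;   /* false for prefix parts, e.g. KEY(col(10)) */
};

static bool key_uses_partial_cols(const std::vector<KeyPartDesc> &key_parts)
{
  for (const KeyPartDesc &kp : key_parts)
    if (!kp.field_fully_in_key)
      return true;           /* prefix column: full value not in the index */
  return false;
}
#endif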
4179
DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
4181
Make the choice between using Default MRR implementation and DS-MRR.
4182
This function contains common functionality factored out of dsmrr_info()
4183
and dsmrr_info_const(). The function assumes that the default MRR
4184
implementation's applicability requirements are satisfied.
4186
@param keyno Index number
4187
@param rows E(full rows to be retrieved)
4188
@param flags IN MRR flags provided by the MRR user
4189
OUT If DS-MRR is chosen, flags of the DS-MRR implementation;
else the value is not modified
@param bufsz IN If DS-MRR is chosen, buffer use of the DS-MRR implementation;
else the value is not modified
@param cost IN Cost of default MRR implementation
OUT If DS-MRR is chosen, cost of the DS-MRR scan;
else the value is not modified
4197
@retval TRUE Default MRR implementation should be used
4198
@retval FALSE DS-MRR implementation should be used
4201
bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
4202
uint *bufsz, COST_VECT *cost)
4204
COST_VECT dsmrr_cost;
4206
THD *thd= current_thd;
4207
if ((thd->variables.optimizer_use_mrr == 2) ||
4208
(*flags & HA_MRR_INDEX_ONLY) || (*flags & HA_MRR_SORTED) ||
4209
(keyno == table->s->primary_key &&
4210
h->primary_key_is_clustered()) ||
4211
key_uses_partial_cols(keyno))
4213
/* Use the default implementation */
4214
*flags |= HA_MRR_USE_DEFAULT_IMPL;
4218
uint add_len= table->key_info[keyno].key_length + h->ref_length;
4220
if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
4226
If @@optimizer_use_mrr==force, then set the cost of DS-MRR to be the minimum
of the DS-MRR and default implementations' costs. This allows one to force use of
DS-MRR whenever it is applicable without affecting other cost-based
4231
if ((force_dsmrr= (thd->variables.optimizer_use_mrr == 1)) &&
4232
dsmrr_cost.total_cost() > cost->total_cost())
4235
if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost())
4237
*flags &= ~HA_MRR_USE_DEFAULT_IMPL; /* Use the DS-MRR implementation */
4238
*flags &= ~HA_MRR_SORTED; /* We will return unordered output */
4244
/* Use the default MRR implementation */
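/*
  Illustrative sketch (not part of the original source): the cost-based
  decision above, boiled down. The caller supplies the cost of the
  default MRR strategy and the estimated cost of the DS-MRR strategy;
  "force" models @@optimizer_use_mrr=force, which makes DS-MRR win
  whenever it is applicable at all. The names are hypothetical.
*/
#if 0
enum MrrChoice { MRR_DEFAULT, MRR_DISK_SWEEP };

static MrrChoice choose_mrr(double default_cost, double dsmrr_cost,
                            bool dsmrr_applicable, bool force)
{
  if (!dsmrr_applicable)
    return MRR_DEFAULT;
  if (force && dsmrr_cost > default_cost)
    dsmrr_cost= default_cost;          /* never report DS-MRR as costlier */
  return (force || dsmrr_cost <= default_cost) ? MRR_DISK_SWEEP : MRR_DEFAULT;
}
#endif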
4251
static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost);
4255
Get cost of DS-MRR scan
4257
@param keynr Index to be used
4258
@param rows E(Number of rows to be scanned)
4259
@param flags Scan parameters (HA_MRR_* flags)
4260
@param buffer_size INOUT Buffer size
4261
@param cost OUT The cost
4264
@retval TRUE Error, DS-MRR cannot be used (the buffer is too small
4268
bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
4269
uint *buffer_size, COST_VECT *cost)
4271
ulong max_buff_entries, elem_size;
4272
ha_rows rows_in_full_step, rows_in_last_step;
4274
double index_read_cost;
4276
elem_size= h->ref_length + sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION));
4277
max_buff_entries = *buffer_size / elem_size;
4279
if (!max_buff_entries)
4280
return TRUE; /* Buffer does not have enough space for even 1 rowid */
4282
/* Number of iterations we'll make with full buffer */
4283
n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
4286
Get numbers of rows we'll be processing in
4287
- non-last sweep, with full buffer
4288
- last iteration, with non-full buffer
4290
rows_in_full_step= max_buff_entries;
4291
rows_in_last_step= rows % max_buff_entries;
4293
/* Adjust buffer size if we expect to use only part of the buffer */
4296
get_sort_and_sweep_cost(table, rows, cost);
4297
cost->multiply(n_full_steps);
4302
*buffer_size= max(*buffer_size,
4303
(size_t)(1.2*rows_in_last_step) * elem_size +
4304
h->ref_length + table->key_info[keynr].key_length);
4307
COST_VECT last_step_cost;
4308
get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
4309
cost->add(&last_step_cost);
4311
if (n_full_steps != 0)
4312
cost->mem_cost= *buffer_size;
4314
cost->mem_cost= (double)rows_in_last_step * elem_size;
4316
/* Total cost of all index accesses */
4317
index_read_cost= h->index_only_read_time(keynr, (double)rows);
4318
cost->add_io(index_read_cost, 1 /* Random seeks */);
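/*
  Illustrative sketch (not part of the original source): the buffer
  arithmetic used above, as a worked example. With hypothetical numbers
  (8-byte rowids plus an 8-byte range id, a 1000-byte buffer and an
  estimate of 250 rows) the scan needs several full-buffer sweeps plus a
  smaller final sweep.
*/
#if 0
#include <cstdio>

int main()
{
  const unsigned elem_size= 8 + 8;                       /* rowid + range_id ptr */
  const unsigned buffer_size= 1000;
  const unsigned rows= 250;

  unsigned max_buff_entries= buffer_size / elem_size;    /* 62 entries fit */
  if (max_buff_entries == 0)
    return 1;                        /* buffer can't hold even one rowid */

  unsigned n_full_steps= rows / max_buff_entries;        /* 4 full sweeps  */
  unsigned rows_in_last_step= rows % max_buff_entries;   /* 2 rows remain  */

  printf("entries/buffer=%u full_steps=%u last_step_rows=%u\n",
         max_buff_entries, n_full_steps, rows_in_last_step);
  return 0;
}
#endif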
4324
Get cost of one sort-and-sweep step
4327
get_sort_and_sweep_cost()
4328
table Table being accessed
4329
nrows Number of rows to be sorted and retrieved
4333
Get cost of these operations:
4334
- sort an array of #nrows ROWIDs using qsort
4335
- read #nrows records from table in a sweep.
4339
void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost)
4343
get_sweep_read_cost(table, nrows, FALSE, cost);
4344
/* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
4345
double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
4348
cost->cpu_cost += cmp_op * log2(cmp_op);
4356
Get cost of reading nrows table records in a "disk sweep"
4358
A disk sweep read is a sequence of handler->rnd_pos(rowid) calls made
for an ordered sequence of rowids.
4361
We assume hard disk IO. The read is performed as follows:
4363
1. The disk head is moved to the needed cylinder
4364
2. The controller waits for the plate to rotate
4365
3. The data is transferred
4367
Time to do #3 is insignificant compared to #2+#1.
4369
Time to move the disk head is proportional to head travel distance.
4371
Time to wait for the plate to rotate depends on whether the disk head
4374
If disk head wasn't moved, the wait time is proportional to distance
4375
between the previous block and the block we're reading.
4377
If the head was moved, we don't know how much we'll need to wait for the
4378
plate to rotate. We assume the wait time to be a variate with a mean of
4379
0.5 of full rotation time.
4381
Our cost units are "random disk seeks". The cost of a random disk seek is
actually not a constant; it depends on the range of cylinders we're going
to access. We make it constant by introducing a fuzzy concept of "typical
datafile length" (it's fuzzy as it's hard to tell whether it should
include the index file, temporary tables, etc.). Then random seek cost is:
4387
1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
4389
We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
4391
@param table Table to be accessed
4392
@param nrows Number of rows to retrieve
4393
@param interrupted TRUE <=> Assume that the disk sweep will be
4394
interrupted by other disk IO. FALSE - otherwise.
4395
@param cost OUT The cost.
4398
void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted,
4401
DBUG_ENTER("get_sweep_read_cost");
4404
if (table->file->primary_key_is_clustered())
4406
cost->io_count= table->file->read_time(table->s->primary_key,
4407
(uint) nrows, nrows);
4412
ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
4414
n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
4415
if (busy_blocks < 1.0)
4418
DBUG_PRINT("info",("sweep: nblocks=%g, busy_blocks=%g", n_blocks,
4420
cost->io_count= busy_blocks;
4424
/* Assume reading is done in one 'sweep' */
4425
cost->avg_io_cost= (DISK_SEEK_BASE_COST +
4426
DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
4429
DBUG_PRINT("info",("returning cost=%g", cost->total_cost()));
1262
int Cursor::index_read_idx_map(unsigned char * buf, uint32_t index,
1263
const unsigned char * key,
4551
Same as compare_key() but doesn't check in_range_check_pushed_down.
This is used by the index condition pushdown implementation.
4555
int handler::compare_key2(key_range *range)
4559
return 0; // no max range
4560
cmp= key_cmp(range_key_part, range->key, range->length);
4562
cmp= key_compare_result_on_equal;
4566
int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
1264
4567
key_part_map keypart_map,
1265
4568
enum ha_rkey_function find_flag)
1267
4570
int error, error1;
1268
error= doStartIndexScan(index, 0);
4571
error= index_init(index, 0);
1271
4574
error= index_read_map(buf, key, keypart_map, find_flag);
1272
error1= doEndIndexScan();
4575
error1= index_end();
1274
4577
return error ? error : error1;
4582
Returns a list of all known extensions.
4584
No mutexes, worst case race is a minor surplus memory allocation
4585
We have to recreate the extension map if mysqld is restarted (for example
4589
pointer pointer to TYPELIB structure
4591
static my_bool exts_handlerton(THD *unused, plugin_ref plugin,
4594
List<char> *found_exts= (List<char> *) arg;
4595
handlerton *hton= plugin_data(plugin, handlerton *);
4597
if (hton->state == SHOW_OPTION_YES && hton->create &&
4598
(file= hton->create(hton, (TABLE_SHARE*) 0, current_thd->mem_root)))
4600
List_iterator_fast<char> it(*found_exts);
4601
const char **ext, *old_ext;
4603
for (ext= file->bas_ext(); *ext; ext++)
4605
while ((old_ext= it++))
4607
if (!strcmp(old_ext, *ext))
4611
found_exts->push_back((char *) *ext);
4620
TYPELIB *ha_known_exts(void)
4622
if (!known_extensions.type_names || mysys_usage_id != known_extensions_id)
4624
List<char> found_exts;
4625
const char **ext, *old_ext;
4627
known_extensions_id= mysys_usage_id;
4629
plugin_foreach(NULL, exts_handlerton,
4630
MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);
4632
ext= (const char **) my_once_alloc(sizeof(char *)*
4633
(found_exts.elements+1),
4634
MYF(MY_WME | MY_FAE));
4636
DBUG_ASSERT(ext != 0);
4637
known_extensions.count= found_exts.elements;
4638
known_extensions.type_names= ext;
4640
List_iterator_fast<char> it(found_exts);
4641
while ((old_ext= it++))
4645
return &known_extensions;
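/*
  Illustrative sketch (not part of the original source): the deduplication
  that ha_known_exts() performs, using standard containers instead of
  List<char> and my_once_alloc. Each engine contributes its file
  extensions; extensions already seen are skipped. The sample extension
  lists in the usage note are hypothetical.
*/
#if 0
#include <algorithm>
#include <string>
#include <vector>

static std::vector<std::string>
collect_known_exts(const std::vector<std::vector<std::string> > &per_engine)
{
  std::vector<std::string> found_exts;
  for (const auto &exts : per_engine)
    for (const std::string &ext : exts)
      if (std::find(found_exts.begin(), found_exts.end(), ext) == found_exts.end())
        found_exts.push_back(ext);      /* keep only the first occurrence */
  return found_exts;
}

/*
  collect_known_exts({{".MYD", ".MYI"}, {".ibd", ".MYI"}})
  yields {".MYD", ".MYI", ".ibd"}.
*/
#endif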
4649
static bool stat_print(THD *thd, const char *type, uint type_len,
4650
const char *file, uint file_len,
4651
const char *status, uint status_len)
4653
Protocol *protocol= thd->protocol;
4654
protocol->prepare_for_resend();
4655
protocol->store(type, type_len, system_charset_info);
4656
protocol->store(file, file_len, system_charset_info);
4657
protocol->store(status, status_len, system_charset_info);
4658
if (protocol->write())
4663
bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
4665
List<Item> field_list;
4666
Protocol *protocol= thd->protocol;
4669
field_list.push_back(new Item_empty_string("Type",10));
4670
field_list.push_back(new Item_empty_string("Name",FN_REFLEN));
4671
field_list.push_back(new Item_empty_string("Status",10));
4673
if (protocol->send_fields(&field_list,
4674
Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
4677
result= db_type->show_status &&
4678
db_type->show_status(db_type, thd, stat_print, stat) ? 1 : 0;
1278
4687
Check if the conditions for row-based binlogging are correct for the table.

A row in the given table should be replicated if:
- Row-based replication is enabled in the current thread
- The binlog is enabled
- It is not a temporary table
1284
static bool log_row_for_replication(Table* table,
1285
const unsigned char *before_record,
1286
const unsigned char *after_record)
1288
TransactionServices &transaction_services= TransactionServices::singleton();
1289
Session *const session= table->in_use;
1291
if (table->getShare()->getType() || not transaction_services.shouldConstructMessages())
1296
switch (session->lex->sql_command)
1298
case SQLCOM_CREATE_TABLE:
1300
* We are in a CREATE TABLE ... SELECT statement
1301
* and the kernel has already created the table
1302
* and put a CreateTableStatement in the active
1303
* Transaction message. Here, we add a new InsertRecord
1304
* to a new Transaction message (because the above
1305
* CREATE TABLE will commit the transaction containing
1308
result= transaction_services.insertRecord(session, table);
1310
case SQLCOM_REPLACE:
1311
case SQLCOM_REPLACE_SELECT:
1313
* This is a total hack because of the code that is
* in write_record() in sql_insert.cc. During
* a REPLACE statement, insertRecord() is
* called. If it fails, deleteRecord() is
* called, followed by a repeat of the original
* call to insertRecord(). So, log_row_for_replication
1319
* could be called multiple times for a REPLACE
1320
* statement. The below looks at the values of before_record
1321
* and after_record to determine which call to this
1322
* function is for the delete or the insert, since NULL
1323
* is passed for after_record for the delete and NULL is
1324
* passed for before_record for the insert...
1326
* In addition, there is an optimization that allows an
1327
* engine to convert the above delete + insert into an
1328
* update, so we must also check for this case below...
1330
if (after_record == NULL)
1333
* The storage engine is passed the record in table->record[1]
1334
* as the row to delete (this is the conflicting row), so
1335
* we need to notify TransactionService to use that row.
1337
transaction_services.deleteRecord(session, table, true);
1339
* We set the "current" statement message to NULL. This triggers
1340
* the replication services component to generate a new statement
1341
* message for the inserted record which will come next.
1343
transaction_services.finalizeStatementMessage(*session->getStatementMessage(), session);
1347
if (before_record == NULL)
1348
result= transaction_services.insertRecord(session, table);
1350
transaction_services.updateRecord(session, table, before_record, after_record);
1354
case SQLCOM_INSERT_SELECT:
1357
* The else block below represents an
1358
* INSERT ... ON DUPLICATE KEY UPDATE that
1359
* has hit a key conflict and actually done
1362
if (before_record == NULL)
1363
result= transaction_services.insertRecord(session, table);
1365
transaction_services.updateRecord(session, table, before_record, after_record);
1369
transaction_services.updateRecord(session, table, before_record, after_record);
1373
transaction_services.deleteRecord(session, table);
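/*
  Illustrative sketch (not part of the original source): the convention
  the dispatch above relies on. Which replication record to emit is
  derived purely from which of the two row images is present: only the
  new image means INSERT, only the old image means DELETE, both mean
  UPDATE. The RowOp enum and function name are hypothetical.
*/
#if 0
#include <cstddef>

enum RowOp { ROW_INSERT, ROW_UPDATE, ROW_DELETE };

static RowOp classify_row_event(const unsigned char *before_record,
                                const unsigned char *after_record)
{
  if (after_record == NULL)
    return ROW_DELETE;        /* old image only: the conflicting row goes */
  if (before_record == NULL)
    return ROW_INSERT;        /* new image only: a fresh row              */
  return ROW_UPDATE;          /* both images: engine converted to update  */
}
#endif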
1382
int Cursor::ha_external_lock(Session *session, int lock_type)
4693
- The binary log is open
4694
- The database the table resides in shall be binlogged (binlog_*_db rules)
4695
- table is not mysql.event
4698
static bool check_table_binlog_row_based(THD *thd, TABLE *table)
4700
if (table->s->cached_row_logging_check == -1)
4702
int const check(table->s->tmp_table == NO_TMP_TABLE &&
4703
binlog_filter->db_ok(table->s->db.str));
4704
table->s->cached_row_logging_check= check;
4707
DBUG_ASSERT(table->s->cached_row_logging_check == 0 ||
4708
table->s->cached_row_logging_check == 1);
4710
return (thd->current_stmt_binlog_row_based &&
4711
table->s->cached_row_logging_check &&
4712
(thd->options & OPTION_BIN_LOG) &&
4713
mysql_bin_log.is_open());
4718
Write table maps for all (manually or automatically) locked tables

This function will generate and write table maps for all tables
that are locked by the thread 'thd'. Both manually locked tables
(stored in THD::locked_tables) and automatically locked tables (stored
in THD::lock) are considered.
4726
@param thd Pointer to THD structure
4729
@retval 1 Failed to write all table maps
4736
static int write_locked_table_maps(THD *thd)
4738
DBUG_ENTER("write_locked_table_maps");
4739
DBUG_PRINT("enter", ("thd: 0x%lx thd->lock: 0x%lx thd->locked_tables: 0x%lx "
4740
"thd->extra_lock: 0x%lx",
4741
(long) thd, (long) thd->lock,
4742
(long) thd->locked_tables, (long) thd->extra_lock));
4744
if (thd->get_binlog_table_maps() == 0)
4746
MYSQL_LOCK *locks[3];
4747
locks[0]= thd->extra_lock;
4748
locks[1]= thd->lock;
4749
locks[2]= thd->locked_tables;
4750
for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
4752
MYSQL_LOCK const *const lock= locks[i];
4756
TABLE **const end_ptr= lock->table + lock->table_count;
4757
for (TABLE **table_ptr= lock->table ;
4758
table_ptr != end_ptr ;
4761
TABLE *const table= *table_ptr;
4762
DBUG_PRINT("info", ("Checking table %s", table->s->table_name.str));
4763
if (table->current_lock == F_WRLCK &&
4764
check_table_binlog_row_based(thd, table))
4766
int const has_trans= table->file->has_transactions();
4767
int const error= thd->binlog_write_table_map(table, has_trans);
4769
If an error occurs, it is the responsibility of the caller to
4770
roll back the transaction.
4772
if (unlikely(error))
4782
typedef bool Log_func(THD*, TABLE*, bool, const uchar*, const uchar*);
4784
static int binlog_log_row(TABLE* table,
4785
const uchar *before_record,
4786
const uchar *after_record,
4789
if (table->no_replicate)
4792
THD *const thd= table->in_use;
4794
if (check_table_binlog_row_based(thd, table))
4796
DBUG_DUMP("read_set 10", (uchar*) table->read_set->bitmap,
4797
(table->s->fields + 7) / 8);
4799
If there are no table maps written to the binary log, this is
4800
the first row handled in this statement. In that case, we need
4801
to write table maps for all locked tables to the binary log.
4803
if (likely(!(error= write_locked_table_maps(thd))))
4805
bool const has_trans= table->file->has_transactions();
4806
error= (*log_func)(thd, table, has_trans, before_record, after_record);
4809
return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
4812
int handler::ha_external_lock(THD *thd, int lock_type)
4814
DBUG_ENTER("handler::ha_external_lock");
1385
4816
Whether this is lock or unlock, this should be true, and is to verify that
1386
4817
if get_auto_increment() was called (thus may have reserved intervals or
1387
4818
taken a table lock), ha_release_auto_increment() was too.
1389
assert(next_insert_id == 0);
1391
if (DRIZZLE_CURSOR_RDLOCK_START_ENABLED() ||
1392
DRIZZLE_CURSOR_WRLOCK_START_ENABLED() ||
1393
DRIZZLE_CURSOR_UNLOCK_START_ENABLED())
1395
if (lock_type == F_RDLCK)
1397
DRIZZLE_CURSOR_RDLOCK_START(getTable()->getShare()->getSchemaName(),
1398
getTable()->getShare()->getTableName());
1400
else if (lock_type == F_WRLCK)
1402
DRIZZLE_CURSOR_WRLOCK_START(getTable()->getShare()->getSchemaName(),
1403
getTable()->getShare()->getTableName());
1405
else if (lock_type == F_UNLCK)
1407
DRIZZLE_CURSOR_UNLOCK_START(getTable()->getShare()->getSchemaName(),
1408
getTable()->getShare()->getTableName());
4820
DBUG_ASSERT(next_insert_id == 0);
1413
4823
We cache the table flags if the locking succeeded. Otherwise, we
1414
4824
keep them as they were when they were fetched in ha_open().
1417
int error= external_lock(session, lock_type);
1419
if (DRIZZLE_CURSOR_RDLOCK_DONE_ENABLED() ||
1420
DRIZZLE_CURSOR_WRLOCK_DONE_ENABLED() ||
1421
DRIZZLE_CURSOR_UNLOCK_DONE_ENABLED())
1423
if (lock_type == F_RDLCK)
1425
DRIZZLE_CURSOR_RDLOCK_DONE(error);
1427
else if (lock_type == F_WRLCK)
1429
DRIZZLE_CURSOR_WRLOCK_DONE(error);
1431
else if (lock_type == F_UNLCK)
1433
DRIZZLE_CURSOR_UNLOCK_DONE(error);
4826
MYSQL_EXTERNAL_LOCK(lock_type);
4828
int error= external_lock(thd, lock_type);
4830
cached_table_flags= table_flags();
1442
Check Cursor usage and reset state of file to after 'open'
4836
Check handler usage and reset state of file to after 'open'
1444
int Cursor::ha_reset()
4838
int handler::ha_reset()
4840
DBUG_ENTER("ha_reset");
1446
4841
/* Check that we have called all proper deallocation functions */
1447
assert(! getTable()->getShare()->all_set.none());
1448
assert(getTable()->key_read == 0);
1449
/* ensure that ha_index_end / endTableScan has been called */
1450
assert(inited == NONE);
4842
DBUG_ASSERT((uchar*) table->def_read_set.bitmap +
4843
table->s->column_bitmap_size ==
4844
(uchar*) table->def_write_set.bitmap);
4845
DBUG_ASSERT(bitmap_is_set_all(&table->s->all_set));
4846
DBUG_ASSERT(table->key_read == 0);
4847
/* ensure that ha_index_end / ha_rnd_end has been called */
4848
DBUG_ASSERT(inited == NONE);
1451
4849
/* Free cache used by filesort */
1452
getTable()->free_io_cache();
4850
free_io_cache(table);
1453
4851
/* reset the bitmaps to point to defaults */
1454
getTable()->default_column_bitmaps();
1459
int Cursor::insertRecord(unsigned char *buf)
1464
* If we have a timestamp column, update it to the current time
1466
* @TODO Technically, the below two lines can be take even further out of the
1467
* Cursor interface and into the fill_record() method.
1469
if (getTable()->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
1471
getTable()->timestamp_field->set_time();
1474
DRIZZLE_INSERT_ROW_START(getTable()->getShare()->getSchemaName(), getTable()->getShare()->getTableName());
1475
setTransactionReadWrite();
1477
if (unlikely(plugin::EventObserver::beforeInsertRecord(*getTable(), buf)))
1479
error= ER_EVENT_OBSERVER_PLUGIN;
1483
error= doInsertRecord(buf);
1484
if (unlikely(plugin::EventObserver::afterInsertRecord(*getTable(), buf, error)))
1486
error= ER_EVENT_OBSERVER_PLUGIN;
1490
ha_statistic_increment(&system_status_var::ha_write_count);
1492
DRIZZLE_INSERT_ROW_DONE(error);
1494
if (unlikely(error))
1499
if (unlikely(log_row_for_replication(getTable(), NULL, buf)))
1500
return HA_ERR_RBR_LOGGING_FAILED;
1506
int Cursor::updateRecord(const unsigned char *old_data, unsigned char *new_data)
1511
Some storage engines require that the new record is in getInsertRecord()
1512
(and the old record is in getUpdateRecord()).
1514
assert(new_data == getTable()->getInsertRecord());
1516
DRIZZLE_UPDATE_ROW_START(getTable()->getShare()->getSchemaName(), getTable()->getShare()->getTableName());
1517
setTransactionReadWrite();
1518
if (unlikely(plugin::EventObserver::beforeUpdateRecord(*getTable(), old_data, new_data)))
1520
error= ER_EVENT_OBSERVER_PLUGIN;
1524
if (getTable()->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
1526
getTable()->timestamp_field->set_time();
1529
error= doUpdateRecord(old_data, new_data);
1530
if (unlikely(plugin::EventObserver::afterUpdateRecord(*getTable(), old_data, new_data, error)))
1532
error= ER_EVENT_OBSERVER_PLUGIN;
1536
ha_statistic_increment(&system_status_var::ha_update_count);
1538
DRIZZLE_UPDATE_ROW_DONE(error);
1540
if (unlikely(error))
1545
if (unlikely(log_row_for_replication(getTable(), old_data, new_data)))
1546
return HA_ERR_RBR_LOGGING_FAILED;
1550
TableShare *Cursor::getShare()
1552
return getTable()->getMutableShare();
1555
int Cursor::deleteRecord(const unsigned char *buf)
1559
DRIZZLE_DELETE_ROW_START(getTable()->getShare()->getSchemaName(), getTable()->getShare()->getTableName());
1560
setTransactionReadWrite();
1561
if (unlikely(plugin::EventObserver::beforeDeleteRecord(*getTable(), buf)))
1563
error= ER_EVENT_OBSERVER_PLUGIN;
1567
error= doDeleteRecord(buf);
1568
if (unlikely(plugin::EventObserver::afterDeleteRecord(*getTable(), buf, error)))
1570
error= ER_EVENT_OBSERVER_PLUGIN;
1574
ha_statistic_increment(&system_status_var::ha_delete_count);
1576
DRIZZLE_DELETE_ROW_DONE(error);
1578
if (unlikely(error))
1581
if (unlikely(log_row_for_replication(getTable(), buf, NULL)))
1582
return HA_ERR_RBR_LOGGING_FAILED;
1587
} /* namespace drizzled */
4852
table->default_column_bitmaps();
4853
DBUG_RETURN(reset());
4857
int handler::ha_write_row(uchar *buf)
4860
Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
4861
DBUG_ENTER("handler::ha_write_row");
4862
MYSQL_INSERT_ROW_START();
4864
mark_trx_read_write();
4866
if (unlikely(error= write_row(buf)))
4868
if (unlikely(error= binlog_log_row(table, 0, buf, log_func)))
4869
DBUG_RETURN(error); /* purecov: inspected */
4870
MYSQL_INSERT_ROW_END();
4875
int handler::ha_update_row(const uchar *old_data, uchar *new_data)
4878
Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
4881
Some storage engines require that the new record is in record[0]
4882
(and the old record is in record[1]).
4884
DBUG_ASSERT(new_data == table->record[0]);
4886
mark_trx_read_write();
4888
if (unlikely(error= update_row(old_data, new_data)))
4890
if (unlikely(error= binlog_log_row(table, old_data, new_data, log_func)))
4895
int handler::ha_delete_row(const uchar *buf)
4898
Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
4900
mark_trx_read_write();
4902
if (unlikely(error= delete_row(buf)))
4904
if (unlikely(error= binlog_log_row(table, buf, 0, log_func)))
4913
use_hidden_primary_key() is called in case of an update/delete when
4914
(table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
4915
but we don't have a primary key
4917
void handler::use_hidden_primary_key()
4919
/* fallback to use all columns in the table to identify row */
4920
table->use_all_columns();