92
92
SETMSG(HA_ERR_WRONG_INDEX, "Wrong index given to function");
93
93
SETMSG(HA_ERR_CRASHED, ER(ER_NOT_KEYFILE));
94
94
SETMSG(HA_ERR_WRONG_IN_RECORD, ER(ER_CRASHED_ON_USAGE));
95
SETMSG(HA_ERR_OUT_OF_MEM, "Table handler out of memory");
95
SETMSG(HA_ERR_OUT_OF_MEM, "Table Cursor out of memory");
96
96
SETMSG(HA_ERR_NOT_A_TABLE, "Incorrect file format '%.64s'");
97
97
SETMSG(HA_ERR_WRONG_COMMAND, "Command not supported");
98
98
SETMSG(HA_ERR_OLD_FILE, ER(ER_OLD_KEYFILE));
391
391
in it unless the engine says so. Thus, in order to be
392
392
a part of a transaction, the engine must "register" itself.
393
393
This is done by invoking trans_register_ha() server call.
394
Normally the engine registers itself whenever handler::external_lock()
394
Normally the engine registers itself whenever Cursor::external_lock()
395
395
is called. trans_register_ha() can be invoked many times: if
396
396
an engine is already registered, the call does nothing.
397
397
In case autocommit is not set, the engine must register itself
402
402
Note that although the registration interface in itself is
403
403
fairly clear, the current usage practice often leads to undesired
404
404
effects. E.g. since a call to trans_register_ha() in most engines
405
is embedded into implementation of handler::external_lock(), some
405
is embedded into implementation of Cursor::external_lock(), some
406
406
DDL statements start a transaction (at least from the server
407
407
point of view) even though they are not expected to. E.g.
408
408
CREATE TABLE does not start a transaction, since
409
handler::external_lock() is never called during CREATE TABLE. But
410
CREATE TABLE ... SELECT does, since handler::external_lock() is
409
Cursor::external_lock() is never called during CREATE TABLE. But
410
CREATE TABLE ... SELECT does, since Cursor::external_lock() is
411
411
called for the table that is being selected from. This has no
412
412
practical effects currently, but must be kept in mind
418
418
During statement execution, whenever any of data-modifying
419
PSEA API methods is used, e.g. handler::write_row() or
420
handler::update_row(), the read-write flag is raised in the
419
PSEA API methods is used, e.g. Cursor::write_row() or
420
Cursor::update_row(), the read-write flag is raised in the
421
421
statement transaction for the involved engine.
422
422
Currently all PSEA calls are "traced", and the data cannot be
423
423
changed in a way other than issuing a PSEA call. Important:
945
957
/****************************************************************************
946
** General handler functions
958
** General Cursor functions
947
959
****************************************************************************/
948
handler::~handler(void)
960
Cursor::~Cursor(void)
950
962
assert(locked == false);
951
963
/* TODO: assert(inited == NONE); */
955
handler *handler::clone(MEM_ROOT *mem_root)
967
Cursor *Cursor::clone(MEM_ROOT *mem_root)
957
handler *new_handler= get_new_handler(table->s, mem_root, table->s->db_type());
969
Cursor *new_handler= table->s->db_type()->getCursor(table->s, mem_root);
959
Allocate handler->ref here because otherwise ha_open will allocate it
972
Allocate Cursor->ref here because otherwise ha_open will allocate it
960
973
on this->table->mem_root and we will not be able to reclaim that memory
961
when the clone handler object is destroyed.
974
when the clone Cursor object is destroyed.
963
976
if (!(new_handler->ref= (unsigned char*) alloc_root(mem_root, ALIGN_SIZE(ref_length)*2)))
1000
int handler::ha_rnd_end()
1013
int Cursor::ha_rnd_end()
1002
1015
assert(inited==RND);
1004
1017
return(rnd_end());
1007
int handler::ha_index_or_rnd_end()
1020
int Cursor::ha_index_or_rnd_end()
1009
1022
return inited == INDEX ? ha_index_end() : inited == RND ? ha_rnd_end() : 0;
1012
handler::Table_flags handler::ha_table_flags() const
1025
Cursor::Table_flags Cursor::ha_table_flags() const
1014
1027
return cached_table_flags;
1017
void handler::ha_start_bulk_insert(ha_rows rows)
1030
void Cursor::ha_start_bulk_insert(ha_rows rows)
1019
1032
estimation_rows_to_insert= rows;
1020
1033
start_bulk_insert(rows);
1023
int handler::ha_end_bulk_insert()
1036
int Cursor::ha_end_bulk_insert()
1025
1038
estimation_rows_to_insert= 0;
1026
1039
return end_bulk_insert();
1029
void handler::change_table_ptr(Table *table_arg, TableShare *share)
1042
void Cursor::change_table_ptr(Table *table_arg, TableShare *share)
1031
1044
table= table_arg;
1032
1045
table_share= share;
1035
const key_map *handler::keys_to_use_for_scanning()
1048
const key_map *Cursor::keys_to_use_for_scanning()
1037
1050
return &key_map_empty;
1040
bool handler::has_transactions()
1053
bool Cursor::has_transactions()
1042
1055
return (ha_table_flags() & HA_NO_TRANSACTIONS) == 0;
1045
void handler::ha_statistic_increment(ulong SSV::*offset) const
1058
void Cursor::ha_statistic_increment(ulong SSV::*offset) const
1047
1060
status_var_increment(table->in_use->status_var.*offset);
1050
void **handler::ha_data(Session *session) const
1063
void **Cursor::ha_data(Session *session) const
1052
1065
return session_ha_data(session, engine);
1055
Session *handler::ha_session(void) const
1068
Session *Cursor::ha_session(void) const
1057
1070
assert(!table || !table->in_use || table->in_use == current_session);
1058
1071
return (table && table->in_use) ? table->in_use : current_session;
1062
bool handler::is_fatal_error(int error, uint32_t flags)
1075
bool Cursor::is_fatal_error(int error, uint32_t flags)
1065
1078
((flags & HA_CHECK_DUP_KEY) &&
1073
/* Return the record count stored in stats.records. */
ha_rows handler::records()
{
  return stats.records;
}
1086
/* Return the record count stored in stats.records. */
ha_rows Cursor::records()
{
  return stats.records;
}
1076
Open database-handler.
1089
Open database-Cursor.
1078
1091
Try O_RDONLY if cannot open as O_RDWR
1079
1092
Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set
1081
int handler::ha_open(Table *table_arg, const char *name, int mode,
1094
int Cursor::ha_open(Table *table_arg, const char *name, int mode,
1082
1095
int test_if_locked)
1498
1511
error=index_last(table->record[1]);
1500
1513
MySQL implicitly assumes such method does locking (as MySQL decides to
1501
use nr+increment without checking again with the handler, in
1502
handler::update_auto_increment()), so reserves to infinite.
1514
use nr+increment without checking again with the Cursor, in
1515
Cursor::update_auto_increment()), so reserves to infinite.
1504
1517
*nb_reserved_values= UINT64_MAX;
1775
Return an error message specific to this handler.
1790
Return an error message specific to this Cursor.
1777
@param error error code previously returned by handler
1792
@param error error code previously returned by Cursor
1778
1793
@param buf pointer to String where to add error message
1781
1796
Returns true if this is a temporary error
1783
bool handler::get_error_message(int , String* )
1798
bool Cursor::get_error_message(int , String* )
1789
1804
/* Code left, but Drizzle has no legacy yet (while MySQL did) */
1790
int handler::check_old_types()
1805
int Cursor::check_old_types()
2089
2097
It is assumed that we will read through the whole key range and that all
2090
2098
key blocks are half full (normally things are much better). It is also
2091
assumed that each time we read the next key from the index, the handler
2099
assumed that each time we read the next key from the index, the Cursor
2092
2100
performs a random seek, thus the cost is proportional to the number of
2096
Consider joining this function and handler::read_time() into one
2097
handler::read_time(keynr, records, ranges, bool index_only) function.
2104
Consider joining this function and Cursor::read_time() into one
2105
Cursor::read_time(keynr, records, ranges, bool index_only) function.
2100
2108
Estimated cost of 'index only' scan
2103
double handler::index_only_read_time(uint32_t keynr, double key_records)
2111
double Cursor::index_only_read_time(uint32_t keynr, double key_records)
2105
2113
uint32_t keys_per_block= (stats.block_size/2/
2106
2114
(table->key_info[keynr].key_length + ref_length) + 1);
2236
2244
other Error or can't perform the requested scan
2239
int handler::multi_range_read_info(uint32_t keyno, uint32_t n_ranges, uint32_t n_rows,
2247
int Cursor::multi_range_read_info(uint32_t keyno, uint32_t n_ranges, uint32_t n_rows,
2240
2248
uint32_t *bufsz, uint32_t *flags, COST_VECT *cost)
2242
2250
*bufsz= 0; /* Default implementation doesn't need a buffer */
2380
Get cost of reading nrows table records in a "disk sweep"
2382
A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that are made
2383
for an ordered sequence of rowids.
2385
We assume hard disk IO. The read is performed as follows:
2387
1. The disk head is moved to the needed cylinder
2388
2. The controller waits for the plate to rotate
2389
3. The data is transferred
2391
Time to do #3 is insignificant compared to #2+#1.
2393
Time to move the disk head is proportional to head travel distance.
2395
Time to wait for the plate to rotate depends on whether the disk head
2398
If disk head wasn't moved, the wait time is proportional to distance
2399
between the previous block and the block we're reading.
2401
If the head was moved, we don't know how much we'll need to wait for the
2402
plate to rotate. We assume the wait time to be a variate with a mean of
2403
0.5 of full rotation time.
2405
Our cost units are "random disk seeks". The cost of random disk seek is
2406
actually not a constant, it depends on the range of cylinders we're going
2407
to access. We make it constant by introducing a fuzzy concept of "typical
2408
datafile length" (it's fuzzy as it's hard to tell whether it should
2409
include index file, temp.tables etc). Then random seek cost is:
2411
1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
2413
We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
2415
@param table Table to be accessed
2416
@param nrows Number of rows to retrieve
2417
@param interrupted true <=> Assume that the disk sweep will be
2418
interrupted by other disk IO. false - otherwise.
2419
@param cost OUT The cost.
2422
void get_sweep_read_cost(Table *table, ha_rows nrows, bool interrupted,
2426
if (table->file->primary_key_is_clustered())
2428
cost->io_count= table->file->read_time(table->s->primary_key,
2429
(uint32_t) nrows, nrows);
2434
ceil(uint64_t2double(table->file->stats.data_file_length) / IO_SIZE);
2436
n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
2437
if (busy_blocks < 1.0)
2440
cost->io_count= busy_blocks;
2444
/* Assume reading is done in one 'sweep' */
2445
cost->avg_io_cost= (DISK_SEEK_BASE_COST +
2446
DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
2452
2387
/* **************************************************************************
2453
2388
* DS-MRR implementation ends
2454
2389
***************************************************************************/
2598
2533
Check if the conditions for row-based binlogging are correct for the table.
2600
2535
A row in the given table should be replicated if:
2601
- Row-based replication is enabled in the current thread
2602
- The binlog is enabled
2603
2536
- It is not a temporary table
2604
- The binary log is open
2605
- The database the table resides in shall be binlogged (binlog_*_db rules)
2606
- table is not mysql.event
2609
2539
static bool log_row_for_replication(Table* table,
2610
const unsigned char *before_record,
2611
const unsigned char *after_record)
2540
const unsigned char *before_record,
2541
const unsigned char *after_record)
2613
2543
ReplicationServices &replication_services= ReplicationServices::singleton();
2614
2544
Session *const session= table->in_use;
2546
if (table->s->tmp_table || ! replication_services.isActive())
2616
2549
switch (session->lex->sql_command)
2618
2551
case SQLCOM_REPLACE:
2552
case SQLCOM_REPLACE_SELECT:
2554
* This is a total hack because of the code that is
2555
* in write_record() in sql_insert.cc. During
2556
* a REPLACE statement, a call to ha_write_row() is
2557
* called. If it fails, then a call to ha_delete_row()
2558
* is called, followed by a repeat of the original
2559
* call to ha_write_row(). So, log_row_for_replication
2560
* could be called either once or twice for a REPLACE
2561
* statement. The below looks at the values of before_record
2562
* and after_record to determine which call to this
2563
* function is for the delete or the insert, since NULL
2564
* is passed for after_record for the delete and NULL is
2565
* passed for before_record for the insert...
2567
* In addition, there is an optimization that allows an
2568
* engine to convert the above delete + insert into an
2569
* update, so we must also check for this case below...
2571
if (after_record == NULL)
2573
replication_services.deleteRecord(session, table);
2575
* We set the "current" statement message to NULL. This triggers
2576
* the replication services component to generate a new statement
2577
* message for the inserted record which will come next.
2579
replication_services.finalizeStatement(*session->getStatementMessage(), session);
2583
if (before_record == NULL)
2584
replication_services.insertRecord(session, table);
2586
replication_services.updateRecord(session, table, before_record, after_record);
2619
2589
case SQLCOM_INSERT:
2620
case SQLCOM_REPLACE_SELECT:
2621
2590
case SQLCOM_INSERT_SELECT:
2622
case SQLCOM_CREATE_TABLE:
2623
replication_services.insertRecord(session, table);
2592
* The else block below represents an
2593
* INSERT ... ON DUPLICATE KEY UPDATE that
2594
* has hit a key conflict and actually done
2597
if (before_record == NULL)
2598
replication_services.insertRecord(session, table);
2600
replication_services.updateRecord(session, table, before_record, after_record);
2626
2603
case SQLCOM_UPDATE: