~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000-2006 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
/**
17
  @file handler.cc
18
19
  Handler-calling-functions
20
*/
21
22
#ifdef USE_PRAGMA_IMPLEMENTATION
23
#pragma implementation				// gcc: Class implementation
24
#endif
25
26
#include "mysql_priv.h"
27
#include "rpl_filter.h"
28
#include <errno.h>
29
30
/*
31
  While we have legacy_db_type, we have this array to
32
  check for dups and to find handlerton from legacy_db_type.
33
  Remove when legacy_db_type is finally gone
34
*/
35
st_plugin_int *hton2plugin[MAX_HA];
36
37
static handlerton *installed_htons[128];
38
39
#define BITMAP_STACKBUF_SIZE (128/8)
40
41
KEY_CREATE_INFO default_key_create_info= { HA_KEY_ALG_UNDEF, 0, {NullS,0}, {NullS,0} };
42
43
/* number of entries in handlertons[] */
61 by Brian Aker
Conversion of handler type.
44
uint32_t total_ha= 0;
1 by brian
clean slate
45
/* number of storage engines (from handlertons[]) that support 2pc */
61 by Brian Aker
Conversion of handler type.
46
uint32_t total_ha_2pc= 0;
1 by brian
clean slate
47
/* size of savepoint storage area (see ha_init) */
61 by Brian Aker
Conversion of handler type.
48
uint32_t savepoint_alloc_size= 0;
1 by brian
clean slate
49
50
static const LEX_STRING sys_table_aliases[]=
51
{
52
  { C_STRING_WITH_LEN("INNOBASE") },  { C_STRING_WITH_LEN("INNODB") },
53
  { C_STRING_WITH_LEN("HEAP") },      { C_STRING_WITH_LEN("MEMORY") },
54
  {NullS, 0}
55
};
56
57
const char *ha_row_type[] = {
58
  "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "PAGE", "?","?","?"
59
};
60
61
const char *tx_isolation_names[] =
62
{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
63
  NullS};
64
TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
65
			       tx_isolation_names, NULL};
66
67
static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
68
uint known_extensions_id= 0;
69
70
71
72
/**
  Return the plugin of the default storage engine for a thread.

  @param thd         current thread

  @return
    the thread's own table_plugin when it is set, otherwise the result of
    my_plugin_lock() on the global default table_plugin
*/
static plugin_ref ha_default_plugin(THD *thd)
{
  if (!thd->variables.table_plugin)
    return my_plugin_lock(thd, &global_system_variables.table_plugin);

  return thd->variables.table_plugin;
}
78
79
80
/**
81
  Return the default storage engine handlerton for thread
82
83
  @param ha_default_handlerton(thd)
84
  @param thd         current thread
85
86
  @return
87
    pointer to handlerton
88
*/
89
handlerton *ha_default_handlerton(THD *thd)
90
{
91
  plugin_ref plugin= ha_default_plugin(thd);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
92
  assert(plugin);
1 by brian
clean slate
93
  handlerton *hton= plugin_data(plugin, handlerton*);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
94
  assert(hton);
1 by brian
clean slate
95
  return hton;
96
}
97
98
99
/**
100
  Return the storage engine handlerton for the supplied name
101
  
102
  @param thd         current thread
103
  @param name        name of storage engine
104
  
105
  @return
106
    pointer to storage engine plugin handle
107
*/
108
plugin_ref ha_resolve_by_name(THD *thd, const LEX_STRING *name)
109
{
110
  const LEX_STRING *table_alias;
111
  plugin_ref plugin;
112
113
redo:
114
  /* my_strnncoll is a macro and gcc doesn't do early expansion of macro */
115
  if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
116
                           (const uchar *)name->str, name->length,
117
                           (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
118
    return ha_default_plugin(thd);
119
120
  if ((plugin= my_plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN)))
121
  {
122
    handlerton *hton= plugin_data(plugin, handlerton *);
123
    if (!(hton->flags & HTON_NOT_USER_SELECTABLE))
124
      return plugin;
125
      
126
    /*
127
      unlocking plugin immediately after locking is relatively low cost.
128
    */
129
    plugin_unlock(thd, plugin);
130
  }
131
132
  /*
133
    We check for the historical aliases.
134
  */
135
  for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
136
  {
137
    if (!my_strnncoll(&my_charset_latin1,
138
                      (const uchar *)name->str, name->length,
139
                      (const uchar *)table_alias->str, table_alias->length))
140
    {
141
      name= table_alias + 1;
142
      goto redo;
143
    }
144
  }
145
146
  return NULL;
147
}
148
149
150
/**
  Lock the plugin that owns the given handlerton.

  @param thd         current thread
  @param hton        storage engine handlerton, may be NULL

  @return
    result of my_plugin_lock() for the plugin registered in the
    handlerton's slot, or NULL when hton is NULL
*/
plugin_ref ha_lock_engine(THD *thd, handlerton *hton)
{
  if (!hton)
    return NULL;

  st_plugin_int **plugin= &hton2plugin[hton->slot];
  return my_plugin_lock(thd, &plugin);
}
160
161
162
/**
  Find the handlerton registered under a legacy_db_type code.

  @param thd         current thread
  @param db_type     legacy storage engine type code

  @return
    the thread's default handlerton for DB_TYPE_DEFAULT; the locked
    handlerton installed for db_type when db_type lies strictly between
    DB_TYPE_UNKNOWN and DB_TYPE_DEFAULT and the engine could be locked;
    NULL otherwise
*/
handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
{
  if (db_type == DB_TYPE_DEFAULT)
    return ha_default_handlerton(thd);

  if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT)
  {
    plugin_ref plugin= ha_lock_engine(thd, installed_htons[db_type]);
    if (plugin)
      return plugin_data(plugin, handlerton*);
  }

  return NULL;
}
177
178
179
/**
180
  Use other database handler if databasehandler is not compiled in.
181
*/
182
handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
183
                          bool no_substitute, bool report_error)
184
{
185
  handlerton *hton= ha_resolve_by_legacy_type(thd, database_type);
186
  if (ha_storage_engine_is_enabled(hton))
187
    return hton;
188
189
  if (no_substitute)
190
  {
191
    if (report_error)
192
    {
193
      const char *engine_name= ha_resolve_storage_engine_name(hton);
194
      my_error(ER_FEATURE_DISABLED,MYF(0),engine_name,engine_name);
195
    }
196
    return NULL;
197
  }
198
199
  switch (database_type) {
200
  case DB_TYPE_HASH:
201
    return ha_resolve_by_legacy_type(thd, DB_TYPE_HASH);
202
  default:
203
    break;
204
  }
205
206
  return ha_default_handlerton(thd);
207
} /* ha_checktype */
208
209
210
/**
  Create and initialise a handler object for a table.

  @param share       table share passed on to the engine's create()
  @param alloc       memory root passed on to the engine's create()
  @param db_type     engine to use; when it is NULL, not in state
                     SHOW_OPTION_YES or has no create() call, the thread's
                     default engine is used instead

  @return
    the new handler (init() already called), or NULL if the engine's
    create() returned NULL
*/
handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
                         handlerton *db_type)
{
  const bool engine_usable= db_type &&
                            db_type->state == SHOW_OPTION_YES &&
                            db_type->create;

  if (!engine_usable)
  {
    /*
      Fall back to the default table type.
      Calling current_thd() here is acceptable: this function is called
      very often, but this branch is entered very seldom.
    */
    return(get_new_handler(share, alloc, ha_default_handlerton(current_thd)));
  }

  handler *new_handler= db_type->create(db_type, share, alloc);
  if (new_handler)
    new_handler->init();
  return(new_handler);
}
228
229
230
/**
231
  Register handler error messages for use with my_error().
232
233
  @retval
234
    0           OK
235
  @retval
236
    !=0         Error
237
*/
238
239
int ha_init_errors(void)
240
{
241
#define SETMSG(nr, msg) errmsgs[(nr) - HA_ERR_FIRST]= (msg)
242
  const char    **errmsgs;
243
244
  /* Allocate a pointer array for the error message strings. */
245
  /* Zerofill it to avoid uninitialized gaps. */
246
  if (! (errmsgs= (const char**) my_malloc(HA_ERR_ERRORS * sizeof(char*),
247
                                           MYF(MY_WME | MY_ZEROFILL))))
248
    return 1;
249
250
  /* Set the dedicated error messages. */
251
  SETMSG(HA_ERR_KEY_NOT_FOUND,          ER(ER_KEY_NOT_FOUND));
252
  SETMSG(HA_ERR_FOUND_DUPP_KEY,         ER(ER_DUP_KEY));
253
  SETMSG(HA_ERR_RECORD_CHANGED,         "Update wich is recoverable");
254
  SETMSG(HA_ERR_WRONG_INDEX,            "Wrong index given to function");
255
  SETMSG(HA_ERR_CRASHED,                ER(ER_NOT_KEYFILE));
256
  SETMSG(HA_ERR_WRONG_IN_RECORD,        ER(ER_CRASHED_ON_USAGE));
257
  SETMSG(HA_ERR_OUT_OF_MEM,             "Table handler out of memory");
258
  SETMSG(HA_ERR_NOT_A_TABLE,            "Incorrect file format '%.64s'");
259
  SETMSG(HA_ERR_WRONG_COMMAND,          "Command not supported");
260
  SETMSG(HA_ERR_OLD_FILE,               ER(ER_OLD_KEYFILE));
261
  SETMSG(HA_ERR_NO_ACTIVE_RECORD,       "No record read in update");
262
  SETMSG(HA_ERR_RECORD_DELETED,         "Intern record deleted");
263
  SETMSG(HA_ERR_RECORD_FILE_FULL,       ER(ER_RECORD_FILE_FULL));
264
  SETMSG(HA_ERR_INDEX_FILE_FULL,        "No more room in index file '%.64s'");
265
  SETMSG(HA_ERR_END_OF_FILE,            "End in next/prev/first/last");
266
  SETMSG(HA_ERR_UNSUPPORTED,            ER(ER_ILLEGAL_HA));
267
  SETMSG(HA_ERR_TO_BIG_ROW,             "Too big row");
268
  SETMSG(HA_WRONG_CREATE_OPTION,        "Wrong create option");
269
  SETMSG(HA_ERR_FOUND_DUPP_UNIQUE,      ER(ER_DUP_UNIQUE));
270
  SETMSG(HA_ERR_UNKNOWN_CHARSET,        "Can't open charset");
271
  SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF,    ER(ER_WRONG_MRG_TABLE));
272
  SETMSG(HA_ERR_CRASHED_ON_REPAIR,      ER(ER_CRASHED_ON_REPAIR));
273
  SETMSG(HA_ERR_CRASHED_ON_USAGE,       ER(ER_CRASHED_ON_USAGE));
274
  SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT,      ER(ER_LOCK_WAIT_TIMEOUT));
275
  SETMSG(HA_ERR_LOCK_TABLE_FULL,        ER(ER_LOCK_TABLE_FULL));
276
  SETMSG(HA_ERR_READ_ONLY_TRANSACTION,  ER(ER_READ_ONLY_TRANSACTION));
277
  SETMSG(HA_ERR_LOCK_DEADLOCK,          ER(ER_LOCK_DEADLOCK));
278
  SETMSG(HA_ERR_CANNOT_ADD_FOREIGN,     ER(ER_CANNOT_ADD_FOREIGN));
279
  SETMSG(HA_ERR_NO_REFERENCED_ROW,      ER(ER_NO_REFERENCED_ROW_2));
280
  SETMSG(HA_ERR_ROW_IS_REFERENCED,      ER(ER_ROW_IS_REFERENCED_2));
281
  SETMSG(HA_ERR_NO_SAVEPOINT,           "No savepoint with that name");
282
  SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE,  "Non unique key block size");
283
  SETMSG(HA_ERR_NO_SUCH_TABLE,          "No such table: '%.64s'");
284
  SETMSG(HA_ERR_TABLE_EXIST,            ER(ER_TABLE_EXISTS_ERROR));
285
  SETMSG(HA_ERR_NO_CONNECTION,          "Could not connect to storage engine");
286
  SETMSG(HA_ERR_TABLE_DEF_CHANGED,      ER(ER_TABLE_DEF_CHANGED));
287
  SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY,  "FK constraint would lead to duplicate key");
288
  SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE,    ER(ER_TABLE_NEEDS_UPGRADE));
289
  SETMSG(HA_ERR_TABLE_READONLY,         ER(ER_OPEN_AS_READONLY));
290
  SETMSG(HA_ERR_AUTOINC_READ_FAILED,    ER(ER_AUTOINC_READ_FAILED));
291
  SETMSG(HA_ERR_AUTOINC_ERANGE,         ER(ER_WARN_DATA_OUT_OF_RANGE));
292
293
  /* Register the error messages for use with my_error(). */
294
  return my_error_register(errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
295
}
296
297
298
/**
299
  Unregister handler error messages.
300
301
  @retval
302
    0           OK
303
  @retval
304
    !=0         Error
305
*/
306
static int ha_finish_errors(void)
307
{
308
  const char    **errmsgs;
309
310
  /* Allocate a pointer array for the error message strings. */
311
  if (! (errmsgs= my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST)))
312
    return 1;
313
  my_free((uchar*) errmsgs, MYF(0));
314
  return 0;
315
}
316
317
318
/**
  Tear down the handlerton that belongs to a storage engine plugin.

  Removes the handlerton from installed_htons (if it is the one
  registered for its db_type), calls the plugin's deinit() hook and frees
  the handlerton.

  @param plugin      plugin descriptor whose data member holds the handlerton

  @return
    always 0
*/
int ha_finalize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton= (handlerton *)plugin->data;

  /*
    Nothing to do without a handlerton. This check has to come before
    the first dereference of hton, not after it.
  */
  if (!hton)
    return(0);

  switch (hton->state)
  {
  case SHOW_OPTION_NO:
  case SHOW_OPTION_DISABLED:
    break;
  case SHOW_OPTION_YES:
    /* Only clear the table entry if it really is ours. */
    if (installed_htons[hton->db_type] == hton)
      installed_htons[hton->db_type]= NULL;
    break;
  };

  if (plugin->plugin->deinit)
    (void)plugin->plugin->deinit(hton);

  my_free((uchar*)hton, MYF(0));
  /* Do not leave a dangling pointer to the freed handlerton behind. */
  plugin->data= NULL;

  return(0);
}
340
341
342
/**
  Allocate and register the handlerton for a storage engine plugin.

  Allocates a zeroed handlerton, stores it in plugin->data, runs the
  plugin's init() hook and, for engines in state SHOW_OPTION_YES, assigns
  a legacy db_type (resolving conflicts), a savepoint offset and a slot.

  @param plugin      plugin descriptor of the storage engine

  @retval
    0           OK
  @retval
    1           Error (out of memory, init() failed, or no free db_type)
*/
int ha_initialize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton;

  hton= (handlerton *)my_malloc(sizeof(handlerton),
                                MYF(MY_WME | MY_ZEROFILL));
  if (!hton)
    return(1);

  /*
    Clear the whole structure explicitly. This must use the size of the
    structure: sizeof(hton) is only the size of the pointer and left most
    members (e.g. get_backup_engine) uninitialized.
  */
  memset(hton, 0, sizeof(handlerton));
  /* Historical Requirement */
  plugin->data= hton; // shortcut for the future
  if (plugin->plugin->init)
  {
    if (plugin->plugin->init(hton))
    {
      sql_print_error("Plugin '%s' init function returned error.",
                      plugin->name.str);
      goto err;
    }
  }

  /*
    the switch below and hton->state should be removed when
    command-line options for plugins will be implemented
  */
  switch (hton->state) {
  case SHOW_OPTION_NO:
    break;
  case SHOW_OPTION_YES:
    {
      uint tmp;
      /* now check the db_type for conflict */
      if (hton->db_type <= DB_TYPE_UNKNOWN ||
          hton->db_type >= DB_TYPE_DEFAULT ||
          installed_htons[hton->db_type])
      {
        /* Pick the first unused dynamic type code instead. */
        int idx= (int) DB_TYPE_FIRST_DYNAMIC;

        while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
          idx++;

        if (idx == (int) DB_TYPE_DEFAULT)
        {
          sql_print_warning("Too many storage engines!");
          return(1);
        }
        if (hton->db_type != DB_TYPE_UNKNOWN)
          sql_print_warning("Storage engine '%s' has conflicting typecode. "
                            "Assigning value %d.", plugin->plugin->name, idx);
        hton->db_type= (enum legacy_db_type) idx;
      }
      installed_htons[hton->db_type]= hton;
      /*
        The engine reports how much savepoint space it needs in
        savepoint_offset; replace that with its offset in the shared area
        and grow the area by the requested amount.
      */
      tmp= hton->savepoint_offset;
      hton->savepoint_offset= savepoint_alloc_size;
      savepoint_alloc_size+= tmp;
      hton->slot= total_ha++;
      hton2plugin[hton->slot]=plugin;
      if (hton->prepare)
        total_ha_2pc++;
      break;
    }
  default:
    hton->state= SHOW_OPTION_DISABLED;
    break;
  }

  /*
    This is entirely for legacy. We will create a new "disk based" hton and a
    "memory" hton which will be configurable longterm. We should be able to
    remove partition and myisammrg.
  */
  switch (hton->db_type) {
  case DB_TYPE_HEAP:
    heap_hton= hton;
    break;
  case DB_TYPE_MYISAM:
    myisam_hton= hton;
    break;
  default:
    break;
  };

  return(0);
err:
  /*
    NOTE(review): on the error paths hton stays allocated and referenced
    by plugin->data -- confirm that the plugin code releases it.
  */
  return(1);
}
434
435
int ha_init()
436
{
437
  int error= 0;
438
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
439
  assert(total_ha < MAX_HA);
1 by brian
clean slate
440
  /*
441
    Check if there is a transaction-capable storage engine besides the
442
    binary log (which is considered a transaction-capable storage engine in
443
    counting total_ha)
444
  */
61 by Brian Aker
Conversion of handler type.
445
  opt_using_transactions= total_ha>(uint32_t)opt_bin_log;
1 by brian
clean slate
446
  savepoint_alloc_size+= sizeof(SAVEPOINT);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
447
  return(error);
1 by brian
clean slate
448
}
449
450
int ha_end()
451
{
452
  int error= 0;
453
454
  /* 
455
    This should be eventualy based  on the graceful shutdown flag.
456
    So if flag is equal to HA_PANIC_CLOSE, the deallocate
457
    the errors.
458
  */
459
  if (ha_finish_errors())
460
    error= 1;
461
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
462
  return(error);
1 by brian
clean slate
463
}
464
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
465
static bool dropdb_handlerton(THD *unused1 __attribute__((unused)),
149 by Brian Aker
More bool conversion.
466
                              plugin_ref plugin,
467
                              void *path)
1 by brian
clean slate
468
{
469
  handlerton *hton= plugin_data(plugin, handlerton *);
470
  if (hton->state == SHOW_OPTION_YES && hton->drop_database)
471
    hton->drop_database(hton, (char *)path);
56 by brian
Next pass of true/false update.
472
  return false;
1 by brian
clean slate
473
}
474
475
476
void ha_drop_database(char* path)
477
{
478
  plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
479
}
480
481
149 by Brian Aker
More bool conversion.
482
static bool closecon_handlerton(THD *thd, plugin_ref plugin,
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
483
                                void *unused __attribute__((unused)))
1 by brian
clean slate
484
{
485
  handlerton *hton= plugin_data(plugin, handlerton *);
486
  /*
487
    there's no need to rollback here as all transactions must
488
    be rolled back already
489
  */
490
  if (hton->state == SHOW_OPTION_YES && hton->close_connection &&
491
      thd_get_ha_data(thd, hton))
492
    hton->close_connection(hton, thd);
56 by brian
Next pass of true/false update.
493
  return false;
1 by brian
clean slate
494
}
495
496
497
/**
498
  @note
499
    don't bother to rollback here, it's done already
500
*/
501
void ha_close_connection(THD* thd)
502
{
503
  plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
504
}
505
506
/* ========================================================================
507
 ======================= TRANSACTIONS ===================================*/
508
509
/**
510
  Transaction handling in the server
511
  ==================================
512
513
  In each client connection, MySQL maintains two transactional
514
  states:
515
  - a statement transaction,
516
  - a standard, also called normal transaction.
517
518
  Historical note
519
  ---------------
520
  "Statement transaction" is a non-standard term that comes
521
  from the times when MySQL supported BerkeleyDB storage engine.
522
523
  First of all, it should be said that in BerkeleyDB auto-commit
524
  mode auto-commits operations that are atomic to the storage
525
  engine itself, such as a write of a record, and are too
526
  high-granular to be atomic from the application perspective
527
  (MySQL). One SQL statement could involve many BerkeleyDB
528
  auto-committed operations and thus BerkeleyDB auto-commit was of
529
  little use to MySQL.
530
531
  Secondly, instead of SQL standard savepoints, BerkeleyDB
532
  provided the concept of "nested transactions". In a nutshell,
533
  transactions could be arbitrarily nested, but when the parent
534
  transaction was committed or aborted, all its child (nested)
535
  transactions were committed or aborted as well.
536
  Commit of a nested transaction, in turn, made its changes
537
  visible, but not durable: it destroyed the nested transaction,
538
  all its changes would become available to the parent and
539
  currently active nested transactions of this parent.
540
541
  So the mechanism of nested transactions was employed to
542
  provide "all or nothing" guarantee of SQL statements
543
  required by the standard.
544
  A nested transaction would be created at start of each SQL
545
  statement, and destroyed (committed or aborted) at statement
546
  end. Such nested transaction was internally referred to as
547
  a "statement transaction" and gave birth to the term.
548
549
  <Historical note ends>
550
551
  Since then a statement transaction is started for each statement
552
  that accesses transactional tables or uses the binary log.  If
553
  the statement succeeds, the statement transaction is committed.
554
  If the statement fails, the transaction is rolled back. Commits
555
  of statement transactions are not durable -- each such
556
  transaction is nested in the normal transaction, and if the
557
  normal transaction is rolled back, the effects of all enclosed
558
  statement transactions are undone as well.  Technically,
559
  a statement transaction can be viewed as a savepoint which is
560
  maintained automatically in order to make effects of one
561
  statement atomic.
562
563
  The normal transaction is started by the user and is ended
564
  usually upon a user request as well. The normal transaction
565
  encloses transactions of all statements issued between
566
  its beginning and its end.
567
  In autocommit mode, the normal transaction is equivalent
568
  to the statement transaction.
569
570
  Since MySQL supports PSEA (pluggable storage engine
571
  architecture), more than one transactional engine can be
572
  active at a time. Hence transactions, from the server
573
  point of view, are always distributed. In particular,
574
  transactional state is maintained independently for each
575
  engine. In order to commit a transaction the two phase
576
  commit protocol is employed.
577
578
  Not all statements are executed in context of a transaction.
579
  Administrative and status information statements do not modify
580
  engine data, and thus do not start a statement transaction and
581
  also have no effect on the normal transaction. Examples of such
582
  statements are SHOW STATUS and RESET SLAVE.
583
584
  Similarly DDL statements are not transactional,
585
  and therefore a transaction is [almost] never started for a DDL
586
  statement. The difference between a DDL statement and a purely
587
  administrative statement though is that a DDL statement always
588
  commits the current transaction before proceeding, if there is
589
  any.
590
591
  At last, SQL statements that work with non-transactional
592
  engines also have no effect on the transaction state of the
593
  connection. Even though they are written to the binary log,
594
  and the binary log is, overall, transactional, the writes
595
  are done in "write-through" mode, directly to the binlog
596
  file, followed with a OS cache sync, in other words,
597
  bypassing the binlog undo log (translog).
598
  They do not commit the current normal transaction.
599
  A failure of a statement that uses non-transactional tables
600
  would cause a rollback of the statement transaction, but
601
  in case there no non-transactional tables are used,
602
  no statement transaction is started.
603
604
  Data layout
605
  -----------
606
607
  The server stores its transaction-related data in
608
  thd->transaction. This structure has two members of type
609
  THD_TRANS. These members correspond to the statement and
610
  normal transactions respectively:
611
612
  - thd->transaction.stmt contains a list of engines
613
  that are participating in the given statement
614
  - thd->transaction.all contains a list of engines that
615
  have participated in any of the statement transactions started
616
  within the context of the normal transaction.
617
  Each element of the list contains a pointer to the storage
618
  engine, engine-specific transactional data, and engine-specific
619
  transaction flags.
620
621
  In autocommit mode thd->transaction.all is empty.
622
  Instead, data of thd->transaction.stmt is
623
  used to commit/rollback the normal transaction.
624
625
  The list of registered engines has a few important properties:
626
  - no engine is registered in the list twice
627
  - engines are present in the list in reverse temporal order --
628
  new participants are always added to the beginning of the list.
629
630
  Transaction life cycle
631
  ----------------------
632
633
  When a new connection is established, thd->transaction
634
  members are initialized to an empty state.
635
  If a statement uses any tables, all affected engines
636
  are registered in the statement engine list. In
637
  non-autocommit mode, the same engines are registered in
638
  the normal transaction list.
639
  At the end of the statement, the server issues a commit
640
  or a roll back for all engines in the statement list.
641
  At this point transaction flags of an engine, if any, are
642
  propagated from the statement list to the list of the normal
643
  transaction.
644
  When commit/rollback is finished, the statement list is
645
  cleared. It will be filled in again by the next statement,
646
  and emptied again at the next statement's end.
647
648
  The normal transaction is committed in a similar way
649
  (by going over all engines in thd->transaction.all list)
650
  but at different times:
651
  - when a COMMIT SQL statement is issued by the user
652
  - implicitly, by the server, at the beginning of a DDL statement
653
  or SET AUTOCOMMIT={0|1} statement.
654
655
  The normal transaction can be rolled back as well:
656
  - if the user has requested so, by issuing ROLLBACK SQL
657
  statement
658
  - if one of the storage engines requested a rollback
659
  by setting thd->transaction_rollback_request. This may
660
  happen in case, e.g., when the transaction in the engine was
661
  chosen a victim of the internal deadlock resolution algorithm
662
  and rolled back internally. When such a situation happens, there
663
  is little the server can do and the only option is to rollback
664
  transactions in all other participating engines.  In this case
665
  the rollback is accompanied by an error sent to the user.
666
667
  As follows from the use cases above, the normal transaction
668
  is never committed when there is an outstanding statement
669
  transaction. In most cases there is no conflict, since
670
  commits of the normal transaction are issued by a stand-alone
671
  administrative or DDL statement, thus no outstanding statement
672
  transaction of the previous statement exists. Besides,
673
  all statements that manipulate with the normal transaction
674
  are prohibited in stored functions and triggers, therefore
675
  no conflicting situation can occur in a sub-statement either.
676
  The remaining rare cases when the server explicitly has
677
  to commit the statement transaction prior to committing the normal
678
  one cover error-handling scenarios (see for example
679
  SQLCOM_LOCK_TABLES).
680
681
  When committing a statement or a normal transaction, the server
682
  either uses the two-phase commit protocol, or issues a commit
683
  in each engine independently. The two-phase commit protocol
684
  is used only if:
685
  - all participating engines support two-phase commit (provide
686
    handlerton::prepare PSEA API call) and
687
  - transactions in at least two engines modify data (i.e. are
688
  not read-only).
689
690
  Note that the two phase commit is used for
691
  statement transactions, even though they are not durable anyway.
692
  This is done to ensure logical consistency of data in a multiple-
693
  engine transaction.
694
  For example, imagine that some day MySQL supports unique
695
  constraint checks deferred till the end of statement. In such
696
  case a commit in one of the engines may yield ER_DUP_KEY,
697
  and MySQL should be able to gracefully abort statement
698
  transactions of other participants.
699
700
  After the normal transaction has been committed,
701
  thd->transaction.all list is cleared.
702
703
  When a connection is closed, the current normal transaction, if
704
  any, is rolled back.
705
706
  Roles and responsibilities
707
  --------------------------
708
709
  The server has no way to know that an engine participates in
710
  the statement and a transaction has been started
711
  in it unless the engine says so. Thus, in order to be
712
  a part of a transaction, the engine must "register" itself.
713
  This is done by invoking trans_register_ha() server call.
714
  Normally the engine registers itself whenever handler::external_lock()
715
  is called. trans_register_ha() can be invoked many times: if
716
  an engine is already registered, the call does nothing.
717
  In case autocommit is not set, the engine must register itself
718
  twice -- both in the statement list and in the normal transaction
719
  list.
720
  In which list to register is a parameter of trans_register_ha().
721
722
  Note, that although the registration interface in itself is
723
  fairly clear, the current usage practice often leads to undesired
724
  effects. E.g. since a call to trans_register_ha() in most engines
725
  is embedded into implementation of handler::external_lock(), some
726
  DDL statements start a transaction (at least from the server
727
  point of view) even though they are not expected to. E.g.
728
  CREATE TABLE does not start a transaction, since
729
  handler::external_lock() is never called during CREATE TABLE. But
730
  CREATE TABLE ... SELECT does, since handler::external_lock() is
731
  called for the table that is being selected from. This has no
732
  practical effects currently, but must be kept in mind
733
  nevertheless.
734
735
  Once an engine is registered, the server will do the rest
736
  of the work.
737
738
  During statement execution, whenever any of data-modifying
739
  PSEA API methods is used, e.g. handler::write_row() or
740
  handler::update_row(), the read-write flag is raised in the
741
  statement transaction for the involved engine.
742
  Currently all PSEA calls are "traced", and the data can not be
743
  changed in a way other than issuing a PSEA call. Important:
744
  unless this invariant is preserved the server will not know that
745
  a transaction in a given engine is read-write and will not
746
  involve the two-phase commit protocol!
747
748
  At the end of a statement, server call
749
  ha_autocommit_or_rollback() is invoked. This call in turn
750
  invokes handlerton::prepare() for every involved engine.
751
  Prepare is followed by a call to handlerton::commit_one_phase()
752
  If a one-phase commit will suffice, handlerton::prepare() is not
753
  invoked and the server only calls handlerton::commit_one_phase().
754
  At statement commit, the statement-related read-write engine
755
  flag is propagated to the corresponding flag in the normal
756
  transaction.  When the commit is complete, the list of registered
757
  engines is cleared.
758
759
  Rollback is handled in a similar fashion.
760
761
  Additional notes on DDL and the normal transaction.
762
  ---------------------------------------------------
763
764
  DDLs and operations with non-transactional engines
765
  do not "register" in thd->transaction lists, and thus do not
766
  modify the transaction state. Besides, each DDL in
767
  MySQL is prefixed with an implicit normal transaction commit
768
  (a call to end_active_trans()), and thus leaves nothing
769
  to modify.
770
  However, as it has been pointed out with CREATE TABLE .. SELECT,
771
  some DDL statements can start a *new* transaction.
772
773
  Behaviour of the server in this case is currently badly
774
  defined.
775
  DDL statements use a form of "semantic" logging
776
  to maintain atomicity: if CREATE TABLE .. SELECT failed,
777
  the newly created table is deleted.
778
  In addition, some DDL statements issue interim transaction
779
  commits: e.g. ALTER TABLE issues a commit after data is copied
780
  from the original table to the internal temporary table. Other
781
  statements, e.g. CREATE TABLE ... SELECT do not always commit
782
  after itself.
783
  And finally there is a group of DDL statements such as
784
  RENAME/DROP TABLE that doesn't start a new transaction
785
  and doesn't commit.
786
787
  This diversity makes it hard to say what will happen if
788
  by chance a stored function is invoked during a DDL --
789
  whether any modifications it makes will be committed or not
790
  is not clear. Fortunately, SQL grammar of few DDLs allows
791
  invocation of a stored function.
792
793
  A consistent behaviour is perhaps to always commit the normal
794
  transaction after all DDLs, just like the statement transaction
795
  is always committed at the end of all statements.
796
*/
797
798
/**
799
  Register a storage engine for a transaction.
800
801
  Every storage engine MUST call this function when it starts
802
  a transaction or a statement (that is it must be called both for the
803
  "beginning of transaction" and "beginning of statement").
804
  Only storage engines registered for the transaction/statement
805
  will know when to commit/rollback it.
806
807
  @note
808
    trans_register_ha is idempotent - storage engine may register many
809
    times per transaction.
810
811
*/
812
void trans_register_ha(THD *thd, bool all, handlerton *ht_arg)
813
{
814
  THD_TRANS *trans;
815
  Ha_trx_info *ha_info;
816
817
  if (all)
818
  {
819
    trans= &thd->transaction.all;
820
    thd->server_status|= SERVER_STATUS_IN_TRANS;
821
  }
822
  else
823
    trans= &thd->transaction.stmt;
824
825
  ha_info= thd->ha_data[ht_arg->slot].ha_info + static_cast<unsigned>(all);
826
827
  if (ha_info->is_started())
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
828
    return; /* already registered, return */
1 by brian
clean slate
829
830
  ha_info->register_ha(trans, ht_arg);
831
832
  trans->no_2pc|=(ht_arg->prepare==0);
833
  if (thd->transaction.xid_state.xid.is_null())
834
    thd->transaction.xid_state.xid.set(thd->query_id);
835
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
836
  return;
1 by brian
clean slate
837
}
838
839
/**
840
  @retval
841
    0   ok
842
  @retval
843
    1   error, transaction was rolled back
844
*/
845
int ha_prepare(THD *thd)
846
{
847
  int error=0, all=1;
848
  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
849
  Ha_trx_info *ha_info= trans->ha_list;
850
  if (ha_info)
851
  {
852
    for (; ha_info; ha_info= ha_info->next())
853
    {
854
      int err;
855
      handlerton *ht= ha_info->ht();
856
      status_var_increment(thd->status_var.ha_prepare_count);
857
      if (ht->prepare)
858
      {
859
        if ((err= ht->prepare(ht, thd, all)))
860
        {
861
          my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
862
          ha_rollback_trans(thd, all);
863
          error=1;
864
          break;
865
        }
866
      }
867
      else
868
      {
869
        push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
870
                            ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
871
                            ha_resolve_storage_engine_name(ht));
872
      }
873
    }
874
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
875
  return(error);
1 by brian
clean slate
876
}
877
878
/**
879
  Check if we can skip the two-phase commit.
880
881
  A helper function to evaluate if two-phase commit is mandatory.
882
  As a side effect, propagates the read-only/read-write flags
883
  of the statement transaction to its enclosing normal transaction.
884
56 by brian
Next pass of true/false update.
885
  @retval true   we must run a two-phase commit. Returned
1 by brian
clean slate
886
                 if we have at least two engines with read-write changes.
56 by brian
Next pass of true/false update.
887
  @retval false  Don't need two-phase commit. Even if we have two
1 by brian
clean slate
888
                 transactional engines, we can run two independent
889
                 commits if changes in one of the engines are read-only.
890
*/
891
892
static
893
bool
894
ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
895
                                    bool all)
896
{
897
  /* The number of storage engines that have actual changes. */
898
  unsigned rw_ha_count= 0;
899
  Ha_trx_info *ha_info;
900
901
  for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
902
  {
903
    if (ha_info->is_trx_read_write())
904
      ++rw_ha_count;
905
906
    if (! all)
907
    {
908
      Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
909
      assert(ha_info != ha_info_all);
1 by brian
clean slate
910
      /*
911
        Merge read-only/read-write information about statement
912
        transaction to its enclosing normal transaction. Do this
913
        only if in a real transaction -- that is, if we know
914
        that ha_info_all is registered in thd->transaction.all.
915
        Since otherwise we only clutter the normal transaction flags.
916
      */
56 by brian
Next pass of true/false update.
917
      if (ha_info_all->is_started()) /* false if autocommit. */
1 by brian
clean slate
918
        ha_info_all->coalesce_trx_with(ha_info);
919
    }
920
    else if (rw_ha_count > 1)
921
    {
922
      /*
923
        It is a normal transaction, so we don't need to merge read/write
924
        information up, and the need for two-phase commit has been
925
        already established. Break the loop prematurely.
926
      */
927
      break;
928
    }
929
  }
930
  return rw_ha_count > 1;
931
}
932
933
934
/**
935
  @retval
936
    0   ok
937
  @retval
938
    1   transaction was rolled back
939
  @retval
940
    2   error during commit, data may be inconsistent
941
942
  @todo
943
    Since we don't support nested statement transactions in 5.0,
944
    we can't commit or rollback stmt transactions while we are inside
945
    stored functions or triggers. So we simply do nothing now.
946
    TODO: This should be fixed in later ( >= 5.1) releases.
947
*/
948
int ha_commit_trans(THD *thd, bool all)
949
{
950
  int error= 0, cookie= 0;
951
  /*
952
    'all' means that this is either an explicit commit issued by
953
    user, or an implicit commit issued by a DDL.
954
  */
955
  THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
956
  bool is_real_trans= all || thd->transaction.all.ha_list == 0;
957
  Ha_trx_info *ha_info= trans->ha_list;
958
  my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
959
960
  /*
961
    We must not commit the normal transaction if a statement
962
    transaction is pending. Otherwise statement transaction
963
    flags will not get propagated to its normal transaction's
964
    counterpart.
965
  */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
966
  assert(thd->transaction.stmt.ha_list == NULL ||
1 by brian
clean slate
967
              trans == &thd->transaction.stmt);
968
969
  if (thd->in_sub_stmt)
970
  {
971
    /*
972
      Since we don't support nested statement transactions in 5.0,
973
      we can't commit or rollback stmt transactions while we are inside
974
      stored functions or triggers. So we simply do nothing now.
975
      TODO: This should be fixed in later ( >= 5.1) releases.
976
    */
977
    if (!all)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
978
      return(0);
1 by brian
clean slate
979
    /*
980
      We assume that all statements which commit or rollback main transaction
981
      are prohibited inside of stored functions or triggers. So they should
982
      bail out with error even before ha_commit_trans() call. To be 100% safe
983
      let us throw error in non-debug builds.
984
    */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
985
    assert(0);
1 by brian
clean slate
986
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
987
    return(2);
1 by brian
clean slate
988
  }
989
  if (ha_info)
990
  {
991
    bool must_2pc;
992
993
    if (is_real_trans && wait_if_global_read_lock(thd, 0, 0))
994
    {
995
      ha_rollback_trans(thd, all);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
996
      return(1);
1 by brian
clean slate
997
    }
998
999
    if (   is_real_trans
1000
        && opt_readonly
1001
        && ! thd->slave_thread
1002
       )
1003
    {
1004
      my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
1005
      ha_rollback_trans(thd, all);
1006
      error= 1;
1007
      goto end;
1008
    }
1009
1010
    must_2pc= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
1011
1012
    if (!trans->no_2pc && must_2pc)
1013
    {
1014
      for (; ha_info && !error; ha_info= ha_info->next())
1015
      {
1016
        int err;
1017
        handlerton *ht= ha_info->ht();
1018
        /*
1019
          Do not call two-phase commit if this particular
1020
          transaction is read-only. This allows for simpler
1021
          implementation in engines that are always read-only.
1022
        */
1023
        if (! ha_info->is_trx_read_write())
1024
          continue;
1025
        /*
1026
          Sic: we know that prepare() is not NULL since otherwise
1027
          trans->no_2pc would have been set.
1028
        */
1029
        if ((err= ht->prepare(ht, thd, all)))
1030
        {
1031
          my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1032
          error= 1;
1033
        }
1034
        status_var_increment(thd->status_var.ha_prepare_count);
1035
      }
1036
      if (error || (is_real_trans && xid &&
1037
                    (error= !(cookie= tc_log->log_xid(thd, xid)))))
1038
      {
1039
        ha_rollback_trans(thd, all);
1040
        error= 1;
1041
        goto end;
1042
      }
1043
    }
1044
    error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0;
1045
    if (cookie)
1046
      tc_log->unlog(cookie, xid);
1047
end:
1048
    if (is_real_trans)
1049
      start_waiting_global_read_lock(thd);
1050
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1051
  return(error);
1 by brian
clean slate
1052
}
1053
1054
/**
1055
  @note
1056
  This function does not care about global read lock. A caller should.
1057
*/
1058
int ha_commit_one_phase(THD *thd, bool all)
1059
{
1060
  int error=0;
1061
  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1062
  bool is_real_trans=all || thd->transaction.all.ha_list == 0;
1063
  Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1064
  if (ha_info)
1065
  {
1066
    for (; ha_info; ha_info= ha_info_next)
1067
    {
1068
      int err;
1069
      handlerton *ht= ha_info->ht();
1070
      if ((err= ht->commit(ht, thd, all)))
1071
      {
1072
        my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1073
        error=1;
1074
      }
1075
      status_var_increment(thd->status_var.ha_commit_count);
1076
      ha_info_next= ha_info->next();
1077
      ha_info->reset(); /* keep it conveniently zero-filled */
1078
    }
1079
    trans->ha_list= 0;
1080
    trans->no_2pc=0;
1081
    if (is_real_trans)
1082
      thd->transaction.xid_state.xid.null();
1083
    if (all)
1084
    {
1085
      thd->variables.tx_isolation=thd->session_tx_isolation;
1086
      thd->transaction.cleanup();
1087
    }
1088
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1089
  return(error);
1 by brian
clean slate
1090
}
1091
1092
1093
/**
  Roll back the statement transaction or the normal transaction.

  @param thd   current connection
  @param all   true for the normal transaction, false for the statement
               transaction

  @return 0 on success (including the no-op inside a sub-statement),
          1 if an engine failed to roll back or a rollback of the
          normal transaction was attempted inside a sub-statement.
*/
int ha_rollback_trans(THD *thd, bool all)
{
  int error= 0;
  THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
  Ha_trx_info *engine= trans->ha_list;
  /* Sampled before the engine list is cleared below. */
  const bool is_real_trans= all || thd->transaction.all.ha_list == 0;

  /*
    The normal transaction must not be rolled back while a statement
    transaction is pending.
  */
  assert(thd->transaction.stmt.ha_list == NULL ||
         trans == &thd->transaction.stmt);

  if (thd->in_sub_stmt)
  {
    /*
      Inside a stored function or trigger the current statement
      transaction is neither committed nor rolled back; see the
      matching comment in ha_commit_trans().
    */
    if (!all)
      return(0);
    assert(0);
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
    return(1);
  }

  if (engine)
  {
    while (engine)
    {
      handlerton *hton= engine->ht();
      const int rc= hton->rollback(hton, thd, all);
      if (rc)
      { // cannot happen
        my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), rc);
        error= 1;
      }
      status_var_increment(thd->status_var.ha_rollback_count);

      /* Fetch the successor first: reset() wipes the entry. */
      Ha_trx_info *following= engine->next();
      engine->reset(); /* keep it conveniently zero-filled */
      engine= following;
    }
    trans->ha_list= 0;
    trans->no_2pc= 0;
    if (is_real_trans)
      thd->transaction.xid_state.xid.null();
    if (all)
    {
      thd->variables.tx_isolation= thd->session_tx_isolation;
      thd->transaction.cleanup();
    }
  }
  if (all)
    thd->transaction_rollback_request= false;

  /*
    Warn when a non-transactional table was modified, since that part
    cannot be undone. Slave threads are excluded: a ROLLBACK they run
    was read from the binary log, so the situation is expected there,
    and the warning would only end up as a confusing line in the error
    log.
  */
  const bool incomplete_rollback=
    is_real_trans && thd->transaction.all.modified_non_trans_table &&
    !thd->slave_thread && thd->killed != THD::KILL_CONNECTION;
  if (incomplete_rollback)
    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                 ER_WARNING_NOT_COMPLETE_ROLLBACK,
                 ER(ER_WARNING_NOT_COMPLETE_ROLLBACK));
  return(error);
}
1164
1165
/**
1166
  This is used to commit or rollback a single statement depending on
1167
  the value of error.
1168
1169
  @note
1170
    Note that if the autocommit is on, then the following call inside
1171
    InnoDB will commit or rollback the whole transaction (= the statement). The
1172
    autocommit mechanism built into InnoDB is based on counting locks, but if
1173
    the user has used LOCK TABLES then that mechanism does not know to do the
1174
    commit.
1175
*/
1176
int ha_autocommit_or_rollback(THD *thd, int error)
1177
{
1178
  if (thd->transaction.stmt.ha_list)
1179
  {
1180
    if (!error)
1181
    {
1182
      if (ha_commit_trans(thd, 0))
1183
	error=1;
1184
    }
1185
    else 
1186
    {
1187
      (void) ha_rollback_trans(thd, 0);
1188
      if (thd->transaction_rollback_request && !thd->in_sub_stmt)
1189
        (void) ha_rollback(thd);
1190
    }
1191
1192
    thd->variables.tx_isolation=thd->session_tx_isolation;
1193
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1194
  return(error);
1 by brian
clean slate
1195
}
1196
1197
1198
struct xahton_st {
1199
  XID *xid;
1200
  int result;
1201
};
1202
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1203
static bool xacommit_handlerton(THD *unused1 __attribute__((unused)),
149 by Brian Aker
More bool conversion.
1204
                                plugin_ref plugin,
1205
                                void *arg)
1 by brian
clean slate
1206
{
1207
  handlerton *hton= plugin_data(plugin, handlerton *);
1208
  if (hton->state == SHOW_OPTION_YES && hton->recover)
1209
  {
1210
    hton->commit_by_xid(hton, ((struct xahton_st *)arg)->xid);
1211
    ((struct xahton_st *)arg)->result= 0;
1212
  }
56 by brian
Next pass of true/false update.
1213
  return false;
1 by brian
clean slate
1214
}
1215
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1216
static bool xarollback_handlerton(THD *unused1 __attribute__((unused)),
149 by Brian Aker
More bool conversion.
1217
                                  plugin_ref plugin,
1218
                                  void *arg)
1 by brian
clean slate
1219
{
1220
  handlerton *hton= plugin_data(plugin, handlerton *);
1221
  if (hton->state == SHOW_OPTION_YES && hton->recover)
1222
  {
1223
    hton->rollback_by_xid(hton, ((struct xahton_st *)arg)->xid);
1224
    ((struct xahton_st *)arg)->result= 0;
1225
  }
56 by brian
Next pass of true/false update.
1226
  return false;
1 by brian
clean slate
1227
}
1228
1229
1230
/**
  Commit or roll back the transaction with the given XID in every
  storage engine plugin.

  @param xid      transaction identifier
  @param commit   true to commit, false to roll back

  @return 0 if at least one engine was asked to act, 1 otherwise.
*/
int ha_commit_or_rollback_by_xid(XID *xid, bool commit)
{
  struct xahton_st request;
  request.xid= xid;
  request.result= 1;   /* stays 1 unless some engine handles the XID */

  if (commit)
    plugin_foreach(NULL, xacommit_handlerton,
                   MYSQL_STORAGE_ENGINE_PLUGIN, &request);
  else
    plugin_foreach(NULL, xarollback_handlerton,
                   MYSQL_STORAGE_ENGINE_PLUGIN, &request);

  return request.result;
}
1241
1242
/**
1243
  recover() step of xa.
1244
1245
  @note
1246
    there are three modes of operation:
1247
    - automatic recover after a crash
1248
    in this case commit_list != 0, tc_heuristic_recover==0
1249
    all xids from commit_list are committed, others are rolled back
1250
    - manual (heuristic) recover
1251
    in this case commit_list==0, tc_heuristic_recover != 0
1252
    DBA has explicitly specified that all prepared transactions should
1253
    be committed (or rolled back).
1254
    - no recovery (MySQL did not detect a crash)
1255
    in this case commit_list==0, tc_heuristic_recover == 0
1256
    there should be no prepared transactions in this case.
1257
*/
1258
struct xarecover_st
1259
{
1260
  int len, found_foreign_xids, found_my_xids;
1261
  XID *list;
1262
  HASH *commit_list;
1263
  bool dry_run;
1264
};
1265
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1266
static bool xarecover_handlerton(THD *unused __attribute__((unused)),
149 by Brian Aker
More bool conversion.
1267
                                 plugin_ref plugin,
1268
                                 void *arg)
1 by brian
clean slate
1269
{
1270
  handlerton *hton= plugin_data(plugin, handlerton *);
1271
  struct xarecover_st *info= (struct xarecover_st *) arg;
1272
  int got;
1273
1274
  if (hton->state == SHOW_OPTION_YES && hton->recover)
1275
  {
1276
    while ((got= hton->recover(hton, info->list, info->len)) > 0 )
1277
    {
1278
      sql_print_information("Found %d prepared transaction(s) in %s",
1279
                            got, ha_resolve_storage_engine_name(hton));
1280
      for (int i=0; i < got; i ++)
1281
      {
1282
        my_xid x=info->list[i].get_my_xid();
1283
        if (!x) // not "mine" - that is generated by external TM
1284
        {
1285
          xid_cache_insert(info->list+i, XA_PREPARED);
1286
          info->found_foreign_xids++;
1287
          continue;
1288
        }
1289
        if (info->dry_run)
1290
        {
1291
          info->found_my_xids++;
1292
          continue;
1293
        }
1294
        // recovery mode
1295
        if (info->commit_list ?
1296
            hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0 :
1297
            tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)
1298
        {
1299
          hton->commit_by_xid(hton, info->list+i);
1300
        }
1301
        else
1302
        {
1303
          hton->rollback_by_xid(hton, info->list+i);
1304
        }
1305
      }
1306
      if (got < info->len)
1307
        break;
1308
    }
1309
  }
56 by brian
Next pass of true/false update.
1310
  return false;
1 by brian
clean slate
1311
}
1312
1313
/**
  Run the recover() step of XA over all storage engines.

  Allocates a buffer for XIDs, lets every engine report its prepared
  transactions through xarecover_handlerton(), and reports the outcome
  to the error log.

  @param commit_list   XIDs that must be committed, or 0

  @retval
    0   ok, or nothing to do because not enough two-phase-commit
        capable participants are present
  @retval
    1   out of memory, or prepared transactions were found in a dry run
*/
int ha_recover(HASH *commit_list)
{
  struct xarecover_st info;
  info.found_foreign_xids= info.found_my_xids= 0;
  info.commit_list= commit_list;
  info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
  info.list= NULL;
  info.len= 0;

  /* commit_list and tc_heuristic_recover cannot be set both */
  assert(info.commit_list==0 || tc_heuristic_recover==0);
  /* if either is set, total_ha_2pc must be set too */
  assert(info.dry_run || total_ha_2pc>(uint32_t)opt_bin_log);

  if (total_ha_2pc <= (uint32_t)opt_bin_log)
    return(0);

  if (info.commit_list)
    sql_print_information("Starting crash recovery...");


#ifndef WILL_BE_DELETED_LATER

  /*
    for now, only InnoDB supports 2pc. It means we can always safely
    rollback all pending transactions, without risking inconsistent data
  */

  assert(total_ha_2pc == (uint32_t) opt_bin_log+1); // only InnoDB and binlog
  tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK
  info.dry_run=false;
#endif


  /*
    Try progressively smaller buffers until one can be allocated.
    info.len is set only on success, so that it matches the number of
    XIDs the buffer can really hold. (Halving it in the loop header
    would also halve it once after the successful allocation and leave
    half of the buffer unused.)
  */
  int attempted_len= 0;
  for (int len= MAX_XID_LIST_SIZE; len > MIN_XID_LIST_SIZE; len/= 2)
  {
    attempted_len= len;
    info.list= (XID *)my_malloc(len*sizeof(XID), MYF(0));
    if (info.list)
    {
      info.len= len;
      break;
    }
  }
  if (!info.list)
  {
    /*
      Cast to int before passing through varargs; the conversion in the
      ER_OUTOFMEMORY text is presumably %d -- TODO confirm against the
      error message file. The size reported is the last one attempted.
    */
    sql_print_error(ER(ER_OUTOFMEMORY),
                    static_cast<int>(attempted_len*sizeof(XID)));
    return(1);
  }

  plugin_foreach(NULL, xarecover_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &info);

  my_free((uchar*)info.list, MYF(0));
  if (info.found_foreign_xids)
    sql_print_warning("Found %d prepared XA transactions",
                      info.found_foreign_xids);
  if (info.dry_run && info.found_my_xids)
  {
    sql_print_error("Found %d prepared transactions! It means that mysqld was "
                    "not shut down properly last time and critical recovery "
                    "information (last binlog or %s file) was manually deleted "
                    "after a crash. You have to start mysqld with "
                    "--tc-heuristic-recover switch to commit or rollback "
                    "pending transactions.",
                    info.found_my_xids, opt_tc_log_file);
    return(1);
  }
  if (info.commit_list)
    sql_print_information("Crash recovery finished.");
  return(0);
}
1379
1380
/**
1381
  return the list of XID's to a client, the same way SHOW commands do.
1382
1383
  @note
1384
    I didn't find in XA specs that an RM cannot return the same XID twice,
1385
    so mysql_xa_recover does not filter XID's to ensure uniqueness.
1386
    It can be easily fixed later, if necessary.
1387
*/
1388
bool mysql_xa_recover(THD *thd)
1389
{
1390
  List<Item> field_list;
1391
  Protocol *protocol= thd->protocol;
1392
  int i=0;
1393
  XID_STATE *xs;
1394
1395
  field_list.push_back(new Item_int("formatID", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1396
  field_list.push_back(new Item_int("gtrid_length", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1397
  field_list.push_back(new Item_int("bqual_length", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1398
  field_list.push_back(new Item_empty_string("data",XIDDATASIZE));
1399
1400
  if (protocol->send_fields(&field_list,
1401
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1402
    return(1);
1 by brian
clean slate
1403
1404
  pthread_mutex_lock(&LOCK_xid_cache);
1405
  while ((xs= (XID_STATE*)hash_element(&xid_cache, i++)))
1406
  {
1407
    if (xs->xa_state==XA_PREPARED)
1408
    {
1409
      protocol->prepare_for_resend();
152 by Brian Aker
longlong replacement
1410
      protocol->store_int64_t((int64_t)xs->xid.formatID, false);
1411
      protocol->store_int64_t((int64_t)xs->xid.gtrid_length, false);
1412
      protocol->store_int64_t((int64_t)xs->xid.bqual_length, false);
1 by brian
clean slate
1413
      protocol->store(xs->xid.data, xs->xid.gtrid_length+xs->xid.bqual_length,
1414
                      &my_charset_bin);
1415
      if (protocol->write())
1416
      {
1417
        pthread_mutex_unlock(&LOCK_xid_cache);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1418
        return(1);
1 by brian
clean slate
1419
      }
1420
    }
1421
  }
1422
1423
  pthread_mutex_unlock(&LOCK_xid_cache);
1424
  my_eof(thd);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1425
  return(0);
1 by brian
clean slate
1426
}
1427
1428
/**
1429
  @details
1430
  This function should be called when MySQL sends rows of a SELECT result set
1431
  or the EOF mark to the client. It releases a possible adaptive hash index
1432
  S-latch held by thd in InnoDB and also releases a possible InnoDB query
1433
  FIFO ticket to enter InnoDB. To save CPU time, InnoDB allows a thd to
1434
  keep them over several calls of the InnoDB handler interface when a join
1435
  is executed. But when we let the control to pass to the client they have
1436
  to be released because if the application program uses mysql_use_result(),
1437
  it may deadlock on the S-latch if the application on another connection
1438
  performs another SQL query. In MySQL-4.1 this is even more important because
1439
  there a connection can have several SELECT queries open at the same time.
1440
1441
  @param thd           the thread handle of the current connection
1442
1443
  @return
1444
    always 0
1445
*/
149 by Brian Aker
More bool conversion.
1446
static bool release_temporary_latches(THD *thd, plugin_ref plugin,
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1447
                                      void *unused __attribute__((unused)))
1 by brian
clean slate
1448
{
1449
  handlerton *hton= plugin_data(plugin, handlerton *);
1450
1451
  if (hton->state == SHOW_OPTION_YES && hton->release_temporary_latches)
1452
    hton->release_temporary_latches(hton, thd);
1453
56 by brian
Next pass of true/false update.
1454
  return false;
1 by brian
clean slate
1455
}
1456
1457
1458
/**
  Ask every storage engine plugin to release the temporary latches it
  holds for this connection.

  @param thd   the thread handle of the current connection

  @return always 0
*/
int ha_release_temporary_latches(THD *thd)
{
  (void) plugin_foreach(thd, release_temporary_latches,
                        MYSQL_STORAGE_ENGINE_PLUGIN, NULL);
  return 0;
}
1465
1466
/**
  Roll the current transaction back to a savepoint.

  Engines that were already part of the transaction when the savepoint
  was set are rolled back to it; engines that joined afterwards are
  rolled back completely and removed from the transaction.

  @param thd   current connection
  @param sv    savepoint to return to; per-engine savepoint data is
               read from the memory that follows the SAVEPOINT object

  @return 0 on success, 1 if any engine reported an error.
*/
int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
{
  int error= 0;
  THD_TRANS *trans= thd->in_sub_stmt ? &thd->transaction.stmt
                                     : &thd->transaction.all;
  /* Start of the per-engine savepoint data, right behind *sv. */
  uchar *sv_data= (uchar *)(sv+1);

  /* Recomputed below from the engines that remain. */
  trans->no_2pc= 0;

  /* Step 1: engines that already took part when the savepoint was set. */
  Ha_trx_info *engine= sv->ha_list;
  while (engine)
  {
    handlerton *hton= engine->ht();
    assert(hton);
    assert(hton->savepoint_set != 0);

    const int rc= hton->savepoint_rollback(hton, thd,
                                           sv_data + hton->savepoint_offset);
    if (rc)
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), rc);
      error= 1;
    }
    status_var_increment(thd->status_var.ha_savepoint_rollback_count);
    trans->no_2pc|= hton->prepare == 0;

    engine= engine->next();
  }

  /*
    Step 2: engines that joined after the savepoint was set. They sit
    in front of sv->ha_list in the transaction list and are rolled back
    entirely.
  */
  engine= trans->ha_list;
  while (engine != sv->ha_list)
  {
    handlerton *hton= engine->ht();
    const int rc= hton->rollback(hton, thd, !thd->in_sub_stmt);
    if (rc)
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), rc);
      error= 1;
    }
    status_var_increment(thd->status_var.ha_rollback_count);

    /* Fetch the successor first: reset() wipes the entry. */
    Ha_trx_info *following= engine->next();
    engine->reset(); /* keep it conveniently zero-filled */
    engine= following;
  }

  trans->ha_list= sv->ha_list;
  return(error);
}
1514
1515
/**
1516
  @note
1517
  according to the sql standard (ISO/IEC 9075-2:2003)
1518
  section "4.33.4 SQL-statements and transaction states",
1519
  SAVEPOINT is *not* transaction-initiating SQL-statement
1520
*/
1521
int ha_savepoint(THD *thd, SAVEPOINT *sv)
1522
{
1523
  int error=0;
1524
  THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
1525
                                        &thd->transaction.all);
1526
  Ha_trx_info *ha_info= trans->ha_list;
1527
  for (; ha_info; ha_info= ha_info->next())
1528
  {
1529
    int err;
1530
    handlerton *ht= ha_info->ht();
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1531
    assert(ht);
1 by brian
clean slate
1532
    if (! ht->savepoint_set)
1533
    {
1534
      my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
1535
      error=1;
1536
      break;
1537
    }
1538
    if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
1539
    { // cannot happen
1540
      my_error(ER_GET_ERRNO, MYF(0), err);
1541
      error=1;
1542
    }
1543
    status_var_increment(thd->status_var.ha_savepoint_count);
1544
  }
1545
  /*
1546
    Remember the list of registered storage engines. All new
1547
    engines are prepended to the beginning of the list.
1548
  */
1549
  sv->ha_list= trans->ha_list;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1550
  return(error);
1 by brian
clean slate
1551
}
1552
1553
/**
  Release a savepoint in every engine that was part of the transaction
  when the savepoint was set.

  Engines without a savepoint_release() hook are skipped.

  @param thd   current connection
  @param sv    savepoint to release

  @return 0 on success, 1 if any engine reported an error.
*/
int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
{
  int error= 0;
  /* Start of the per-engine savepoint data, right behind *sv. */
  uchar *sv_data= (uchar *)(sv+1);

  Ha_trx_info *engine= sv->ha_list;
  while (engine)
  {
    handlerton *hton= engine->ht();
    /* Savepoint life time is enclosed into transaction life time. */
    assert(hton);

    if (hton->savepoint_release)
    {
      const int rc= hton->savepoint_release(hton, thd,
                                            sv_data + hton->savepoint_offset);
      if (rc)
      { // cannot happen
        my_error(ER_GET_ERRNO, MYF(0), rc);
        error= 1;
      }
    }
    engine= engine->next();
  }
  return(error);
}
1575
1576
149 by Brian Aker
More bool conversion.
1577
/**
  plugin_foreach() callback: start a consistent snapshot in one engine.

  @param thd      current connection
  @param plugin   storage engine plugin being visited
  @param arg      points to a bool that is cleared when the engine
                  provides start_consistent_snapshot()

  @return always false, so the iteration continues.
*/
static bool snapshot_handlerton(THD *thd, plugin_ref plugin, void *arg)
{
  handlerton *hton= plugin_data(plugin, handlerton *);

  if (hton->state != SHOW_OPTION_YES || !hton->start_consistent_snapshot)
    return false;

  hton->start_consistent_snapshot(hton, thd);
  *static_cast<bool *>(arg)= false;
  return false;
}
1588
1589
/**
  Start a consistent snapshot in every engine that supports it.

  A warning is pushed when no enabled engine provides the hook.

  @param thd   current connection

  @return always 0
*/
int ha_start_consistent_snapshot(THD *thd)
{
  /* Cleared by snapshot_handlerton() when an engine takes a snapshot. */
  bool no_engine_found= true;

  plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
                 &no_engine_found);

  /*
    Same idea as when one wants to CREATE TABLE in one engine which does not
    exist:
  */
  if (!no_engine_found)
    return 0;

  push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
               "This MySQL server does not support any "
               "consistent-read capable storage engine");
  return 0;
}
1605
1606
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1607
static bool flush_handlerton(THD *thd __attribute__((unused)),
149 by Brian Aker
More bool conversion.
1608
                             plugin_ref plugin,
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1609
                             void *arg __attribute__((unused)))
1 by brian
clean slate
1610
{
1611
  handlerton *hton= plugin_data(plugin, handlerton *);
1612
  if (hton->state == SHOW_OPTION_YES && hton->flush_logs && 
1613
      hton->flush_logs(hton))
56 by brian
Next pass of true/false update.
1614
    return true;
1615
  return false;
1 by brian
clean slate
1616
}
1617
1618
1619
/**
  Flush the logs of one storage engine, or of all of them.

  @param db_type  engine to flush; NULL means every storage engine plugin

  @return true on failure (for a specific engine this includes the engine
          not being in state SHOW_OPTION_YES), false on success
*/
bool ha_flush_logs(handlerton *db_type)
{
  if (db_type != NULL)
  {
    /* A specific engine was requested: it has to be enabled. */
    if (db_type->state != SHOW_OPTION_YES)
      return true;
    if (db_type->flush_logs && db_type->flush_logs(db_type))
      return true;
    return false;
  }

  /* No engine given: visit all storage engine plugins. */
  if (plugin_foreach(NULL, flush_handlerton,
                     MYSQL_STORAGE_ENGINE_PLUGIN, 0))
    return true;
  return false;
}
1635
1636
static const char *check_lowercase_names(handler *file, const char *path,
1637
                                         char *tmp_path)
1638
{
1639
  if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
1640
    return path;
1641
1642
  /* Ensure that table handler get path in lower case */
1643
  if (tmp_path != path)
1644
    strmov(tmp_path, path);
1645
1646
  /*
1647
    we only should turn into lowercase database/table part
1648
    so start the process after homedirectory
1649
  */
1650
  my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
1651
  return tmp_path;
1652
}
1653
1654
1655
/**
1656
  An interceptor to hijack the text of the error message without
1657
  setting an error in the thread. We need the text to present it
1658
  in the form of a warning to the user.
1659
*/
1660
1661
struct Ha_delete_table_error_handler: public Internal_error_handler
1662
{
1663
public:
1664
  virtual bool handle_error(uint sql_errno,
1665
                            const char *message,
1666
                            MYSQL_ERROR::enum_warning_level level,
1667
                            THD *thd);
1668
  char buff[MYSQL_ERRMSG_SIZE];
1669
};
1670
1671
1672
bool
1673
Ha_delete_table_error_handler::
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1674
handle_error(uint sql_errno  __attribute__((unused)),
1 by brian
clean slate
1675
             const char *message,
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1676
             MYSQL_ERROR::enum_warning_level level __attribute__((unused)),
1677
             THD *thd __attribute__((unused)))
1 by brian
clean slate
1678
{
1679
  /* Grab the error message */
1680
  strmake(buff, message, sizeof(buff)-1);
56 by brian
Next pass of true/false update.
1681
  return true;
1 by brian
clean slate
1682
}
1683
1684
1685
/**
1686
  This should return ENOENT if the file doesn't exist.
1687
  The .frm file will be deleted only if we return 0 or ENOENT
1688
*/
1689
int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
                    const char *db, const char *alias, bool generate_warning)
{
  char tmp_path[FN_REFLEN];
  TABLE dummy_table;
  TABLE_SHARE dummy_share;

  /* Zeroed stand-ins; only filled in if an error has to be printed. */
  memset((char*) &dummy_table, 0, sizeof(dummy_table));
  memset((char*) &dummy_share, 0, sizeof(dummy_share));
  dummy_table.s= &dummy_share;

  /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
  if (table_type == NULL)
    return ENOENT;

  handler *file= get_new_handler((TABLE_SHARE*)0, thd->mem_root, table_type);
  if (file == NULL)
    return ENOENT;

  path= check_lowercase_names(file, path, tmp_path);

  const int error= file->ha_delete_table(path);
  if (error && generate_warning)
  {
    /*
      file->print_error() reports through my_error(). An internal error
      handler is installed around the call so that the text ends up in a
      temporary buffer, from where it is handed to the user as a warning.
    */
    Ha_delete_table_error_handler ha_delete_table_error_handler;

    /* Fill up structures that print_error may need */
    dummy_share.path.str= (char*) path;
    dummy_share.path.length= strlen(path);
    dummy_share.db.str= (char*) db;
    dummy_share.db.length= strlen(db);
    dummy_share.table_name.str= (char*) alias;
    dummy_share.table_name.length= strlen(alias);
    dummy_table.alias= alias;

    file->change_table_ptr(&dummy_table, &dummy_share);

    thd->push_internal_handler(&ha_delete_table_error_handler);
    file->print_error(error, 0);
    thd->pop_internal_handler();

    /*
      XXX: should we convert *all* errors to warnings here?
      What if the error is fatal?
    */
    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, error,
                 ha_delete_table_error_handler.buff);
  }

  delete file;
  return error;
}
1744
1745
/****************************************************************************
1746
** General handler functions
1747
****************************************************************************/
1748
/**
  Create a second handler for the table this handler is attached to and
  open it.

  @param mem_root  memory root used for the new handler and its ref buffer

  @return the opened clone, or NULL if the handler could not be created,
          the ref buffer could not be allocated, or ha_open() failed
*/
handler *handler::clone(MEM_ROOT *mem_root)
{
  handler *new_handler= get_new_handler(table->s, mem_root, table->s->db_type());
  /*
    get_new_handler() may fail; this must be checked before new_handler is
    dereferenced below.
  */
  if (!new_handler)
    return NULL;

  /*
    Allocate handler->ref here because otherwise ha_open will allocate it
    on this->table->mem_root and we will not be able to reclaim that memory
    when the clone handler object is destroyed.
  */
  if (!(new_handler->ref= (uchar*) alloc_root(mem_root, ALIGN_SIZE(ref_length)*2)))
    return NULL;

  if (new_handler->ha_open(table,
                           table->s->normalized_path.str,
                           table->db_stat,
                           HA_OPEN_IGNORE_IF_LOCKED))
    return NULL;

  return new_handler;
}
1765
1766
1767
1768
/**
  Increment one status counter of the thread that uses this table.

  @param offset  pointer-to-member selecting the counter inside SSV
*/
void handler::ha_statistic_increment(ulong SSV::*offset) const
{
  THD *owner= table->in_use;
  status_var_increment(owner->status_var.*offset);
}
1772
1773
/**
  Return what thd_ha_data() yields for the given thread and this
  handler's handlerton.

  @param thd  thread to look the data up for
*/
void **handler::ha_data(THD *thd) const
{
  void **slot= thd_ha_data(thd, ht);
  return slot;
}
1777
1778
/**
  Return the thread this handler works for.

  @return table->in_use when a table with an owning thread is attached,
          current_thd otherwise
*/
THD *handler::ha_thd(void) const
{
  /* If the table has an owner it must be the calling thread. */
  assert(!table || !table->in_use || table->in_use == current_thd);

  if (table && table->in_use)
    return table->in_use;
  return current_thd;
}
1783
1784
/**
1785
  Open database-handler.
1786
1787
  Try O_RDONLY if cannot open as O_RDWR
1788
  Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set
1789
*/
1790
int handler::ha_open(TABLE *table_arg, const char *name, int mode,
                     int test_if_locked)
{
  table= table_arg;
  assert(table->s == table_share);
  assert(alloc_root_inited(&table->mem_root));

  int rc= open(name, mode, test_if_locked);

  /*
    A read-write open refused with EACCES/EROFS is retried read-only when
    the caller allowed that through HA_TRY_READ_ONLY.
  */
  const bool retry_read_only= (rc == EACCES || rc == EROFS) &&
                              mode == O_RDWR &&
                              (table->db_stat & HA_TRY_READ_ONLY);
  if (retry_read_only)
  {
    table->db_stat|= HA_READ_ONLY;
    rc= open(name, O_RDONLY, test_if_locked);
  }

  if (rc)
  {
    my_errno= rc;                               /* Safeguard */
    return rc;
  }

  if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
    table->db_stat|= HA_READ_ONLY;
  (void) extra(HA_EXTRA_NO_READCHECK);          // Not needed in SQL

  /* ref is already allocated for us if we're called from handler::clone() */
  if (!ref)
    ref= (uchar*) alloc_root(&table->mem_root, ALIGN_SIZE(ref_length)*2);

  if (!ref)
  {
    close();
    rc= HA_ERR_OUT_OF_MEM;
  }
  else
  {
    /* The second half of the double-sized buffer serves as dup_ref. */
    dup_ref= ref + ALIGN_SIZE(ref_length);
  }
  cached_table_flags= table_flags();
  return rc;
}
1831
1832
/**
1833
  one has to use this method when to find
1834
  random position by record as the plain
1835
  position() call doesn't work for some
1836
  handlers for random position
1837
*/
1838
1839
int handler::rnd_pos_by_record(uchar *record)
{
  /* Store the position of the record in ref. */
  position(record);

  /* If a scan is still initialized, end it before starting the new one. */
  if (inited)
  {
    const int end_error= ha_index_end();
    if (end_error)
      return end_error;
  }

  const int init_error= ha_rnd_init(false);
  if (init_error)
    return init_error;

  return rnd_pos(record, ref);
}
1851
1852
/**
1853
  Read first row (only) from a table.
1854
1855
  This is never called for InnoDB tables, as these table types
1856
  have the HA_STATS_RECORDS_IS_EXACT flag set.
1857
*/
1858
int handler::read_first_row(uchar * buf, uint primary_key)
{
  int error;

  ha_statistic_increment(&SSV::ha_read_first_count);

  /*
    If there are very few deleted rows in the table, find the first row by
    scanning the table.
    TODO remove the test for HA_READ_ORDER
  */
  if (stats.deleted < 10 || primary_key >= MAX_KEY ||
      !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
  {
    /*
      Do not read from (or end) a scan that failed to start: report the
      initialization error to the caller instead.
    */
    if ((error= ha_rnd_init(1)))
      return error;
    /* Skip over rows reported as deleted. */
    while ((error= rnd_next(buf)) == HA_ERR_RECORD_DELETED) ;
    (void) ha_rnd_end();
  }
  else
  {
    /* Find the first row through the primary key */
    if ((error= ha_index_init(primary_key, 0)))
      return error;
    error= index_first(buf);
    (void) ha_index_end();
  }
  return error;
}
1885
1886
/**
1887
  Generate the next auto-increment number based on increment and offset.
1888
  computes the lowest number
1889
  - strictly greater than "nr"
1890
  - of the form: auto_increment_offset + N * auto_increment_increment
1891
1892
  In most cases increment= offset= 1, in which case we get:
1893
  @verbatim 1,2,3,4,5,... @endverbatim
1894
    If increment=10 and offset=5 and previous number is 1, we get:
1895
  @verbatim 1,5,15,25,35,... @endverbatim
1896
*/
1897
inline uint64_t
1898
compute_next_insert_id(uint64_t nr,struct system_variables *variables)
1899
{
1900
  if (variables->auto_increment_increment == 1)
1901
    return (nr+1); // optimization of the formula below
1902
  nr= (((nr+ variables->auto_increment_increment -
1903
         variables->auto_increment_offset)) /
1904
       (uint64_t) variables->auto_increment_increment);
1905
  return (nr* (uint64_t) variables->auto_increment_increment +
1906
          variables->auto_increment_offset);
1907
}
1908
1909
1910
void handler::adjust_next_insert_id_after_explicit_value(uint64_t nr)
1911
{
1912
  /*
1913
    If we have set THD::next_insert_id previously and plan to insert an
1914
    explicitely-specified value larger than this, we need to increase
1915
    THD::next_insert_id to be greater than the explicit value.
1916
  */
1917
  if ((next_insert_id > 0) && (nr >= next_insert_id))
1918
    set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
1919
}
1920
1921
1922
/**
1923
  Compute a previous insert id
1924
1925
  Computes the largest number X:
1926
  - smaller than or equal to "nr"
1927
  - of the form: auto_increment_offset + N * auto_increment_increment
1928
    where N>=0.
1929
1930
  @param nr            Number to "round down"
1931
  @param variables     variables struct containing auto_increment_increment and
1932
                       auto_increment_offset
1933
1934
  @return
1935
    The number X if it exists, "nr" otherwise.
1936
*/
1937
inline uint64_t
1938
prev_insert_id(uint64_t nr, struct system_variables *variables)
1939
{
1940
  if (unlikely(nr < variables->auto_increment_offset))
1941
  {
1942
    /*
1943
      There's nothing good we can do here. That is a pathological case, where
1944
      the offset is larger than the column's max possible value, i.e. not even
1945
      the first sequence value may be inserted. User will receive warning.
1946
    */
1947
    return nr;
1948
  }
1949
  if (variables->auto_increment_increment == 1)
1950
    return nr; // optimization of the formula below
1951
  nr= (((nr - variables->auto_increment_offset)) /
1952
       (uint64_t) variables->auto_increment_increment);
1953
  return (nr * (uint64_t) variables->auto_increment_increment +
1954
          variables->auto_increment_offset);
1955
}
1956
1957
1958
/**
1959
  Update the auto_increment field if necessary.
1960
1961
  Updates columns with type NEXT_NUMBER if:
1962
1963
  - If column value is set to NULL (in which case
1964
    auto_increment_field_not_null is 0)
1965
  - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
1966
    set. In the future we will only set NEXT_NUMBER fields if one sets them
1967
    to NULL (or they are not included in the insert list).
1968
1969
    In those cases, we check if the currently reserved interval still has
1970
    values we have not used. If yes, we pick the smallest one and use it.
1971
    Otherwise:
1972
1973
  - If a list of intervals has been provided to the statement via SET
1974
    INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
1975
    first unused interval from this list, consider it as reserved.
1976
1977
  - Otherwise we set the column for the first row to the value
1978
    next_insert_id(get_auto_increment(column))) which is usually
1979
    max-used-column-value+1.
1980
    We call get_auto_increment() for the first row in a multi-row
1981
    statement. get_auto_increment() will tell us the interval of values it
1982
    reserved for us.
1983
1984
  - In both cases, for the following rows we use those reserved values without
1985
    calling the handler again (we just progress in the interval, computing
1986
    each new value from the previous one). Until we have exhausted them, then
1987
    we either take the next provided interval or call get_auto_increment()
1988
    again to reserve a new interval.
1989
1990
  - In both cases, the reserved intervals are remembered in
1991
    thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
1992
    binlogging; the last reserved interval is remembered in
1993
    auto_inc_interval_for_cur_row.
1994
1995
    The idea is that generated auto_increment values are predictable and
1996
    independent of the column values in the table.  This is needed to be
1997
    able to replicate into a table that already has rows with a higher
1998
    auto-increment value than the one that is inserted.
1999
2000
    After we have already generated an auto-increment number and the user
2001
    inserts a column with a higher value than the last used one, we will
2002
    start counting from the inserted value.
2003
2004
    This function's "outputs" are: the table's auto_increment field is filled
2005
    with a value, thd->next_insert_id is filled with the value to use for the
2006
    next row, if a value was autogenerated for the current row it is stored in
2007
    thd->insert_id_for_cur_row, if get_auto_increment() was called
2008
    thd->auto_inc_interval_for_cur_row is modified, if that interval is not
2009
    present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
2010
    this list.
2011
2012
  @todo
2013
    Replace all references to "next number" or NEXT_NUMBER to
2014
    "auto_increment", everywhere (see below: there is
2015
    table->auto_increment_field_not_null, and there also exists
2016
    table->next_number_field, it's not consistent).
2017
2018
  @retval
2019
    0	ok
2020
  @retval
2021
    HA_ERR_AUTOINC_READ_FAILED  get_auto_increment() was called and
2022
    returned ~(uint64_t) 0
2023
  @retval
2024
    HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
2025
    failure.
2026
*/
2027
2028
#define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
2029
#define AUTO_INC_DEFAULT_NB_MAX_BITS 16
2030
#define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)
2031
2032
int handler::update_auto_increment()
2033
{
2034
  uint64_t nr, nb_reserved_values;
56 by brian
Next pass of true/false update.
2035
  bool append= false;
1 by brian
clean slate
2036
  THD *thd= table->in_use;
2037
  struct system_variables *variables= &thd->variables;
2038
2039
  /*
2040
    next_insert_id is a "cursor" into the reserved interval, it may go greater
2041
    than the interval, but not smaller.
2042
  */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2043
  assert(next_insert_id >= auto_inc_interval_for_cur_row.minimum());
1 by brian
clean slate
2044
2045
  if (((nr= table->next_number_field->val_int()) != 0) || 
2046
      (table->auto_increment_field_not_null && (thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO)))
2047
  {
2048
    /*
2049
      Update next_insert_id if we had already generated a value in this
2050
      statement (case of INSERT VALUES(null),(3763),(null):
2051
      the last NULL needs to insert 3764, not the value of the first NULL plus
2052
      1).
2053
    */
2054
    adjust_next_insert_id_after_explicit_value(nr);
2055
    insert_id_for_cur_row= 0; // didn't generate anything
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2056
    return(0);
1 by brian
clean slate
2057
  }
2058
2059
  if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
2060
  {
2061
    /* next_insert_id is beyond what is reserved, so we reserve more. */
2062
    const Discrete_interval *forced=
2063
      thd->auto_inc_intervals_forced.get_next();
2064
    if (forced != NULL)
2065
    {
2066
      nr= forced->minimum();
2067
      nb_reserved_values= forced->values();
2068
    }
2069
    else
2070
    {
2071
      /*
2072
        handler::estimation_rows_to_insert was set by
2073
        handler::ha_start_bulk_insert(); if 0 it means "unknown".
2074
      */
2075
      uint nb_already_reserved_intervals=
2076
        thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements();
2077
      uint64_t nb_desired_values;
2078
      /*
2079
        If an estimation was given to the engine:
2080
        - use it.
2081
        - if we already reserved numbers, it means the estimation was
2082
        not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
2083
        time, twice that the 3rd time etc.
2084
        If no estimation was given, use those increasing defaults from the
2085
        start, starting from AUTO_INC_DEFAULT_NB_ROWS.
2086
        Don't go beyond a max to not reserve "way too much" (because
2087
        reservation means potentially losing unused values).
2088
      */
2089
      if (nb_already_reserved_intervals == 0 &&
2090
          (estimation_rows_to_insert > 0))
2091
        nb_desired_values= estimation_rows_to_insert;
2092
      else /* go with the increasing defaults */
2093
      {
2094
        /* avoid overflow in formula, with this if() */
2095
        if (nb_already_reserved_intervals <= AUTO_INC_DEFAULT_NB_MAX_BITS)
2096
        {
2097
          nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS * 
2098
            (1 << nb_already_reserved_intervals);
2099
          set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
2100
        }
2101
        else
2102
          nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
2103
      }
2104
      /* This call ignores all its parameters but nr, currently */
2105
      get_auto_increment(variables->auto_increment_offset,
2106
                         variables->auto_increment_increment,
2107
                         nb_desired_values, &nr,
2108
                         &nb_reserved_values);
2109
      if (nr == ~(uint64_t) 0)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2110
        return(HA_ERR_AUTOINC_READ_FAILED);  // Mark failure
1 by brian
clean slate
2111
      
2112
      /*
2113
        That rounding below should not be needed when all engines actually
2114
        respect offset and increment in get_auto_increment(). But they don't
2115
        so we still do it. Wonder if for the not-first-in-index we should do
2116
        it. Hope that this rounding didn't push us out of the interval; even
2117
        if it did we cannot do anything about it (calling the engine again
2118
        will not help as we inserted no row).
2119
      */
2120
      nr= compute_next_insert_id(nr-1, variables);
2121
    }
2122
    
2123
    if (table->s->next_number_keypart == 0)
2124
    {
2125
      /* We must defer the appending until "nr" has been possibly truncated */
56 by brian
Next pass of true/false update.
2126
      append= true;
1 by brian
clean slate
2127
    }
2128
  }
2129
152 by Brian Aker
longlong replacement
2130
  if (unlikely(table->next_number_field->store((int64_t) nr, true)))
1 by brian
clean slate
2131
  {
2132
    /*
2133
      first test if the query was aborted due to strict mode constraints
2134
    */
2135
    if (thd->killed == THD::KILL_BAD_DATA)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2136
      return(HA_ERR_AUTOINC_ERANGE);
1 by brian
clean slate
2137
2138
    /*
2139
      field refused this value (overflow) and truncated it, use the result of
2140
      the truncation (which is going to be inserted); however we try to
2141
      decrease it to honour auto_increment_* variables.
2142
      That will shift the left bound of the reserved interval, we don't
2143
      bother shifting the right bound (anyway any other value from this
2144
      interval will cause a duplicate key).
2145
    */
2146
    nr= prev_insert_id(table->next_number_field->val_int(), variables);
152 by Brian Aker
longlong replacement
2147
    if (unlikely(table->next_number_field->store((int64_t) nr, true)))
1 by brian
clean slate
2148
      nr= table->next_number_field->val_int();
2149
  }
2150
  if (append)
2151
  {
2152
    auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
2153
                                          variables->auto_increment_increment);
2154
    /* Row-based replication does not need to store intervals in binlog */
2155
    if (!thd->current_stmt_binlog_row_based)
2156
        thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(),
2157
                                                              auto_inc_interval_for_cur_row.values(),
2158
                                                              variables->auto_increment_increment);
2159
  }
2160
2161
  /*
2162
    Record this autogenerated value. If the caller then
2163
    succeeds to insert this value, it will call
2164
    record_first_successful_insert_id_in_cur_stmt()
2165
    which will set first_successful_insert_id_in_cur_stmt if it's not
2166
    already set.
2167
  */
2168
  insert_id_for_cur_row= nr;
2169
  /*
2170
    Set next insert id to point to next auto-increment value to be able to
2171
    handle multi-row statements.
2172
  */
2173
  set_next_insert_id(compute_next_insert_id(nr, variables));
2174
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2175
  return(0);
1 by brian
clean slate
2176
}
2177
2178
2179
/**
2180
  MySQL signals that it changed the column bitmap
2181
2182
  This is for handlers that need to set up their own column bitmaps.
2183
  Normally the handler should set up their own column bitmaps in
2184
  index_init() or rnd_init() and in any column_bitmaps_signal() call after
2185
  this.
2186
2187
  The handler is allowed to do changes to the bitmap after a index_init or
2188
  rnd_init() call is made as after this, MySQL will not use the bitmap
2189
  for any program logic checking.
2190
*/
2191
void handler::column_bitmaps_signal()
{
  /* This implementation deliberately does nothing. */
}
2195
2196
2197
/**
2198
  Reserves an interval of auto_increment values from the handler.
2199
2200
  offset and increment means that we want values to be of the form
2201
  offset + N * increment, where N>=0 is integer.
2202
  If the function sets *first_value to ~(uint64_t)0 it means an error.
163 by Brian Aker
Merge Monty's code.
2203
  If the function sets *nb_reserved_values to UINT64_MAX it means it has
1 by brian
clean slate
2204
  reserved to "positive infinite".
2205
2206
  @param offset
2207
  @param increment
2208
  @param nb_desired_values   how many values we want
2209
  @param first_value         (OUT) the first value reserved by the handler
2210
  @param nb_reserved_values  (OUT) how many values the handler reserved
2211
*/
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
2212
void handler::get_auto_increment(uint64_t offset __attribute__((unused)),
2213
                                 uint64_t increment __attribute__((unused)),
2214
                                 uint64_t nb_desired_values __attribute__((unused)),
1 by brian
clean slate
2215
                                 uint64_t *first_value,
2216
                                 uint64_t *nb_reserved_values)
2217
{
2218
  uint64_t nr;
2219
  int error;
2220
2221
  (void) extra(HA_EXTRA_KEYREAD);
2222
  table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
2223
                                        table->read_set);
2224
  column_bitmaps_signal();
2225
  index_init(table->s->next_number_index, 1);
2226
  if (table->s->next_number_keypart == 0)
2227
  {						// Autoincrement at key-start
2228
    error=index_last(table->record[1]);
2229
    /*
2230
      MySQL implicitely assumes such method does locking (as MySQL decides to
2231
      use nr+increment without checking again with the handler, in
2232
      handler::update_auto_increment()), so reserves to infinite.
2233
    */
163 by Brian Aker
Merge Monty's code.
2234
    *nb_reserved_values= UINT64_MAX;
1 by brian
clean slate
2235
  }
2236
  else
2237
  {
2238
    uchar key[MAX_KEY_LENGTH];
2239
    key_copy(key, table->record[0],
2240
             table->key_info + table->s->next_number_index,
2241
             table->s->next_number_key_offset);
2242
    error= index_read_map(table->record[1], key,
2243
                          make_prev_keypart_map(table->s->next_number_keypart),
2244
                          HA_READ_PREFIX_LAST);
2245
    /*
2246
      MySQL needs to call us for next row: assume we are inserting ("a",null)
2247
      here, we return 3, and next this statement will want to insert
2248
      ("b",null): there is no reason why ("b",3+1) would be the good row to
2249
      insert: maybe it already exists, maybe 3+1 is too large...
2250
    */
2251
    *nb_reserved_values= 1;
2252
  }
2253
2254
  if (error)
2255
    nr=1;
2256
  else
2257
    nr= ((uint64_t) table->next_number_field->
2258
         val_int_offset(table->s->rec_buff_length)+1);
2259
  index_end();
2260
  (void) extra(HA_EXTRA_NO_KEYREAD);
2261
  *first_value= nr;
2262
}
2263
2264
2265
void handler::ha_release_auto_increment()
2266
{
2267
  release_auto_increment();
2268
  insert_id_for_cur_row= 0;
2269
  auto_inc_interval_for_cur_row.replace(0, 0, 0);
2270
  if (next_insert_id > 0)
2271
  {
2272
    next_insert_id= 0;
2273
    /*
2274
      this statement used forced auto_increment values if there were some,
2275
      wipe them away for other statements.
2276
    */
2277
    table->in_use->auto_inc_intervals_forced.empty();
2278
  }
2279
}
2280
2281
2282
/**
  Report a duplicate-key error (ER_DUP_ENTRY) with the offending key value.

  @param key_nr  number of the duplicated key, or MAX_KEY if it is unknown
  @param msg     format string passed to my_printf_error(); it receives
                 the key value and the key name
*/
void handler::print_keydup_error(uint key_nr, const char *msg)
{
  /* Write the duplicated key in the error message */
  char key[MAX_KEY_LENGTH];
  String str(key,sizeof(key),system_charset_info);

  if (key_nr == MAX_KEY)
  {
    /* Key is unknown */
    str.copy("", 0, system_charset_info);
    my_printf_error(ER_DUP_ENTRY, msg, MYF(0), str.c_ptr(), "*UNKNOWN*");
    return;
  }

  /* Table is opened and defined at this point */
  key_unpack(&str,table,(uint) key_nr);

  /* Shorten over-long key text and end it with "...". */
  const uint max_length= MYSQL_ERRMSG_SIZE-(uint) strlen(msg);
  if (str.length() >= max_length)
  {
    str.length(max_length-4);
    str.append(STRING_WITH_LEN("..."));
  }
  my_printf_error(ER_DUP_ENTRY, msg,
                  MYF(0), str.c_ptr(), table->key_info[key_nr].name);
}
2308
2309
2310
/**
2311
  Print error that we got from handler function.
2312
2313
  @note
2314
    In case of delete table it's only safe to use the following parts of
2315
    the 'table' structure:
2316
    - table->s->path
2317
    - table->alias
2318
*/
2319
void handler::print_error(int error, myf errflag)
2320
{
2321
  int textno=ER_GET_ERRNO;
2322
  switch (error) {
2323
  case EACCES:
2324
    textno=ER_OPEN_AS_READONLY;
2325
    break;
2326
  case EAGAIN:
2327
    textno=ER_FILE_USED;
2328
    break;
2329
  case ENOENT:
2330
    textno=ER_FILE_NOT_FOUND;
2331
    break;
2332
  case HA_ERR_KEY_NOT_FOUND:
2333
  case HA_ERR_NO_ACTIVE_RECORD:
2334
  case HA_ERR_END_OF_FILE:
2335
    textno=ER_KEY_NOT_FOUND;
2336
    break;
2337
  case HA_ERR_WRONG_MRG_TABLE_DEF:
2338
    textno=ER_WRONG_MRG_TABLE;
2339
    break;
2340
  case HA_ERR_FOUND_DUPP_KEY:
2341
  {
2342
    uint key_nr=get_dup_key(error);
2343
    if ((int) key_nr >= 0)
2344
    {
2345
      print_keydup_error(key_nr, ER(ER_DUP_ENTRY_WITH_KEY_NAME));
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2346
      return;
1 by brian
clean slate
2347
    }
2348
    textno=ER_DUP_KEY;
2349
    break;
2350
  }
2351
  case HA_ERR_FOREIGN_DUPLICATE_KEY:
2352
  {
2353
    uint key_nr= get_dup_key(error);
2354
    if ((int) key_nr >= 0)
2355
    {
2356
      uint max_length;
2357
      /* Write the key in the error message */
2358
      char key[MAX_KEY_LENGTH];
2359
      String str(key,sizeof(key),system_charset_info);
2360
      /* Table is opened and defined at this point */
2361
      key_unpack(&str,table,(uint) key_nr);
2362
      max_length= (MYSQL_ERRMSG_SIZE-
2363
                   (uint) strlen(ER(ER_FOREIGN_DUPLICATE_KEY)));
2364
      if (str.length() >= max_length)
2365
      {
2366
        str.length(max_length-4);
2367
        str.append(STRING_WITH_LEN("..."));
2368
      }
2369
      my_error(ER_FOREIGN_DUPLICATE_KEY, MYF(0), table_share->table_name.str,
2370
        str.c_ptr(), key_nr+1);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2371
      return;
1 by brian
clean slate
2372
    }
2373
    textno= ER_DUP_KEY;
2374
    break;
2375
  }
2376
  case HA_ERR_FOUND_DUPP_UNIQUE:
2377
    textno=ER_DUP_UNIQUE;
2378
    break;
2379
  case HA_ERR_RECORD_CHANGED:
2380
    textno=ER_CHECKREAD;
2381
    break;
2382
  case HA_ERR_CRASHED:
2383
    textno=ER_NOT_KEYFILE;
2384
    break;
2385
  case HA_ERR_WRONG_IN_RECORD:
2386
    textno= ER_CRASHED_ON_USAGE;
2387
    break;
2388
  case HA_ERR_CRASHED_ON_USAGE:
2389
    textno=ER_CRASHED_ON_USAGE;
2390
    break;
2391
  case HA_ERR_NOT_A_TABLE:
2392
    textno= error;
2393
    break;
2394
  case HA_ERR_CRASHED_ON_REPAIR:
2395
    textno=ER_CRASHED_ON_REPAIR;
2396
    break;
2397
  case HA_ERR_OUT_OF_MEM:
2398
    textno=ER_OUT_OF_RESOURCES;
2399
    break;
2400
  case HA_ERR_WRONG_COMMAND:
2401
    textno=ER_ILLEGAL_HA;
2402
    break;
2403
  case HA_ERR_OLD_FILE:
2404
    textno=ER_OLD_KEYFILE;
2405
    break;
2406
  case HA_ERR_UNSUPPORTED:
2407
    textno=ER_UNSUPPORTED_EXTENSION;
2408
    break;
2409
  case HA_ERR_RECORD_FILE_FULL:
2410
  case HA_ERR_INDEX_FILE_FULL:
2411
    textno=ER_RECORD_FILE_FULL;
2412
    break;
2413
  case HA_ERR_LOCK_WAIT_TIMEOUT:
2414
    textno=ER_LOCK_WAIT_TIMEOUT;
2415
    break;
2416
  case HA_ERR_LOCK_TABLE_FULL:
2417
    textno=ER_LOCK_TABLE_FULL;
2418
    break;
2419
  case HA_ERR_LOCK_DEADLOCK:
2420
    textno=ER_LOCK_DEADLOCK;
2421
    break;
2422
  case HA_ERR_READ_ONLY_TRANSACTION:
2423
    textno=ER_READ_ONLY_TRANSACTION;
2424
    break;
2425
  case HA_ERR_CANNOT_ADD_FOREIGN:
2426
    textno=ER_CANNOT_ADD_FOREIGN;
2427
    break;
2428
  case HA_ERR_ROW_IS_REFERENCED:
2429
  {
2430
    String str;
2431
    get_error_message(error, &str);
2432
    my_error(ER_ROW_IS_REFERENCED_2, MYF(0), str.c_ptr_safe());
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2433
    return;
1 by brian
clean slate
2434
  }
2435
  case HA_ERR_NO_REFERENCED_ROW:
2436
  {
2437
    String str;
2438
    get_error_message(error, &str);
2439
    my_error(ER_NO_REFERENCED_ROW_2, MYF(0), str.c_ptr_safe());
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2440
    return;
1 by brian
clean slate
2441
  }
2442
  case HA_ERR_TABLE_DEF_CHANGED:
2443
    textno=ER_TABLE_DEF_CHANGED;
2444
    break;
2445
  case HA_ERR_NO_SUCH_TABLE:
2446
    my_error(ER_NO_SUCH_TABLE, MYF(0), table_share->db.str,
2447
             table_share->table_name.str);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2448
    return;
1 by brian
clean slate
2449
  case HA_ERR_RBR_LOGGING_FAILED:
2450
    textno= ER_BINLOG_ROW_LOGGING_FAILED;
2451
    break;
2452
  case HA_ERR_DROP_INDEX_FK:
2453
  {
2454
    const char *ptr= "???";
2455
    uint key_nr= get_dup_key(error);
2456
    if ((int) key_nr >= 0)
2457
      ptr= table->key_info[key_nr].name;
2458
    my_error(ER_DROP_INDEX_FK, MYF(0), ptr);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2459
    return;
1 by brian
clean slate
2460
  }
2461
  case HA_ERR_TABLE_NEEDS_UPGRADE:
2462
    textno=ER_TABLE_NEEDS_UPGRADE;
2463
    break;
2464
  case HA_ERR_TABLE_READONLY:
2465
    textno= ER_OPEN_AS_READONLY;
2466
    break;
2467
  case HA_ERR_AUTOINC_READ_FAILED:
2468
    textno= ER_AUTOINC_READ_FAILED;
2469
    break;
2470
  case HA_ERR_AUTOINC_ERANGE:
2471
    textno= ER_WARN_DATA_OUT_OF_RANGE;
2472
    break;
2473
  case HA_ERR_LOCK_OR_ACTIVE_TRANSACTION:
2474
    my_message(ER_LOCK_OR_ACTIVE_TRANSACTION,
2475
               ER(ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0));
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2476
    return;
1 by brian
clean slate
2477
    break;
2478
  default:
2479
    {
2480
      /* The error was "unknown" to this function.
2481
	 Ask handler if it has got a message for this error */
56 by brian
Next pass of true/false update.
2482
      bool temporary= false;
1 by brian
clean slate
2483
      String str;
2484
      temporary= get_error_message(error, &str);
2485
      if (!str.is_empty())
2486
      {
2487
	const char* engine= table_type();
2488
	if (temporary)
2489
	  my_error(ER_GET_TEMPORARY_ERRMSG, MYF(0), error, str.ptr(), engine);
2490
	else
2491
	  my_error(ER_GET_ERRMSG, MYF(0), error, str.ptr(), engine);
2492
      }
2493
      else
2494
	my_error(ER_GET_ERRNO,errflag,error);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2495
      return;
1 by brian
clean slate
2496
    }
2497
  }
2498
  my_error(textno, errflag, table_share->table_name.str, error);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2499
  return;
1 by brian
clean slate
2500
}
2501
2502
2503
/**
2504
  Return an error message specific to this handler.
2505
2506
  @param error  error code previously returned by handler
2507
  @param buf    pointer to String where to add error message
2508
2509
  @return
2510
    Returns true if this is a temporary error
2511
*/
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
2512
bool handler::get_error_message(int error __attribute__((unused)),
2513
                                String* buf __attribute__((unused)))
1 by brian
clean slate
2514
{
56 by brian
Next pass of true/false update.
2515
  return false;
1 by brian
clean slate
2516
}
2517
2518
2519
/**
  Check whether the table needs an upgrade: public interface.

  When the table share carries no version number, every key part is
  inspected for a BLOB field. The first one found makes the function return
  HA_ADMIN_NEEDS_CHECK (and select T_MEDIUM when checking FOR UPGRADE).
  Otherwise the decision is left to check_for_upgrade().

  @param check_opt  check options; 'flags' may be updated

  @return
    HA_ADMIN_NEEDS_CHECK, or the result of check_for_upgrade()
*/
int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
{
  if (table->s->mysql_version)
    return check_for_upgrade(check_opt);

  /* check for blob-in-key error */
  KEY *const last_key= table->key_info + table->s->keys;
  for (KEY *index= table->key_info; index < last_key; index++)
  {
    KEY_PART_INFO *const last_part= index->key_part + index->key_parts;
    for (KEY_PART_INFO *part= index->key_part; part < last_part; part++)
    {
      if (!part->fieldnr)
        continue;
      Field *column= table->field[part->fieldnr - 1];
      if (column->type() != DRIZZLE_TYPE_BLOB)
        continue;

      if (check_opt->sql_flags & TT_FOR_UPGRADE)
        check_opt->flags= T_MEDIUM;
      return HA_ADMIN_NEEDS_CHECK;
    }
  }
  return check_for_upgrade(check_opt);
}
2549
2550
2551
/* Code left, but Drizzle has no legacy yet (while MySQL did) */
2552
int handler::check_old_types()
2553
{
2554
  return 0;
2555
}
2556
2557
2558
/**
  Stamp the table's .frm file with the version of the running server.

  Writes MYSQL_VERSION_ID as a 4-byte value at offset 51 of the .frm file
  and, once the write succeeded, updates the version in the share of every
  entry in the open table cache that has the same cache key.

  @param table  table whose definition file is to be updated

  @retval
    false  Nothing to do, or the version was written successfully
  @retval
    true   The file could not be opened or the version could not be written
*/
static bool update_frm_version(TABLE *table)
{
  char path[FN_REFLEN];

  /*
    No need to update frm version in case table was created or checked
    by server with the same version. This also ensures that we do not
    update frm version for temporary tables as this code doesn't support
    temporary tables.
  */
  if (table->s->mysql_version == MYSQL_VERSION_ID)
    return false;

  strxmov(path, table->s->normalized_path.str, reg_ext, NullS);

  const File file= my_open(path, O_RDWR|O_BINARY, MYF(MY_WME));
  if (file < 0)
    return true;

  bool result= true;                            // Error until proven otherwise
  uchar version[4];
  int4store(version, MYSQL_VERSION_ID);

  /*
    pwrite() returns the number of bytes written, or -1 on error.
    Anything other than a complete 4-byte write is a failure.
  */
  if (pwrite(file, (uchar*) version, 4, 51L) == (ssize_t) sizeof(version))
  {
    char *key= table->s->table_cache_key.str;
    uint key_length= table->s->table_cache_key.length;
    HASH_SEARCH_STATE state;

    for (TABLE *entry= (TABLE*) hash_first(&open_cache, (uchar*) key,
                                           key_length, &state);
         entry;
         entry= (TABLE*) hash_next(&open_cache, (uchar*) key,
                                   key_length, &state))
      entry->s->mysql_version= MYSQL_VERSION_ID;

    result= false;
  }

  VOID(my_close(file, MYF(MY_WME)));
  return result;
}
2601
2602
2603
2604
/**
2605
  @return
2606
    key if error because of duplicated keys
2607
*/
2608
uint handler::get_dup_key(int error)
2609
{
2610
  table->file->errkey  = (uint) -1;
2611
  if (error == HA_ERR_FOUND_DUPP_KEY || error == HA_ERR_FOREIGN_DUPLICATE_KEY ||
2612
      error == HA_ERR_FOUND_DUPP_UNIQUE ||
2613
      error == HA_ERR_DROP_INDEX_FK)
2614
    info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2615
  return(table->file->errkey);
1 by brian
clean slate
2616
}
2617
2618
2619
/**
2620
  Delete all files with extension from bas_ext().
2621
2622
  @param name		Base name of table
2623
2624
  @note
2625
    We assume that the handler may return more extensions than
2626
    was actually used for the file.
2627
2628
  @retval
2629
    0   If we successfully deleted at least one file from base_ext and
2630
    didn't get any other errors than ENOENT
2631
  @retval
2632
    !0  Error
2633
*/
2634
int handler::delete_table(const char *name)
2635
{
2636
  int error= 0;
2637
  int enoent_or_zero= ENOENT;                   // Error if no file was deleted
2638
  char buff[FN_REFLEN];
2639
2640
  for (const char **ext=bas_ext(); *ext ; ext++)
2641
  {
2642
    fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME|MY_APPEND_EXT);
2643
    if (my_delete_with_symlink(buff, MYF(0)))
2644
    {
2645
      if ((error= my_errno) != ENOENT)
2646
	break;
2647
    }
2648
    else
2649
      enoent_or_zero= 0;                        // No error for ENOENT
2650
    error= enoent_or_zero;
2651
  }
2652
  return error;
2653
}
2654
2655
2656
int handler::rename_table(const char * from, const char * to)
2657
{
2658
  int error= 0;
2659
  for (const char **ext= bas_ext(); *ext ; ext++)
2660
  {
2661
    if (rename_file_ext(from, to, *ext))
2662
    {
2663
      if ((error=my_errno) != ENOENT)
2664
	break;
2665
      error= 0;
2666
    }
2667
  }
2668
  return error;
2669
}
2670
2671
2672
void handler::drop_table(const char *name)
2673
{
2674
  close();
2675
  delete_table(name);
2676
}
2677
2678
2679
/**
2680
  Performs checks upon the table.
2681
2682
  @param thd                thread doing CHECK TABLE operation
2683
  @param check_opt          options from the parser
2684
2685
  @retval
2686
    HA_ADMIN_OK               Successful upgrade
2687
  @retval
2688
    HA_ADMIN_NEEDS_UPGRADE    Table has structures requiring upgrade
2689
  @retval
2690
    HA_ADMIN_NEEDS_ALTER      Table has structures requiring ALTER TABLE
2691
  @retval
2692
    HA_ADMIN_NOT_IMPLEMENTED
2693
*/
2694
int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
2695
{
2696
  int error;
2697
2698
  if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
2699
      (check_opt->sql_flags & TT_FOR_UPGRADE))
2700
    return 0;
2701
2702
  if (table->s->mysql_version < MYSQL_VERSION_ID)
2703
  {
2704
    if ((error= check_old_types()))
2705
      return error;
2706
    error= ha_check_for_upgrade(check_opt);
2707
    if (error && (error != HA_ADMIN_NEEDS_CHECK))
2708
      return error;
2709
    if (!error && (check_opt->sql_flags & TT_FOR_UPGRADE))
2710
      return 0;
2711
  }
2712
  if ((error= check(thd, check_opt)))
2713
    return error;
2714
  return update_frm_version(table);
2715
}
2716
2717
/**
2718
  A helper function to mark a transaction read-write,
2719
  if it is started.
2720
*/
2721
2722
inline
2723
void
2724
handler::mark_trx_read_write()
2725
{
2726
  Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
2727
  /*
2728
    When a storage engine method is called, the transaction must
2729
    have been started, unless it's a DDL call, for which the
2730
    storage engine starts the transaction internally, and commits
2731
    it internally, without registering in the ha_list.
2732
    Unfortunately here we can't know know for sure if the engine
2733
    has registered the transaction or not, so we must check.
2734
  */
2735
  if (ha_info->is_started())
2736
  {
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2737
    assert(has_transactions());
1 by brian
clean slate
2738
    /*
2739
      table_share can be NULL in ha_delete_table(). See implementation
2740
      of standalone function ha_delete_table() in sql_base.cc.
2741
    */
2742
    if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
2743
      ha_info->set_trx_read_write();
2744
  }
2745
}
2746
2747
2748
/**
2749
  Repair table: public interface.
2750
2751
  @sa handler::repair()
2752
*/
2753
2754
int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
2755
{
2756
  int result;
2757
2758
  mark_trx_read_write();
2759
2760
  if ((result= repair(thd, check_opt)))
2761
    return result;
2762
  return update_frm_version(table);
2763
}
2764
2765
2766
/**
2767
  Bulk update row: public interface.
2768
2769
  @sa handler::bulk_update_row()
2770
*/
2771
2772
int
2773
handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data,
2774
                            uint *dup_key_found)
2775
{
2776
  mark_trx_read_write();
2777
2778
  return bulk_update_row(old_data, new_data, dup_key_found);
2779
}
2780
2781
2782
/**
2783
  Delete all rows: public interface.
2784
2785
  @sa handler::delete_all_rows()
2786
*/
2787
2788
int
2789
handler::ha_delete_all_rows()
2790
{
2791
  mark_trx_read_write();
2792
2793
  return delete_all_rows();
2794
}
2795
2796
2797
/**
2798
  Reset auto increment: public interface.
2799
2800
  @sa handler::reset_auto_increment()
2801
*/
2802
2803
int
2804
handler::ha_reset_auto_increment(uint64_t value)
2805
{
2806
  mark_trx_read_write();
2807
2808
  return reset_auto_increment(value);
2809
}
2810
2811
2812
/**
2813
  Optimize table: public interface.
2814
2815
  @sa handler::optimize()
2816
*/
2817
2818
int
2819
handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
2820
{
2821
  mark_trx_read_write();
2822
2823
  return optimize(thd, check_opt);
2824
}
2825
2826
2827
/**
2828
  Analyze table: public interface.
2829
2830
  @sa handler::analyze()
2831
*/
2832
2833
int
2834
handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
2835
{
2836
  mark_trx_read_write();
2837
2838
  return analyze(thd, check_opt);
2839
}
2840
2841
2842
/**
2843
  Check and repair table: public interface.
2844
2845
  @sa handler::check_and_repair()
2846
*/
2847
2848
bool
2849
handler::ha_check_and_repair(THD *thd)
2850
{
2851
  mark_trx_read_write();
2852
2853
  return check_and_repair(thd);
2854
}
2855
2856
2857
/**
2858
  Disable indexes: public interface.
2859
2860
  @sa handler::disable_indexes()
2861
*/
2862
2863
int
2864
handler::ha_disable_indexes(uint mode)
2865
{
2866
  mark_trx_read_write();
2867
2868
  return disable_indexes(mode);
2869
}
2870
2871
2872
/**
2873
  Enable indexes: public interface.
2874
2875
  @sa handler::enable_indexes()
2876
*/
2877
2878
int
2879
handler::ha_enable_indexes(uint mode)
2880
{
2881
  mark_trx_read_write();
2882
2883
  return enable_indexes(mode);
2884
}
2885
2886
2887
/**
2888
  Discard or import tablespace: public interface.
2889
2890
  @sa handler::discard_or_import_tablespace()
2891
*/
2892
2893
int
200 by Brian Aker
my_bool from handler and set_var
2894
handler::ha_discard_or_import_tablespace(bool discard)
1 by brian
clean slate
2895
{
2896
  mark_trx_read_write();
2897
2898
  return discard_or_import_tablespace(discard);
2899
}
2900
2901
2902
/**
2903
  Prepare for alter: public interface.
2904
2905
  Called to prepare an *online* ALTER.
2906
2907
  @sa handler::prepare_for_alter()
2908
*/
2909
2910
void
2911
handler::ha_prepare_for_alter()
2912
{
2913
  mark_trx_read_write();
2914
2915
  prepare_for_alter();
2916
}
2917
2918
2919
/**
2920
  Rename table: public interface.
2921
2922
  @sa handler::rename_table()
2923
*/
2924
2925
int
2926
handler::ha_rename_table(const char *from, const char *to)
2927
{
2928
  mark_trx_read_write();
2929
2930
  return rename_table(from, to);
2931
}
2932
2933
2934
/**
2935
  Delete table: public interface.
2936
2937
  @sa handler::delete_table()
2938
*/
2939
2940
int
2941
handler::ha_delete_table(const char *name)
2942
{
2943
  mark_trx_read_write();
2944
2945
  return delete_table(name);
2946
}
2947
2948
2949
/**
2950
  Drop table in the engine: public interface.
2951
2952
  @sa handler::drop_table()
2953
*/
2954
2955
void
2956
handler::ha_drop_table(const char *name)
2957
{
2958
  mark_trx_read_write();
2959
2960
  return drop_table(name);
2961
}
2962
2963
2964
/**
2965
  Create a table in the engine: public interface.
2966
2967
  @sa handler::create()
2968
*/
2969
2970
int
2971
handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info)
2972
{
2973
  mark_trx_read_write();
2974
2975
  return create(name, form, info);
2976
}
2977
2978
2979
/**
2980
  Create handler files for CREATE TABLE: public interface.
2981
2982
  @sa handler::create_handler_files()
2983
*/
2984
2985
int
2986
handler::ha_create_handler_files(const char *name, const char *old_name,
2987
                        int action_flag, HA_CREATE_INFO *info)
2988
{
2989
  mark_trx_read_write();
2990
2991
  return create_handler_files(name, old_name, action_flag, info);
2992
}
2993
2994
2995
/**
2996
  Tell the storage engine that it is allowed to "disable transaction" in the
2997
  handler. It is a hint that ACID is not required - it is used in NDB for
2998
  ALTER TABLE, for example, when data are copied to temporary table.
2999
  A storage engine may treat this hint any way it likes. NDB for example
3000
  starts to commit every now and then automatically.
3001
  This hint can be safely ignored.
3002
*/
3003
int ha_enable_transaction(THD *thd, bool on)
3004
{
3005
  int error=0;
3006
3007
  if ((thd->transaction.on= on))
3008
  {
3009
    /*
3010
      Now all storage engines should have transaction handling enabled.
3011
      But some may have it enabled all the time - "disabling" transactions
3012
      is an optimization hint that storage engine is free to ignore.
3013
      So, let's commit an open transaction (if any) now.
3014
    */
3015
    if (!(error= ha_commit_trans(thd, 0)))
3016
      error= end_trans(thd, COMMIT);
3017
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3018
  return(error);
1 by brian
clean slate
3019
}
3020
3021
/**
  Read the next index row and verify that it still matches 'key'.

  @param buf     buffer that index_next() reads the row into
  @param key     key value the row is compared against
  @param keylen  length of 'key'

  @return
    Result of index_next(), or HA_ERR_END_OF_FILE (with table->status set
    to STATUS_NOT_FOUND) when the row read does not match 'key'
*/
int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
{
  int error= index_next(buf);
  if (error)
    return error;

  const my_ptrdiff_t shift= buf - table->record[0];
  uchar *orig_record_0= NULL;
  KEY *index= NULL;
  KEY_PART_INFO *parts_end= NULL;

  /*
    key_cmp_if_same() compares table->record[0] against 'key'.
    In parts it uses table->record[0] directly, in parts it uses
    field objects with their local pointers into table->record[0].
    If 'buf' is distinct from table->record[0], we need to move
    all record references. This is table->record[0] itself and
    the field pointers of the fields used in this key.
  */
  if (shift)
  {
    orig_record_0= table->record[0];
    table->record[0]= buf;
    index= table->key_info + active_index;
    parts_end= index->key_part + index->key_parts;
    for (KEY_PART_INFO *part= index->key_part; part < parts_end; part++)
    {
      assert(part->field);
      part->field->move_field_offset(shift);
    }
  }

  if (key_cmp_if_same(table, key, active_index, keylen))
  {
    table->status= STATUS_NOT_FOUND;
    error= HA_ERR_END_OF_FILE;
  }

  /* Move back if necessary. */
  if (shift)
  {
    table->record[0]= orig_record_0;
    for (KEY_PART_INFO *part= index->key_part; part < parts_end; part++)
      part->field->move_field_offset(-shift);
  }
  return error;
}
3070
3071
3072
/****************************************************************************
3073
** Some general functions that aren't in the handler class
3074
****************************************************************************/
3075
3076
/**
3077
  Initiates table-file and calls appropriate database-creator.
3078
3079
  @retval
3080
   0  ok
3081
  @retval
3082
   1  error
3083
*/
3084
int ha_create_table(THD *thd, const char *path,
3085
                    const char *db, const char *table_name,
3086
                    HA_CREATE_INFO *create_info,
3087
		    bool update_create_info)
3088
{
3089
  int error= 1;
3090
  TABLE table;
3091
  char name_buff[FN_REFLEN];
3092
  const char *name;
3093
  TABLE_SHARE share;
3094
  
3095
  init_tmp_table_share(thd, &share, db, 0, table_name, path);
3096
  if (open_table_def(thd, &share, 0) ||
3097
      open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table,
3098
                            OTM_CREATE))
3099
    goto err;
3100
3101
  if (update_create_info)
3102
    update_create_info_from_table(create_info, &table);
3103
3104
  name= check_lowercase_names(table.file, share.path.str, name_buff);
3105
3106
  error= table.file->ha_create(name, &table, create_info);
3107
  VOID(closefrm(&table, 0));
3108
  if (error)
3109
  {
3110
    strxmov(name_buff, db, ".", table_name, NullS);
3111
    my_error(ER_CANT_CREATE_TABLE, MYF(ME_BELL+ME_WAITTANG), name_buff, error);
3112
  }
3113
err:
3114
  free_table_share(&share);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3115
  return(error != 0);
1 by brian
clean slate
3116
}
3117
3118
/**
3119
  Try to discover table from engine.
3120
3121
  @note
3122
    If found, write the frm file to disk.
3123
3124
  @retval
3125
  -1    Table did not exists
3126
  @retval
3127
   0    Table created ok
3128
  @retval
3129
   > 0  Error, table existed but could not be created
3130
*/
3131
int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
3132
{
3133
  int error;
3134
  uchar *frmblob;
3135
  size_t frmlen;
3136
  char path[FN_REFLEN];
3137
  HA_CREATE_INFO create_info;
3138
  TABLE table;
3139
  TABLE_SHARE share;
3140
212.6.1 by Mats Kindahl
Replacing all bzero() calls with memset() calls and removing the bzero.c file.
3141
  memset((uchar*) &create_info, 0, sizeof(create_info));
1 by brian
clean slate
3142
  if ((error= ha_discover(thd, db, name, &frmblob, &frmlen)))
3143
  {
3144
    /* Table could not be discovered and thus not created */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3145
    return(error);
1 by brian
clean slate
3146
  }
3147
3148
  /*
3149
    Table exists in handler and could be discovered
3150
    frmblob and frmlen are set, write the frm to disk
3151
  */
3152
3153
  build_table_filename(path, FN_REFLEN-1, db, name, "", 0);
3154
  // Save the frm file
3155
  error= writefrm(path, frmblob, frmlen);
3156
  my_free(frmblob, MYF(0));
3157
  if (error)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3158
    return(2);
1 by brian
clean slate
3159
3160
  init_tmp_table_share(thd, &share, db, 0, name, path);
3161
  if (open_table_def(thd, &share, 0))
3162
  {
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3163
    return(3);
1 by brian
clean slate
3164
  }
3165
  if (open_table_from_share(thd, &share, "" ,0, 0, 0, &table, OTM_OPEN))
3166
  {
3167
    free_table_share(&share);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3168
    return(3);
1 by brian
clean slate
3169
  }
3170
3171
  update_create_info_from_table(&create_info, &table);
3172
  create_info.table_options|= HA_OPTION_CREATE_FROM_ENGINE;
3173
3174
  check_lowercase_names(table.file, path, path);
3175
  error=table.file->ha_create(path, &table, &create_info);
3176
  VOID(closefrm(&table, 1));
3177
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3178
  return(error != 0);
1 by brian
clean slate
3179
}
3180
3181
void st_ha_check_opt::init()
3182
{
3183
  flags= sql_flags= 0;
3184
  sort_buffer_size = current_thd->variables.myisam_sort_buff_size;
3185
}
3186
3187
3188
/*****************************************************************************
3189
  Key cache handling.
3190
3191
  This code is only relevant for ISAM/MyISAM tables
3192
3193
  key_cache->cache may be 0 only in the case where a key cache is not
3194
  initialized or when we were not able to init the key cache in a previous
3195
  call to ha_init_key_cache() (probably out of memory)
3196
*****************************************************************************/
3197
3198
/**
3199
  Init a key cache if it has not been initied before.
3200
*/
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
3201
int ha_init_key_cache(const char *name __attribute__((unused)),
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3202
                      KEY_CACHE *key_cache)
1 by brian
clean slate
3203
{
3204
  if (!key_cache->key_cache_inited)
3205
  {
3206
    pthread_mutex_lock(&LOCK_global_system_variables);
61 by Brian Aker
Conversion of handler type.
3207
    uint32_t tmp_buff_size= (uint32_t) key_cache->param_buff_size;
1 by brian
clean slate
3208
    uint tmp_block_size= (uint) key_cache->param_block_size;
3209
    uint division_limit= key_cache->param_division_limit;
3210
    uint age_threshold=  key_cache->param_age_threshold;
3211
    pthread_mutex_unlock(&LOCK_global_system_variables);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3212
    return(!init_key_cache(key_cache,
1 by brian
clean slate
3213
				tmp_block_size,
3214
				tmp_buff_size,
3215
				division_limit, age_threshold));
3216
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3217
  return(0);
1 by brian
clean slate
3218
}
3219
3220
3221
/**
3222
  Resize key cache.
3223
*/
3224
int ha_resize_key_cache(KEY_CACHE *key_cache)
3225
{
3226
  if (key_cache->key_cache_inited)
3227
  {
3228
    pthread_mutex_lock(&LOCK_global_system_variables);
3229
    long tmp_buff_size= (long) key_cache->param_buff_size;
3230
    long tmp_block_size= (long) key_cache->param_block_size;
3231
    uint division_limit= key_cache->param_division_limit;
3232
    uint age_threshold=  key_cache->param_age_threshold;
3233
    pthread_mutex_unlock(&LOCK_global_system_variables);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3234
    return(!resize_key_cache(key_cache, tmp_block_size,
1 by brian
clean slate
3235
				  tmp_buff_size,
3236
				  division_limit, age_threshold));
3237
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3238
  return(0);
1 by brian
clean slate
3239
}
3240
3241
3242
/**
3243
  Change parameters for key cache (like size)
3244
*/
3245
int ha_change_key_cache_param(KEY_CACHE *key_cache)
3246
{
3247
  if (key_cache->key_cache_inited)
3248
  {
3249
    pthread_mutex_lock(&LOCK_global_system_variables);
3250
    uint division_limit= key_cache->param_division_limit;
3251
    uint age_threshold=  key_cache->param_age_threshold;
3252
    pthread_mutex_unlock(&LOCK_global_system_variables);
3253
    change_key_cache_param(key_cache, division_limit, age_threshold);
3254
  }
3255
  return 0;
3256
}
3257
3258
/**
3259
  Free memory allocated by a key cache.
3260
*/
3261
int ha_end_key_cache(KEY_CACHE *key_cache)
3262
{
3263
  end_key_cache(key_cache, 1);		// Can never fail
3264
  return 0;
3265
}
3266
3267
/**
3268
  Move all tables from one key cache to another one.
3269
*/
3270
int ha_change_key_cache(KEY_CACHE *old_key_cache,
3271
			KEY_CACHE *new_key_cache)
3272
{
3273
  mi_change_key_cache(old_key_cache, new_key_cache);
3274
  return 0;
3275
}
3276
3277
3278
/**
3279
  Try to discover one table from handler(s).
3280
3281
  @retval
3282
    -1   Table does not exist
3283
  @retval
3284
    0   OK. In this case *frmblob and *frmlen are set
3285
  @retval
3286
    >0   error.  frmblob and frmlen may not be set
3287
*/
3288
struct st_discover_args
3289
{
3290
  const char *db;
3291
  const char *name;
3292
  uchar **frmblob; 
3293
  size_t *frmlen;
3294
};
3295
149 by Brian Aker
More bool conversion.
3296
static bool discover_handlerton(THD *thd, plugin_ref plugin,
3297
                                void *arg)
1 by brian
clean slate
3298
{
3299
  st_discover_args *vargs= (st_discover_args *)arg;
3300
  handlerton *hton= plugin_data(plugin, handlerton *);
3301
  if (hton->state == SHOW_OPTION_YES && hton->discover &&
3302
      (!(hton->discover(hton, thd, vargs->db, vargs->name, 
3303
                        vargs->frmblob, 
3304
                        vargs->frmlen))))
56 by brian
Next pass of true/false update.
3305
    return true;
1 by brian
clean slate
3306
56 by brian
Next pass of true/false update.
3307
  return false;
1 by brian
clean slate
3308
}
3309
3310
int ha_discover(THD *thd, const char *db, const char *name,
3311
		uchar **frmblob, size_t *frmlen)
3312
{
3313
  int error= -1; // Table does not exist in any handler
3314
  st_discover_args args= {db, name, frmblob, frmlen};
3315
3316
  if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3317
    return(error);
1 by brian
clean slate
3318
3319
  if (plugin_foreach(thd, discover_handlerton,
3320
                 MYSQL_STORAGE_ENGINE_PLUGIN, &args))
3321
    error= 0;
3322
3323
  if (!error)
3324
    status_var_increment(thd->status_var.ha_discover_count);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3325
  return(error);
1 by brian
clean slate
3326
}
3327
3328
3329
/**
3330
  Call this function in order to give the handler the possibility
3331
  to ask engine if there are any new tables that should be written to disk
3332
  or any dropped tables that need to be removed from disk
3333
*/
3334
struct st_find_files_args
3335
{
3336
  const char *db;
3337
  const char *path;
3338
  const char *wild;
3339
  bool dir;
3340
  List<LEX_STRING> *files;
3341
};
3342
3343
/**
  Request/result bundle handed through plugin_foreach() to
  table_exists_in_engine_handlerton().

  ha_table_exists_in_engine() fills it by aggregate initialization, so the
  member order below must not change.  After the iteration `err` holds the
  last answer recorded by the callback:

  @retval
    HA_ERR_NO_SUCH_TABLE     Table does not exist
  @retval
    HA_ERR_TABLE_EXIST       Table exists
  @retval
    \#                  Error code
*/
struct st_table_exists_in_engine_args
{
  const char *db;      /* forwarded to hton->table_exists_in_engine() */
  const char *name;    /* forwarded to hton->table_exists_in_engine() */
  int err;             /* answer recorded by the callback */
};
3358
149 by Brian Aker
More bool conversion.
3359
static bool table_exists_in_engine_handlerton(THD *thd, plugin_ref plugin,
3360
                                              void *arg)
1 by brian
clean slate
3361
{
3362
  st_table_exists_in_engine_args *vargs= (st_table_exists_in_engine_args *)arg;
3363
  handlerton *hton= plugin_data(plugin, handlerton *);
3364
3365
  int err= HA_ERR_NO_SUCH_TABLE;
3366
3367
  if (hton->state == SHOW_OPTION_YES && hton->table_exists_in_engine)
3368
    err = hton->table_exists_in_engine(hton, thd, vargs->db, vargs->name);
3369
3370
  vargs->err = err;
3371
  if (vargs->err == HA_ERR_TABLE_EXIST)
56 by brian
Next pass of true/false update.
3372
    return true;
1 by brian
clean slate
3373
56 by brian
Next pass of true/false update.
3374
  return false;
1 by brian
clean slate
3375
}
3376
3377
/**
  Ask the storage engines whether a table exists.

  @param thd   Thread handle
  @param db    Forwarded to the engines' table_exists_in_engine hook
  @param name  Forwarded to the engines' table_exists_in_engine hook

  @return The value left in the argument bundle by the last callback that
          ran: HA_ERR_TABLE_EXIST, HA_ERR_NO_SUCH_TABLE (also the initial
          value) or an engine error code.
*/
int ha_table_exists_in_engine(THD* thd, const char* db, const char* name)
{
  st_table_exists_in_engine_args query;
  query.db= db;
  query.name= name;
  query.err= HA_ERR_NO_SUCH_TABLE;

  /* The verdict is read from query.err, not from the iteration result */
  plugin_foreach(thd, table_exists_in_engine_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &query);
  return(query.err);
}
3384
3385
/**
3386
  Calculate cost of 'index only' scan for given index and number of records
3387
3388
  @param keynr    Index number
3389
  @param records  Estimated number of records to be retrieved
3390
3391
  @note
3392
    It is assumed that we will read trough the whole key range and that all
3393
    key blocks are half full (normally things are much better). It is also
3394
    assumed that each time we read the next key from the index, the handler
3395
    performs a random seek, thus the cost is proportional to the number of
3396
    blocks read.
3397
3398
  @todo
3399
    Consider joining this function and handler::read_time() into one
3400
    handler::read_time(keynr, records, ranges, bool index_only) function.
3401
3402
  @return
3403
    Estimated cost of 'index only' scan
3404
*/
3405
3406
double handler::index_only_read_time(uint keynr, double records)
3407
{
3408
  double read_time;
3409
  uint keys_per_block= (stats.block_size/2/
3410
			(table->key_info[keynr].key_length + ref_length) + 1);
3411
  read_time=((double) (records + keys_per_block-1) /
3412
             (double) keys_per_block);
3413
  return read_time;
3414
}
3415
3416
3417
/****************************************************************************
3418
 * Default MRR implementation (MRR to non-MRR converter)
3419
 ***************************************************************************/
3420
3421
/**
3422
  Get cost and other information about MRR scan over a known list of ranges
3423
3424
  Calculate estimated cost and other information about an MRR scan for given
3425
  sequence of ranges.
3426
3427
  @param keyno           Index number
3428
  @param seq             Range sequence to be traversed
3429
  @param seq_init_param  First parameter for seq->init()
3430
  @param n_ranges_arg    Number of ranges in the sequence, or 0 if the caller
3431
                         can't efficiently determine it
3432
  @param bufsz    INOUT  IN:  Size of the buffer available for use
3433
                         OUT: Size of the buffer that is expected to be actually
3434
                              used, or 0 if buffer is not needed.
3435
  @param flags    INOUT  A combination of HA_MRR_* flags
3436
  @param cost     OUT    Estimated cost of MRR access
3437
3438
  @note
3439
    This method (or an overriding one in a derived class) must check for
3440
    thd->killed and return HA_POS_ERROR if it is not zero. This is required
3441
    for a user to be able to interrupt the calculation by killing the
3442
    connection/query.
3443
3444
  @retval
3445
    HA_POS_ERROR  Error or the engine is unable to perform the requested
3446
                  scan. Values of OUT parameters are undefined.
3447
  @retval
3448
    other         OK, *cost contains cost of the scan, *bufsz and *flags
3449
                  contain scan parameters.
3450
*/
3451
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3452
ha_rows
1 by brian
clean slate
3453
handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3454
                                     void *seq_init_param,
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
3455
                                     uint n_ranges_arg __attribute__((unused)),
1 by brian
clean slate
3456
                                     uint *bufsz, uint *flags, COST_VECT *cost)
3457
{
3458
  KEY_MULTI_RANGE range;
3459
  range_seq_t seq_it;
3460
  ha_rows rows, total_rows= 0;
3461
  uint n_ranges=0;
3462
  THD *thd= current_thd;
3463
  
3464
  /* Default MRR implementation doesn't need buffer */
3465
  *bufsz= 0;
3466
3467
  seq_it= seq->init(seq_init_param, n_ranges, *flags);
3468
  while (!seq->next(seq_it, &range))
3469
  {
3470
    if (unlikely(thd->killed != 0))
3471
      return HA_POS_ERROR;
3472
    
3473
    n_ranges++;
3474
    key_range *min_endp, *max_endp;
3475
    {
3476
      min_endp= range.start_key.length? &range.start_key : NULL;
3477
      max_endp= range.end_key.length? &range.end_key : NULL;
3478
    }
3479
    if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
3480
      rows= 1; /* there can be at most one row */
3481
    else
3482
    {
3483
      if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, 
3484
                                                        max_endp)))
3485
      {
3486
        /* Can't scan one range => can't do MRR scan at all */
3487
        total_rows= HA_POS_ERROR;
3488
        break;
3489
      }
3490
    }
3491
    total_rows += rows;
3492
  }
3493
  
3494
  if (total_rows != HA_POS_ERROR)
3495
  {
3496
    /* The following calculation is the same as in multi_range_read_info(): */
3497
    *flags |= HA_MRR_USE_DEFAULT_IMPL;
3498
    cost->zero();
3499
    cost->avg_io_cost= 1; /* assume random seeks */
3500
    if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
3501
      cost->io_count= index_only_read_time(keyno, (uint)total_rows);
3502
    else
3503
      cost->io_count= read_time(keyno, n_ranges, total_rows);
3504
    cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
3505
  }
3506
  return total_rows;
3507
}
3508
3509
3510
/**
3511
  Get cost and other information about MRR scan over some sequence of ranges
3512
3513
  Calculate estimated cost and other information about an MRR scan for some
3514
  sequence of ranges.
3515
3516
  The ranges themselves will be known only at execution phase. When this
3517
  function is called we only know number of ranges and a (rough) E(#records)
3518
  within those ranges.
3519
3520
  Currently this function is only called for "n-keypart singlepoint" ranges,
3521
  i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
3522
3523
  The flags parameter is a combination of those flags: HA_MRR_SORTED,
3524
  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
3525
3526
  @param keyno           Index number
3527
  @param n_ranges        Estimated number of ranges (i.e. intervals) in the
3528
                         range sequence.
3529
  @param n_rows          Estimated total number of records contained within all
3530
                         of the ranges
3531
  @param bufsz    INOUT  IN:  Size of the buffer available for use
3532
                         OUT: Size of the buffer that will be actually used, or
3533
                              0 if buffer is not needed.
3534
  @param flags    INOUT  A combination of HA_MRR_* flags
3535
  @param cost     OUT    Estimated cost of MRR access
3536
3537
  @retval
3538
    0     OK, *cost contains cost of the scan, *bufsz and *flags contain scan
3539
          parameters.
3540
  @retval
3541
    other Error or can't perform the requested scan
3542
*/
3543
3544
int handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
3545
                                   uint *bufsz, uint *flags, COST_VECT *cost)
3546
{
3547
  *bufsz= 0; /* Default implementation doesn't need a buffer */
3548
3549
  *flags |= HA_MRR_USE_DEFAULT_IMPL;
3550
3551
  cost->zero();
3552
  cost->avg_io_cost= 1; /* assume random seeks */
3553
3554
  /* Produce the same cost as non-MRR code does */
3555
  if (*flags & HA_MRR_INDEX_ONLY)
3556
    cost->io_count= index_only_read_time(keyno, n_rows);
3557
  else
3558
    cost->io_count= read_time(keyno, n_ranges, n_rows);
3559
  return 0;
3560
}
3561
3562
3563
/**
3564
  Initialize the MRR scan
3565
3566
  Initialize the MRR scan. This function may do heavyweight scan 
3567
  initialization like row prefetching/sorting/etc (NOTE: but better not do
3568
  it here as we may not need it, e.g. if we never satisfy WHERE clause on
3569
  previous tables. For many implementations it would be natural to do such
3570
  initializations in the first multi_read_range_next() call)
3571
3572
  mode is a combination of the following flags: HA_MRR_SORTED,
3573
  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION 
3574
3575
  @param seq             Range sequence to be traversed
3576
  @param seq_init_param  First parameter for seq->init()
3577
  @param n_ranges        Number of ranges in the sequence
3578
  @param mode            Flags, see the description section for the details
3579
  @param buf             INOUT: memory buffer to be used
3580
3581
  @note
3582
    One must have called index_init() before calling this function. Several
3583
    multi_range_read_init() calls may be made in course of one query.
3584
3585
    Until WL#2623 is done (see its text, section 3.2), the following will 
3586
    also hold:
3587
    The caller will guarantee that if "seq->init == mrr_ranges_array_init"
3588
    then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
3589
    This property will only be used by NDB handler until WL#2623 is done.
3590
     
3591
    Buffer memory management is done according to the following scenario:
3592
    The caller allocates the buffer and provides it to the callee by filling
3593
    the members of HANDLER_BUFFER structure.
3594
    The callee consumes all or some fraction of the provided buffer space, and
3595
    sets the HANDLER_BUFFER members accordingly.
3596
    The callee may use the buffer memory until the next multi_range_read_init()
3597
    call is made, all records have been read, or until index_end() call is
3598
    made, whichever comes first.
3599
3600
  @retval 0  OK
3601
  @retval 1  Error
3602
*/
3603
3604
int
3605
handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3606
                               uint n_ranges, uint mode,
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
3607
                               HANDLER_BUFFER *buf __attribute__((unused)))
1 by brian
clean slate
3608
{
3609
  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
3610
  mrr_funcs= *seq_funcs;
3611
  mrr_is_output_sorted= test(mode & HA_MRR_SORTED);
56 by brian
Next pass of true/false update.
3612
  mrr_have_range= false;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3613
  return(0);
1 by brian
clean slate
3614
}
3615
3616
3617
/**
3618
  Get next record in MRR scan
3619
3620
  Default MRR implementation: read the next record
3621
3622
  @param range_info  OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
3623
                          Otherwise, the opaque value associated with the range
3624
                          that contains the returned record.
3625
3626
  @retval 0      OK
3627
  @retval other  Error code
3628
*/
3629
3630
int handler::multi_range_read_next(char **range_info)
3631
{
3632
  int result= 0;
3633
  int range_res;
3634
3635
  if (!mrr_have_range)
3636
  {
56 by brian
Next pass of true/false update.
3637
    mrr_have_range= true;
1 by brian
clean slate
3638
    goto start;
3639
  }
3640
3641
  do
3642
  {
3643
    /* Save a call if there can be only one row in range. */
3644
    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
3645
    {
3646
      result= read_range_next();
3647
      /* On success or non-EOF errors jump to the end. */
3648
      if (result != HA_ERR_END_OF_FILE)
3649
        break;
3650
    }
3651
    else
3652
    {
3653
      if (was_semi_consistent_read())
3654
        goto scan_it_again;
3655
      /*
3656
        We need to set this for the last range only, but checking this
3657
        condition is more expensive than just setting the result code.
3658
      */
3659
      result= HA_ERR_END_OF_FILE;
3660
    }
3661
3662
start:
3663
    /* Try the next range(s) until one matches a record. */
3664
    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
3665
    {
3666
scan_it_again:
3667
      result= read_range_first(mrr_cur_range.start_key.keypart_map ?
3668
                                 &mrr_cur_range.start_key : 0,
3669
                               mrr_cur_range.end_key.keypart_map ?
3670
                                 &mrr_cur_range.end_key : 0,
3671
                               test(mrr_cur_range.range_flag & EQ_RANGE),
3672
                               mrr_is_output_sorted);
3673
      if (result != HA_ERR_END_OF_FILE)
3674
        break;
3675
    }
3676
  }
3677
  while ((result == HA_ERR_END_OF_FILE) && !range_res);
3678
3679
  *range_info= mrr_cur_range.ptr;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3680
  return(result);
1 by brian
clean slate
3681
}
3682
3683
3684
/* **************************************************************************
3685
 * DS-MRR implementation 
3686
 ***************************************************************************/
3687
3688
/**
3689
  DS-MRR: Initialize and start MRR scan
3690
3691
  Initialize and start the MRR scan. Depending on the mode parameter, this
3692
  may use default or DS-MRR implementation.
3693
3694
  @param h               Table handler to be used
3695
  @param key             Index to be used
3696
  @param seq_funcs       Interval sequence enumeration functions
3697
  @param seq_init_param  Interval sequence enumeration parameter
3698
  @param n_ranges        Number of ranges in the sequence.
3699
  @param mode            HA_MRR_* modes to use
3700
  @param buf             INOUT Buffer to use
3701
3702
  @retval 0     Ok, Scan started.
3703
  @retval other Error
3704
*/
3705
3706
int DsMrr_impl::dsmrr_init(handler *h, KEY *key,
3707
                           RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
3708
                           uint n_ranges, uint mode, HANDLER_BUFFER *buf)
3709
{
3710
  uint elem_size;
3711
  uint keyno;
3712
  Item *pushed_cond= NULL;
3713
  handler *new_h2;
3714
  keyno= h->active_index;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3715
  assert(h2 == NULL);
1 by brian
clean slate
3716
  if (mode & HA_MRR_USE_DEFAULT_IMPL || mode & HA_MRR_SORTED)
3717
  {
56 by brian
Next pass of true/false update.
3718
    use_default_impl= true;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3719
    return(h->handler::multi_range_read_init(seq_funcs, seq_init_param,
1 by brian
clean slate
3720
                                                  n_ranges, mode, buf));
3721
  }
3722
  rowids_buf= buf->buffer;
3723
  //psergey-todo: don't add key_length as it is not needed anymore
3724
  rowids_buf += key->key_length + h->ref_length;
3725
3726
  is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
3727
  rowids_buf_end= buf->buffer_end;
3728
  
3729
  elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
3730
  rowids_buf_last= rowids_buf + 
3731
                      ((rowids_buf_end - rowids_buf)/ elem_size)*
3732
                      elem_size;
3733
  rowids_buf_end= rowids_buf_last;
3734
3735
  /* Create a separate handler object to do rndpos() calls. */
3736
  THD *thd= current_thd;
3737
  if (!(new_h2= h->clone(thd->mem_root)) || 
3738
      new_h2->ha_external_lock(thd, F_RDLCK))
3739
  {
3740
    delete new_h2;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3741
    return(1);
1 by brian
clean slate
3742
  }
3743
3744
  if (keyno == h->pushed_idx_cond_keyno)
3745
    pushed_cond= h->pushed_idx_cond;
3746
  if (h->ha_index_end())
3747
  {
3748
    new_h2= h2;
3749
    goto error;
3750
  }
3751
3752
  h2= new_h2;
3753
  table->prepare_for_position();
3754
  new_h2->extra(HA_EXTRA_KEYREAD);
3755
56 by brian
Next pass of true/false update.
3756
  if (h2->ha_index_init(keyno, false) || 
1 by brian
clean slate
3757
      h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
3758
                                         mode, buf))
3759
    goto error;
56 by brian
Next pass of true/false update.
3760
  use_default_impl= false;
1 by brian
clean slate
3761
  
3762
  if (pushed_cond)
3763
    h2->idx_cond_push(keyno, pushed_cond);
3764
  if (dsmrr_fill_buffer(new_h2))
3765
    goto error;
3766
3767
  /*
3768
    If the above call has scanned through all intervals in *seq, then
3769
    adjust *buf to indicate that the remaining buffer space will not be used.
3770
  */
3771
  if (dsmrr_eof) 
3772
    buf->end_of_used_area= rowids_buf_last;
3773
56 by brian
Next pass of true/false update.
3774
  if (h->ha_rnd_init(false))
1 by brian
clean slate
3775
    goto error;
3776
  
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3777
  return(0);
1 by brian
clean slate
3778
error:
3779
  h2->ha_index_or_rnd_end();
3780
  h2->ha_external_lock(thd, F_UNLCK);
3781
  h2->close();
3782
  delete h2;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3783
  return(1);
1 by brian
clean slate
3784
}
3785
3786
3787
void DsMrr_impl::dsmrr_close()
3788
{
3789
  if (h2)
3790
  {
3791
    h2->ha_external_lock(current_thd, F_UNLCK);
3792
    h2->close();
3793
    delete h2;
3794
    h2= NULL;
3795
  }
56 by brian
Next pass of true/false update.
3796
  use_default_impl= true;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3797
  return;
1 by brian
clean slate
3798
}
3799
3800
3801
static int rowid_cmp(void *h, uchar *a, uchar *b)
3802
{
3803
  return ((handler*)h)->cmp_ref(a, b);
3804
}
3805
3806
3807
/**
3808
  DS-MRR: Fill the buffer with rowids and sort it by rowid
3809
3810
  {This is an internal function of DiskSweep MRR implementation}
3811
  Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into 
3812
  buffer. When the buffer is full or scan is completed, sort the buffer by 
3813
  rowid and return.
3814
  
3815
  The function assumes that rowids buffer is empty when it is invoked. 
3816
  
3817
  @param h  Table handler
3818
3819
  @retval 0      OK, the next portion of rowids is in the buffer,
3820
                 properly ordered
3821
  @retval other  Error
3822
*/
3823
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
3824
int DsMrr_impl::dsmrr_fill_buffer(handler *unused __attribute__((unused)))
1 by brian
clean slate
3825
{
3826
  char *range_info;
3827
  int res;
3828
3829
  rowids_buf_cur= rowids_buf;
3830
  while ((rowids_buf_cur < rowids_buf_end) && 
3831
         !(res= h2->handler::multi_range_read_next(&range_info)))
3832
  {
3833
    /* Put rowid, or {rowid, range_id} pair into the buffer */
3834
    h2->position(table->record[0]);
3835
    memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
3836
    rowids_buf_cur += h->ref_length;
3837
3838
    if (is_mrr_assoc)
3839
    {
3840
      memcpy(rowids_buf_cur, &range_info, sizeof(void*));
3841
      rowids_buf_cur += sizeof(void*);
3842
    }
3843
  }
3844
3845
  if (res && res != HA_ERR_END_OF_FILE)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3846
    return(res); 
1 by brian
clean slate
3847
  dsmrr_eof= test(res == HA_ERR_END_OF_FILE);
3848
3849
  /* Sort the buffer contents by rowid */
3850
  uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
3851
  uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
3852
  
3853
  my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
3854
            (void*)h);
3855
  rowids_buf_last= rowids_buf_cur;
3856
  rowids_buf_cur=  rowids_buf;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3857
  return(0);
1 by brian
clean slate
3858
}
3859
3860
3861
/**
3862
  DS-MRR implementation: multi_range_read_next() function
3863
*/
3864
3865
int DsMrr_impl::dsmrr_next(handler *h, char **range_info)
3866
{
3867
  int res;
3868
  
3869
  if (use_default_impl)
3870
    return h->handler::multi_range_read_next(range_info);
3871
    
3872
  if (rowids_buf_cur == rowids_buf_last)
3873
  {
3874
    if (dsmrr_eof)
3875
    {
3876
      res= HA_ERR_END_OF_FILE;
3877
      goto end;
3878
    }
3879
    res= dsmrr_fill_buffer(h);
3880
    if (res)
3881
      goto end;
3882
  }
3883
  
3884
  /* Return EOF if there are no rowids in the buffer after re-fill attempt */
3885
  if (rowids_buf_cur == rowids_buf_last)
3886
  {
3887
    res= HA_ERR_END_OF_FILE;
3888
    goto end;
3889
  }
3890
3891
  res= h->rnd_pos(table->record[0], rowids_buf_cur);
3892
  rowids_buf_cur += h->ref_length;
3893
  if (is_mrr_assoc)
3894
  {
3895
    memcpy(range_info, rowids_buf_cur, sizeof(void*));
3896
    rowids_buf_cur += sizeof(void*);
3897
  }
3898
3899
end:
3900
  if (res)
3901
    dsmrr_close();
3902
  return res;
3903
}
3904
3905
3906
/**
3907
  DS-MRR implementation: multi_range_read_info() function
3908
*/
3909
int DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, uint *bufsz,
3910
                           uint *flags, COST_VECT *cost)
3911
{  
3912
  int res;
3913
  uint def_flags= *flags;
3914
  uint def_bufsz= *bufsz;
3915
3916
  /* Get cost/flags/mem_usage of default MRR implementation */
3917
  res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
3918
                                         &def_flags, cost);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3919
  assert(!res);
1 by brian
clean slate
3920
3921
  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || 
3922
      choose_mrr_impl(keyno, rows, &def_flags, &def_bufsz, cost))
3923
  {
3924
    /* Default implementation is choosen */
3925
    *flags= def_flags;
3926
    *bufsz= def_bufsz;
3927
  }
3928
  return 0;
3929
}
3930
3931
3932
/**
3933
  DS-MRR Implementation: multi_range_read_info_const() function
3934
*/
3935
3936
ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
3937
                                 void *seq_init_param, uint n_ranges, 
3938
                                 uint *bufsz, uint *flags, COST_VECT *cost)
3939
{
3940
  ha_rows rows;
3941
  uint def_flags= *flags;
3942
  uint def_bufsz= *bufsz;
3943
  /* Get cost/flags/mem_usage of default MRR implementation */
3944
  rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
3945
                                                n_ranges, &def_bufsz, 
3946
                                                &def_flags, cost);
3947
  if (rows == HA_POS_ERROR)
3948
  {
3949
    /* Default implementation can't perform MRR scan => we can't either */
3950
    return rows;
3951
  }
3952
3953
  /*
3954
    If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
3955
    use the default MRR implementation (we need it for UPDATE/DELETE).
3956
    Otherwise, make a choice based on cost and @@optimizer_use_mrr.
3957
  */
3958
  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
3959
      choose_mrr_impl(keyno, rows, flags, bufsz, cost))
3960
  {
3961
    *flags= def_flags;
3962
    *bufsz= def_bufsz;
3963
  }
3964
  else
3965
  {
3966
    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;
3967
  }
3968
  return rows;
3969
}
3970
3971
3972
/**
3973
  Check if key has partially-covered columns
3974
3975
  We can't use DS-MRR to perform range scans when the ranges are over
3976
  partially-covered keys, because we'll not have full key part values
3977
  (we'll have their prefixes from the index) and will not be able to check
3978
  if we've reached the end the range.
3979
3980
  @param keyno  Key to check
3981
3982
  @todo
3983
    Allow use of DS-MRR in cases where the index has partially-covered
3984
    components but they are not used for scanning.
3985
56 by brian
Next pass of true/false update.
3986
  @retval true   Yes
3987
  @retval false  No
1 by brian
clean slate
3988
*/
3989
3990
bool DsMrr_impl::key_uses_partial_cols(uint keyno)
3991
{
3992
  KEY_PART_INFO *kp= table->key_info[keyno].key_part;
3993
  KEY_PART_INFO *kp_end= kp + table->key_info[keyno].key_parts;
3994
  for (; kp != kp_end; kp++)
3995
  {
3996
    if (!kp->field->part_of_key.is_set(keyno))
56 by brian
Next pass of true/false update.
3997
      return true;
1 by brian
clean slate
3998
  }
56 by brian
Next pass of true/false update.
3999
  return false;
1 by brian
clean slate
4000
}
4001
4002
4003
/**
4004
  DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
4005
4006
  Make the choice between using Default MRR implementation and DS-MRR.
4007
  This function contains common functionality factored out of dsmrr_info()
4008
  and dsmrr_info_const(). The function assumes that the default MRR
4009
  implementation's applicability requirements are satisfied.
4010
4011
  @param keyno       Index number
4012
  @param rows        E(full rows to be retrieved)
4013
  @param flags  IN   MRR flags provided by the MRR user
4014
                OUT  If DS-MRR is choosen, flags of DS-MRR implementation
4015
                     else the value is not modified
4016
  @param bufsz  IN   If DS-MRR is choosen, buffer use of DS-MRR implementation
4017
                     else the value is not modified
4018
  @param cost   IN   Cost of default MRR implementation
4019
                OUT  If DS-MRR is choosen, cost of DS-MRR scan
4020
                     else the value is not modified
4021
56 by brian
Next pass of true/false update.
4022
  @retval true   Default MRR implementation should be used
4023
  @retval false  DS-MRR implementation should be used
1 by brian
clean slate
4024
*/
4025
4026
bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
4027
                                 uint *bufsz, COST_VECT *cost)
4028
{
4029
  COST_VECT dsmrr_cost;
4030
  bool res;
4031
  THD *thd= current_thd;
4032
  if ((thd->variables.optimizer_use_mrr == 2) || 
4033
      (*flags & HA_MRR_INDEX_ONLY) || (*flags & HA_MRR_SORTED) ||
4034
      (keyno == table->s->primary_key && 
4035
       h->primary_key_is_clustered()) || 
4036
       key_uses_partial_cols(keyno))
4037
  {
4038
    /* Use the default implementation */
4039
    *flags |= HA_MRR_USE_DEFAULT_IMPL;
56 by brian
Next pass of true/false update.
4040
    return true;
1 by brian
clean slate
4041
  }
4042
  
4043
  uint add_len= table->key_info[keyno].key_length + h->ref_length; 
4044
  *bufsz -= add_len;
4045
  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
56 by brian
Next pass of true/false update.
4046
    return true;
1 by brian
clean slate
4047
  *bufsz += add_len;
4048
  
4049
  bool force_dsmrr;
4050
  /* 
4051
    If @@optimizer_use_mrr==force, then set cost of DS-MRR to be minimum of
4052
    DS-MRR and Default implementations cost. This allows one to force use of
4053
    DS-MRR whenever it is applicable without affecting other cost-based
4054
    choices.
4055
  */
4056
  if ((force_dsmrr= (thd->variables.optimizer_use_mrr == 1)) &&
4057
      dsmrr_cost.total_cost() > cost->total_cost())
4058
    dsmrr_cost= *cost;
4059
4060
  if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost())
4061
  {
4062
    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
4063
    *flags &= ~HA_MRR_SORTED;          /* We will return unordered output */
4064
    *cost= dsmrr_cost;
56 by brian
Next pass of true/false update.
4065
    res= false;
1 by brian
clean slate
4066
  }
4067
  else
4068
  {
4069
    /* Use the default MRR implementation */
56 by brian
Next pass of true/false update.
4070
    res= true;
1 by brian
clean slate
4071
  }
4072
  return res;
4073
}
4074
4075
4076
static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost);
4077
4078
4079
/**
4080
  Get cost of DS-MRR scan
4081
4082
  @param keynr              Index to be used
4083
  @param rows               E(Number of rows to be scanned)
4084
  @param flags              Scan parameters (HA_MRR_* flags)
4085
  @param buffer_size INOUT  Buffer size
4086
  @param cost        OUT    The cost
4087
56 by brian
Next pass of true/false update.
4088
  @retval false  OK
4089
  @retval true   Error, DS-MRR cannot be used (the buffer is too small
1 by brian
clean slate
4090
                 for even 1 rowid)
4091
*/
4092
4093
bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
4094
                                         uint *buffer_size, COST_VECT *cost)
4095
{
61 by Brian Aker
Conversion of handler type.
4096
  uint32_t max_buff_entries, elem_size;
1 by brian
clean slate
4097
  ha_rows rows_in_full_step, rows_in_last_step;
4098
  uint n_full_steps;
4099
  double index_read_cost;
4100
4101
  elem_size= h->ref_length + sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION));
4102
  max_buff_entries = *buffer_size / elem_size;
4103
4104
  if (!max_buff_entries)
56 by brian
Next pass of true/false update.
4105
    return true; /* Buffer has not enough space for even 1 rowid */
1 by brian
clean slate
4106
4107
  /* Number of iterations we'll make with full buffer */
4108
  n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
4109
  
4110
  /* 
4111
    Get numbers of rows we'll be processing in 
4112
     - non-last sweep, with full buffer 
4113
     - last iteration, with non-full buffer
4114
  */
4115
  rows_in_full_step= max_buff_entries;
4116
  rows_in_last_step= rows % max_buff_entries;
4117
  
4118
  /* Adjust buffer size if we expect to use only part of the buffer */
4119
  if (n_full_steps)
4120
  {
4121
    get_sort_and_sweep_cost(table, rows, cost);
4122
    cost->multiply(n_full_steps);
4123
  }
4124
  else
4125
  {
4126
    cost->zero();
4127
    *buffer_size= max(*buffer_size, 
4128
                      (size_t)(1.2*rows_in_last_step) * elem_size + 
4129
                      h->ref_length + table->key_info[keynr].key_length);
4130
  }
4131
  
4132
  COST_VECT last_step_cost;
4133
  get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
4134
  cost->add(&last_step_cost);
4135
 
4136
  if (n_full_steps != 0)
4137
    cost->mem_cost= *buffer_size;
4138
  else
4139
    cost->mem_cost= (double)rows_in_last_step * elem_size;
4140
  
4141
  /* Total cost of all index accesses */
4142
  index_read_cost= h->index_only_read_time(keynr, (double)rows);
4143
  cost->add_io(index_read_cost, 1 /* Random seeks */);
56 by brian
Next pass of true/false update.
4144
  return false;
1 by brian
clean slate
4145
}
4146
4147
4148
/* 
4149
  Get cost of one sort-and-sweep step
4150
4151
  SYNOPSIS
4152
    get_sort_and_sweep_cost()
4153
      table       Table being accessed
4154
      nrows       Number of rows to be sorted and retrieved
4155
      cost   OUT  The cost
4156
4157
  DESCRIPTION
4158
    Get cost of these operations:
4159
     - sort an array of #nrows ROWIDs using qsort
4160
     - read #nrows records from table in a sweep.
4161
*/
4162
4163
static 
4164
void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost)
4165
{
4166
  if (nrows)
4167
  {
56 by brian
Next pass of true/false update.
4168
    get_sweep_read_cost(table, nrows, false, cost);
1 by brian
clean slate
4169
    /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
4170
    double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
4171
    if (cmp_op < 3)
4172
      cmp_op= 3;
4173
    cost->cpu_cost += cmp_op * log2(cmp_op);
4174
  }
4175
  else
4176
    cost->zero();
4177
}
4178
4179
4180
/**
4181
  Get cost of reading nrows table records in a "disk sweep"
4182
4183
  A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that are made
4184
  for an ordered sequence of rowids.
4185
4186
  We assume hard disk IO. The read is performed as follows:
4187
4188
   1. The disk head is moved to the needed cylinder
4189
   2. The controller waits for the plate to rotate
4190
   3. The data is transferred
4191
4192
  Time to do #3 is insignificant compared to #2+#1.
4193
4194
  Time to move the disk head is proportional to head travel distance.
4195
4196
  Time to wait for the plate to rotate depends on whether the disk head
4197
  was moved or not. 
4198
4199
  If disk head wasn't moved, the wait time is proportional to distance
4200
  between the previous block and the block we're reading.
4201
4202
  If the head was moved, we don't know how much we'll need to wait for the
4203
  plate to rotate. We assume the wait time to be a variate with a mean of
4204
  0.5 of full rotation time.
4205
4206
  Our cost units are "random disk seeks". The cost of random disk seek is
4207
  actually not a constant, it depends on the range of cylinders we're going
4208
  to access. We make it constant by introducing a fuzzy concept of "typical 
4209
  datafile length" (it's fuzzy as it's hard to tell whether it should
4210
  include index file, temp.tables etc). Then random seek cost is:
4211
4212
    1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
4213
4214
  We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
4215
4216
  @param table             Table to be accessed
4217
  @param nrows             Number of rows to retrieve
56 by brian
Next pass of true/false update.
4218
  @param interrupted       true <=> Assume that the disk sweep will be
4219
                           interrupted by other disk IO. false - otherwise.
1 by brian
clean slate
4220
  @param cost         OUT  The cost.
4221
*/
4222
4223
void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, 
4224
                         COST_VECT *cost)
4225
{
4226
  cost->zero();
4227
  if (table->file->primary_key_is_clustered())
4228
  {
4229
    cost->io_count= table->file->read_time(table->s->primary_key,
4230
                                           (uint) nrows, nrows);
4231
  }
4232
  else
4233
  {
4234
    double n_blocks=
151 by Brian Aker
Ulonglong to uint64_t
4235
      ceil(uint64_t2double(table->file->stats.data_file_length) / IO_SIZE);
1 by brian
clean slate
4236
    double busy_blocks=
4237
      n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
4238
    if (busy_blocks < 1.0)
4239
      busy_blocks= 1.0;
4240
4241
    cost->io_count= busy_blocks;
4242
4243
    if (!interrupted)
4244
    {
4245
      /* Assume reading is done in one 'sweep' */
4246
      cost->avg_io_cost= (DISK_SEEK_BASE_COST +
4247
                          DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
4248
    }
4249
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4250
  return;
1 by brian
clean slate
4251
}
4252
4253
4254
/* **************************************************************************
4255
 * DS-MRR implementation ends
4256
 ***************************************************************************/
4257
4258
/**
4259
  Read first row between two ranges.
4260
4261
  @param start_key		Start key. Is 0 if no min range
4262
  @param end_key		End key.  Is 0 if no max range
4263
  @param eq_range_arg	        Set to 1 if start_key == end_key
4264
  @param sorted		Set to 1 if result should be sorted per key
4265
4266
  @note
4267
    Record is read into table->record[0]
4268
4269
  @retval
4270
    0			Found row
4271
  @retval
4272
    HA_ERR_END_OF_FILE	No rows in range
4273
  @retval
4274
    \#			Error code
4275
*/
4276
int handler::read_range_first(const key_range *start_key,
4277
			      const key_range *end_key,
4278
			      bool eq_range_arg,
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
4279
                              bool sorted  __attribute__((unused)))
1 by brian
clean slate
4280
{
4281
  int result;
4282
4283
  eq_range= eq_range_arg;
4284
  end_range= 0;
4285
  if (end_key)
4286
  {
4287
    end_range= &save_end_range;
4288
    save_end_range= *end_key;
4289
    key_compare_result_on_equal= ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
4290
				  (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
4291
  }
4292
  range_key_part= table->key_info[active_index].key_part;
4293
4294
  if (!start_key)			// Read first record
4295
    result= index_first(table->record[0]);
4296
  else
4297
    result= index_read_map(table->record[0],
4298
                           start_key->key,
4299
                           start_key->keypart_map,
4300
                           start_key->flag);
4301
  if (result)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4302
    return((result == HA_ERR_KEY_NOT_FOUND) 
1 by brian
clean slate
4303
		? HA_ERR_END_OF_FILE
4304
		: result);
4305
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4306
  return (compare_key(end_range) <= 0 ? 0 : HA_ERR_END_OF_FILE);
1 by brian
clean slate
4307
}
4308
4309
4310
/**
4311
  Read next row between two endpoints.
4312
4313
  @note
4314
    Record is read into table->record[0]
4315
4316
  @retval
4317
    0			Found row
4318
  @retval
4319
    HA_ERR_END_OF_FILE	No rows in range
4320
  @retval
4321
    \#			Error code
4322
*/
4323
int handler::read_range_next()
4324
{
4325
  int result;
4326
4327
  if (eq_range)
4328
  {
4329
    /* We trust that index_next_same always gives a row in range */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4330
    return(index_next_same(table->record[0],
1 by brian
clean slate
4331
                                end_range->key,
4332
                                end_range->length));
4333
  }
4334
  result= index_next(table->record[0]);
4335
  if (result)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4336
    return(result);
4337
  return(compare_key(end_range) <= 0 ? 0 : HA_ERR_END_OF_FILE);
1 by brian
clean slate
4338
}
4339
4340
4341
/**
4342
  Compare if found key (in row) is over max-value.
4343
4344
  @param range		range to compare to row. May be 0 for no range
4345
4346
  @seealso
4347
    key.cc::key_cmp()
4348
4349
  @return
4350
    The return value is SIGN(key_in_row - range_key):
4351
4352
    - 0   : Key is equal to range or 'range' == 0 (no range)
4353
    - -1  : Key is less than range
4354
    - 1   : Key is larger than range
4355
*/
4356
int handler::compare_key(key_range *range)
4357
{
4358
  int cmp;
4359
  if (!range || in_range_check_pushed_down)
4360
    return 0;					// No max range
4361
  cmp= key_cmp(range_key_part, range->key, range->length);
4362
  if (!cmp)
4363
    cmp= key_compare_result_on_equal;
4364
  return cmp;
4365
}
4366
4367
4368
/*
4369
  Same as compare_key() but doesn't check have in_range_check_pushed_down.
4370
  This is used by index condition pushdown implementation.
4371
*/
4372
4373
int handler::compare_key2(key_range *range)
4374
{
4375
  int cmp;
4376
  if (!range)
4377
    return 0;					// no max range
4378
  cmp= key_cmp(range_key_part, range->key, range->length);
4379
  if (!cmp)
4380
    cmp= key_compare_result_on_equal;
4381
  return cmp;
4382
}
4383
4384
int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
4385
                                key_part_map keypart_map,
4386
                                enum ha_rkey_function find_flag)
4387
{
4388
  int error, error1;
4389
  error= index_init(index, 0);
4390
  if (!error)
4391
  {
4392
    error= index_read_map(buf, key, keypart_map, find_flag);
4393
    error1= index_end();
4394
  }
4395
  return error ?  error : error1;
4396
}
4397
4398
4399
/**
4400
  Returns a list of all known extensions.
4401
4402
    No mutexes, worst case race is a minor surplus memory allocation
4403
    We have to recreate the extension map if mysqld is restarted (for example
4404
    within libmysqld)
4405
4406
  @retval
4407
    pointer		pointer to TYPELIB structure
4408
*/
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
4409
static bool exts_handlerton(THD *unused __attribute__((unused)),
149 by Brian Aker
More bool conversion.
4410
                            plugin_ref plugin,
4411
                            void *arg)
1 by brian
clean slate
4412
{
4413
  List<char> *found_exts= (List<char> *) arg;
4414
  handlerton *hton= plugin_data(plugin, handlerton *);
4415
  handler *file;
4416
  if (hton->state == SHOW_OPTION_YES && hton->create &&
4417
      (file= hton->create(hton, (TABLE_SHARE*) 0, current_thd->mem_root)))
4418
  {
4419
    List_iterator_fast<char> it(*found_exts);
4420
    const char **ext, *old_ext;
4421
4422
    for (ext= file->bas_ext(); *ext; ext++)
4423
    {
4424
      while ((old_ext= it++))
4425
      {
4426
        if (!strcmp(old_ext, *ext))
4427
	  break;
4428
      }
4429
      if (!old_ext)
4430
        found_exts->push_back((char *) *ext);
4431
4432
      it.rewind();
4433
    }
4434
    delete file;
4435
  }
56 by brian
Next pass of true/false update.
4436
  return false;
1 by brian
clean slate
4437
}
4438
4439
/**
  Return the TYPELIB holding the file extensions collected from the storage
  engine plugins.

  The list is (re)built when known_extensions.type_names is not set or when
  mysys_usage_id differs from known_extensions_id; otherwise the existing
  TYPELIB is returned as is.

  @return
    pointer to known_extensions
*/
TYPELIB *ha_known_exts(void)
{
  if (known_extensions.type_names && mysys_usage_id == known_extensions_id)
    return &known_extensions;

  List<char> found_exts;
  known_extensions_id= mysys_usage_id;

  /* Let every storage engine plugin add its extensions to found_exts */
  plugin_foreach(NULL, exts_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);

  /* One slot per extension plus one for the terminating 0 */
  const char **names=
    (const char **) my_once_alloc(sizeof(char *)*(found_exts.elements+1),
                                  MYF(MY_WME | MY_FAE));
  assert(names != 0);

  known_extensions.count= found_exts.elements;
  known_extensions.type_names= names;

  List_iterator_fast<char> it(found_exts);
  const char **slot= names;
  const char *name;
  while ((name= it++))
    *slot++= name;
  *slot= 0;

  return &known_extensions;
}
4466
4467
4468
/**
  Send one (type, file, status) row to the client through thd->protocol.
  Passed by ha_show_status() to the engine's show_status() callback.

  @return
    true if protocol->write() reported a failure, false otherwise
*/
static bool stat_print(THD *thd, const char *type, uint type_len,
                       const char *file, uint file_len,
                       const char *status, uint status_len)
{
  Protocol *const proto= thd->protocol;

  proto->prepare_for_resend();
  proto->store(type, type_len, system_charset_info);
  proto->store(file, file_len, system_charset_info);
  proto->store(status, status_len, system_charset_info);

  return proto->write() ? true : false;
}
4481
4482
/**
  Send the status of one storage engine to the client.

  Sends a three-column header (Type, Name, Status), then lets the engine's
  show_status() callback, if it has one, emit rows through stat_print().

  @return
    true if sending the header failed or show_status() reported an error;
    false otherwise, in which case my_eof() has been called.
*/
bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
{
  Protocol *const protocol= thd->protocol;
  List<Item> field_list;

  field_list.push_back(new Item_empty_string("Type",10));
  field_list.push_back(new Item_empty_string("Name",FN_REFLEN));
  field_list.push_back(new Item_empty_string("Status",10));

  if (protocol->send_fields(&field_list,
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
    return true;

  /* An engine without a show_status() callback simply produces no rows */
  bool failed= false;
  if (db_type->show_status)
    failed= db_type->show_status(db_type, thd, stat_print, stat) ? true : false;

  if (failed)
    return true;

  my_eof(thd);
  return false;
}
4503
4504
4505
/**
4506
  Check if the conditions for row-based binlogging is correct for the table.
4507
4508
  A row in the given table should be replicated if:
4509
  - Row-based replication is enabled in the current thread
4510
  - The binlog is enabled
4511
  - It is not a temporary table
4512
  - The binary log is open
4513
  - The database the table resides in shall be binlogged (binlog_*_db rules)
4514
  - table is not mysql.event
4515
*/
4516
4517
static bool check_table_binlog_row_based(THD *thd, TABLE *table)
4518
{
4519
  if (table->s->cached_row_logging_check == -1)
4520
  {
4521
    int const check(table->s->tmp_table == NO_TMP_TABLE &&
4522
                    binlog_filter->db_ok(table->s->db.str));
4523
    table->s->cached_row_logging_check= check;
4524
  }
4525
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4526
  assert(table->s->cached_row_logging_check == 0 ||
1 by brian
clean slate
4527
              table->s->cached_row_logging_check == 1);
4528
4529
  return (thd->current_stmt_binlog_row_based &&
4530
          table->s->cached_row_logging_check &&
4531
          (thd->options & OPTION_BIN_LOG) &&
4532
          mysql_bin_log.is_open());
4533
}
4534
4535
4536
/**
4537
   Write table maps for all (manually or automatically) locked tables
4538
   to the binary log.
4539
4540
   This function will generate and write table maps for all tables
4541
   that are locked by the thread 'thd'.  Either manually locked
4542
   (stored in THD::locked_tables) and automatically locked (stored
4543
   in THD::lock) are considered.
4544
4545
   @param thd     Pointer to THD structure
4546
4547
   @retval 0   All OK
4548
   @retval 1   Failed to write all table maps
4549
4550
   @sa
4551
       THD::lock
4552
       THD::locked_tables
4553
*/
4554
4555
static int write_locked_table_maps(THD *thd)
4556
{
4557
  if (thd->get_binlog_table_maps() == 0)
4558
  {
4559
    MYSQL_LOCK *locks[3];
4560
    locks[0]= thd->extra_lock;
4561
    locks[1]= thd->lock;
4562
    locks[2]= thd->locked_tables;
4563
    for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
4564
    {
4565
      MYSQL_LOCK const *const lock= locks[i];
4566
      if (lock == NULL)
4567
        continue;
4568
4569
      TABLE **const end_ptr= lock->table + lock->table_count;
4570
      for (TABLE **table_ptr= lock->table ; 
4571
           table_ptr != end_ptr ;
4572
           ++table_ptr)
4573
      {
4574
        TABLE *const table= *table_ptr;
4575
        if (table->current_lock == F_WRLCK &&
4576
            check_table_binlog_row_based(thd, table))
4577
        {
4578
          int const has_trans= table->file->has_transactions();
4579
          int const error= thd->binlog_write_table_map(table, has_trans);
4580
          /*
4581
            If an error occurs, it is the responsibility of the caller to
4582
            roll back the transaction.
4583
          */
4584
          if (unlikely(error))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4585
            return(1);
1 by brian
clean slate
4586
        }
4587
      }
4588
    }
4589
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4590
  return(0);
1 by brian
clean slate
4591
}
4592
4593
4594
typedef bool Log_func(THD*, TABLE*, bool, const uchar*, const uchar*);
4595
4596
static int binlog_log_row(TABLE* table,
4597
                          const uchar *before_record,
4598
                          const uchar *after_record,
4599
                          Log_func *log_func)
4600
{
4601
  if (table->no_replicate)
4602
    return 0;
4603
  bool error= 0;
4604
  THD *const thd= table->in_use;
4605
4606
  if (check_table_binlog_row_based(thd, table))
4607
  {
4608
    /*
4609
      If there are no table maps written to the binary log, this is
4610
      the first row handled in this statement. In that case, we need
4611
      to write table maps for all locked tables to the binary log.
4612
    */
4613
    if (likely(!(error= write_locked_table_maps(thd))))
4614
    {
4615
      bool const has_trans= table->file->has_transactions();
4616
      error= (*log_func)(thd, table, has_trans, before_record, after_record);
4617
    }
4618
  }
4619
  return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
4620
}
4621
4622
/**
  Call external_lock() and, if it succeeds, refresh cached_table_flags.

  @return
    The value returned by external_lock().
*/
int handler::ha_external_lock(THD *thd, int lock_type)
{
  /*
    Whether this is lock or unlock, this should be true, and is to verify that
    if get_auto_increment() was called (thus may have reserved intervals or
    taken a table lock), ha_release_auto_increment() was too.
  */
  assert(next_insert_id == 0);

  MYSQL_EXTERNAL_LOCK(lock_type);

  /*
    We cache the table flags if the locking succeeded. Otherwise, we
    keep them as they were when they were fetched in ha_open().
  */
  const int rc= external_lock(thd, lock_type);
  if (!rc)
    cached_table_flags= table_flags();
  return rc;
}
4642
4643
4644
/**
4645
  Check handler usage and reset state of file to after 'open'
4646
*/
4647
int handler::ha_reset()
4648
{
4649
  /* Check that we have called all proper deallocation functions */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4650
  assert((uchar*) table->def_read_set.bitmap +
1 by brian
clean slate
4651
              table->s->column_bitmap_size ==
4652
              (uchar*) table->def_write_set.bitmap);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4653
  assert(bitmap_is_set_all(&table->s->all_set));
4654
  assert(table->key_read == 0);
1 by brian
clean slate
4655
  /* ensure that ha_index_end / ha_rnd_end has been called */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4656
  assert(inited == NONE);
1 by brian
clean slate
4657
  /* Free cache used by filesort */
4658
  free_io_cache(table);
4659
  /* reset the bitmaps to point to defaults */
4660
  table->default_column_bitmaps();
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4661
  return(reset());
1 by brian
clean slate
4662
}
4663
4664
4665
/**
  Write a row via write_row() and then log it via binlog_log_row().

  @return
    0 on success, otherwise the error from write_row() or, if that
    succeeded, from binlog_log_row().
*/
int handler::ha_write_row(uchar *buf)
{
  MYSQL_INSERT_ROW_START();

  mark_trx_read_write();

  int error= write_row(buf);
  if (unlikely(error))
    return error;

  /* A written row has only an "after" image */
  error= binlog_log_row(table, 0, buf,
                        Write_rows_log_event::binlog_row_logging_function);
  if (unlikely(error))
    return error; /* purecov: inspected */

  MYSQL_INSERT_ROW_END();
  return 0;
}
4680
4681
4682
int handler::ha_update_row(const uchar *old_data, uchar *new_data)
4683
{
4684
  int error;
4685
  Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
4686
4687
  /*
4688
    Some storage engines require that the new record is in record[0]
4689
    (and the old record is in record[1]).
4690
   */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4691
  assert(new_data == table->record[0]);
1 by brian
clean slate
4692
4693
  mark_trx_read_write();
4694
4695
  if (unlikely(error= update_row(old_data, new_data)))
4696
    return error;
4697
  if (unlikely(error= binlog_log_row(table, old_data, new_data, log_func)))
4698
    return error;
4699
  return 0;
4700
}
4701
4702
int handler::ha_delete_row(const uchar *buf)
4703
{
4704
  int error;
4705
  Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
4706
4707
  mark_trx_read_write();
4708
4709
  if (unlikely(error= delete_row(buf)))
4710
    return error;
4711
  if (unlikely(error= binlog_log_row(table, buf, 0, log_func)))
4712
    return error;
4713
  return 0;
4714
}
4715
4716
4717
4718
/**
4719
  @details
4720
  use_hidden_primary_key() is called in case of an update/delete when
4721
  (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
4722
  but we don't have a primary key
4723
*/
4724
void handler::use_hidden_primary_key()
4725
{
4726
  /* fallback to use all columns in the table to identify row */
4727
  table->use_all_columns();
4728
}