~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000-2006 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
/**
17
  @file handler.cc
18
19
  Handler-calling-functions
20
*/
21
22
#ifdef USE_PRAGMA_IMPLEMENTATION
23
#pragma implementation				// gcc: Class implementation
24
#endif
25
26
#include "mysql_priv.h"
27
#include "rpl_filter.h"
28
#include <myisampack.h>
29
#include <errno.h>
30
31
/*
32
  While we have legacy_db_type, we have this array to
33
  check for dups and to find handlerton from legacy_db_type.
34
  Remove when legacy_db_type is finally gone
35
*/
36
st_plugin_int *hton2plugin[MAX_HA];
37
38
static handlerton *installed_htons[128];
39
40
#define BITMAP_STACKBUF_SIZE (128/8)
41
42
KEY_CREATE_INFO default_key_create_info= { HA_KEY_ALG_UNDEF, 0, {NullS,0}, {NullS,0} };
43
44
/* number of entries in handlertons[] */
61 by Brian Aker
Conversion of handler type.
45
uint32_t total_ha= 0;
1 by brian
clean slate
46
/* number of storage engines (from handlertons[]) that support 2pc */
61 by Brian Aker
Conversion of handler type.
47
uint32_t total_ha_2pc= 0;
1 by brian
clean slate
48
/* size of savepoint storage area (see ha_init) */
61 by Brian Aker
Conversion of handler type.
49
uint32_t savepoint_alloc_size= 0;
1 by brian
clean slate
50
51
static const LEX_STRING sys_table_aliases[]=
52
{
53
  { C_STRING_WITH_LEN("INNOBASE") },  { C_STRING_WITH_LEN("INNODB") },
54
  { C_STRING_WITH_LEN("HEAP") },      { C_STRING_WITH_LEN("MEMORY") },
55
  {NullS, 0}
56
};
57
58
const char *ha_row_type[] = {
59
  "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "PAGE", "?","?","?"
60
};
61
62
const char *tx_isolation_names[] =
63
{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
64
  NullS};
65
TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
66
			       tx_isolation_names, NULL};
67
68
static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
69
uint known_extensions_id= 0;
70
71
72
73
/**
  Return the plugin of the default storage engine for a thread.

  The thread's own table_plugin variable is returned as is when it is set;
  otherwise the global table_plugin is locked with my_plugin_lock() and
  the result of that call is returned.

  @param thd         current thread

  @return
    plugin reference of the default storage engine
*/
static plugin_ref ha_default_plugin(THD *thd)
{
  plugin_ref thread_plugin= thd->variables.table_plugin;

  if (!thread_plugin)
    return my_plugin_lock(thd, &global_system_variables.table_plugin);

  return thread_plugin;
}
79
80
81
/**
82
  Return the default storage engine handlerton for thread
83
84
  @param ha_default_handlerton(thd)
85
  @param thd         current thread
86
87
  @return
88
    pointer to handlerton
89
*/
90
handlerton *ha_default_handlerton(THD *thd)
91
{
92
  plugin_ref plugin= ha_default_plugin(thd);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
93
  assert(plugin);
1 by brian
clean slate
94
  handlerton *hton= plugin_data(plugin, handlerton*);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
95
  assert(hton);
1 by brian
clean slate
96
  return hton;
97
}
98
99
100
/**
101
  Return the storage engine handlerton for the supplied name
102
  
103
  @param thd         current thread
104
  @param name        name of storage engine
105
  
106
  @return
107
    pointer to storage engine plugin handle
108
*/
109
plugin_ref ha_resolve_by_name(THD *thd, const LEX_STRING *name)
110
{
111
  const LEX_STRING *table_alias;
112
  plugin_ref plugin;
113
114
redo:
115
  /* my_strnncoll is a macro and gcc doesn't do early expansion of macro */
116
  if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
117
                           (const uchar *)name->str, name->length,
118
                           (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
119
    return ha_default_plugin(thd);
120
121
  if ((plugin= my_plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN)))
122
  {
123
    handlerton *hton= plugin_data(plugin, handlerton *);
124
    if (!(hton->flags & HTON_NOT_USER_SELECTABLE))
125
      return plugin;
126
      
127
    /*
128
      unlocking plugin immediately after locking is relatively low cost.
129
    */
130
    plugin_unlock(thd, plugin);
131
  }
132
133
  /*
134
    We check for the historical aliases.
135
  */
136
  for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
137
  {
138
    if (!my_strnncoll(&my_charset_latin1,
139
                      (const uchar *)name->str, name->length,
140
                      (const uchar *)table_alias->str, table_alias->length))
141
    {
142
      name= table_alias + 1;
143
      goto redo;
144
    }
145
  }
146
147
  return NULL;
148
}
149
150
151
/**
  Lock the plugin that owns the given handlerton.

  @param thd         current thread
  @param hton        storage engine handlerton, may be NULL

  @return
    result of my_plugin_lock() for the engine's hton2plugin entry,
    or NULL when hton is NULL
*/
plugin_ref ha_lock_engine(THD *thd, handlerton *hton)
{
  if (!hton)
    return NULL;

  st_plugin_int **plugin= hton2plugin + hton->slot;

  return my_plugin_lock(thd, &plugin);
}
161
162
163
/**
  Return the handlerton registered for a legacy_db_type.

  DB_TYPE_DEFAULT resolves to the thread's default engine. Any other
  value must lie strictly between DB_TYPE_UNKNOWN and DB_TYPE_DEFAULT and
  have an installed engine whose plugin can be locked.

  @param thd         current thread
  @param db_type     legacy type code of the storage engine

  @return
    pointer to handlerton, or NULL if the type code cannot be resolved
*/
handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
{
  if (db_type == DB_TYPE_DEFAULT)
    return ha_default_handlerton(thd);

  /* Only type codes inside the open range index installed_htons[]. */
  if (db_type <= DB_TYPE_UNKNOWN || db_type >= DB_TYPE_DEFAULT)
    return NULL;

  plugin_ref locked= ha_lock_engine(thd, installed_htons[db_type]);
  if (!locked)
    return NULL;

  return plugin_data(locked, handlerton*);
}
178
179
180
/**
181
  Use other database handler if databasehandler is not compiled in.
182
*/
183
handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
184
                          bool no_substitute, bool report_error)
185
{
186
  handlerton *hton= ha_resolve_by_legacy_type(thd, database_type);
187
  if (ha_storage_engine_is_enabled(hton))
188
    return hton;
189
190
  if (no_substitute)
191
  {
192
    if (report_error)
193
    {
194
      const char *engine_name= ha_resolve_storage_engine_name(hton);
195
      my_error(ER_FEATURE_DISABLED,MYF(0),engine_name,engine_name);
196
    }
197
    return NULL;
198
  }
199
200
  switch (database_type) {
201
  case DB_TYPE_HASH:
202
    return ha_resolve_by_legacy_type(thd, DB_TYPE_HASH);
203
  default:
204
    break;
205
  }
206
207
  return ha_default_handlerton(thd);
208
} /* ha_checktype */
209
210
211
/**
  Create a handler object for a table using the given storage engine.

  If the engine is missing, not in state SHOW_OPTION_YES, or has no
  create() hook, the call is repeated with the current thread's default
  engine.

  @param share       table share the handler is created for
  @param alloc       memory root passed through to the engine's create()
  @param db_type     storage engine to use, may be NULL

  @return
    the new handler (already init()-ed), or NULL if create() returned NULL
*/
handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
                         handlerton *db_type)
{
  const bool usable= db_type && db_type->state == SHOW_OPTION_YES &&
                     db_type->create;

  if (!usable)
  {
    /*
      Try the default table type
      Here the call to current_thd() is ok as we call this function a lot of
      times but we enter this branch very seldom.
    */
    return(get_new_handler(share, alloc, ha_default_handlerton(current_thd)));
  }

  handler *file= db_type->create(db_type, share, alloc);
  if (file)
    file->init();
  return(file);
}
229
230
231
/**
232
  Register handler error messages for use with my_error().
233
234
  @retval
235
    0           OK
236
  @retval
237
    !=0         Error
238
*/
239
240
int ha_init_errors(void)
241
{
242
#define SETMSG(nr, msg) errmsgs[(nr) - HA_ERR_FIRST]= (msg)
243
  const char    **errmsgs;
244
245
  /* Allocate a pointer array for the error message strings. */
246
  /* Zerofill it to avoid uninitialized gaps. */
247
  if (! (errmsgs= (const char**) my_malloc(HA_ERR_ERRORS * sizeof(char*),
248
                                           MYF(MY_WME | MY_ZEROFILL))))
249
    return 1;
250
251
  /* Set the dedicated error messages. */
252
  SETMSG(HA_ERR_KEY_NOT_FOUND,          ER(ER_KEY_NOT_FOUND));
253
  SETMSG(HA_ERR_FOUND_DUPP_KEY,         ER(ER_DUP_KEY));
254
  SETMSG(HA_ERR_RECORD_CHANGED,         "Update wich is recoverable");
255
  SETMSG(HA_ERR_WRONG_INDEX,            "Wrong index given to function");
256
  SETMSG(HA_ERR_CRASHED,                ER(ER_NOT_KEYFILE));
257
  SETMSG(HA_ERR_WRONG_IN_RECORD,        ER(ER_CRASHED_ON_USAGE));
258
  SETMSG(HA_ERR_OUT_OF_MEM,             "Table handler out of memory");
259
  SETMSG(HA_ERR_NOT_A_TABLE,            "Incorrect file format '%.64s'");
260
  SETMSG(HA_ERR_WRONG_COMMAND,          "Command not supported");
261
  SETMSG(HA_ERR_OLD_FILE,               ER(ER_OLD_KEYFILE));
262
  SETMSG(HA_ERR_NO_ACTIVE_RECORD,       "No record read in update");
263
  SETMSG(HA_ERR_RECORD_DELETED,         "Intern record deleted");
264
  SETMSG(HA_ERR_RECORD_FILE_FULL,       ER(ER_RECORD_FILE_FULL));
265
  SETMSG(HA_ERR_INDEX_FILE_FULL,        "No more room in index file '%.64s'");
266
  SETMSG(HA_ERR_END_OF_FILE,            "End in next/prev/first/last");
267
  SETMSG(HA_ERR_UNSUPPORTED,            ER(ER_ILLEGAL_HA));
268
  SETMSG(HA_ERR_TO_BIG_ROW,             "Too big row");
269
  SETMSG(HA_WRONG_CREATE_OPTION,        "Wrong create option");
270
  SETMSG(HA_ERR_FOUND_DUPP_UNIQUE,      ER(ER_DUP_UNIQUE));
271
  SETMSG(HA_ERR_UNKNOWN_CHARSET,        "Can't open charset");
272
  SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF,    ER(ER_WRONG_MRG_TABLE));
273
  SETMSG(HA_ERR_CRASHED_ON_REPAIR,      ER(ER_CRASHED_ON_REPAIR));
274
  SETMSG(HA_ERR_CRASHED_ON_USAGE,       ER(ER_CRASHED_ON_USAGE));
275
  SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT,      ER(ER_LOCK_WAIT_TIMEOUT));
276
  SETMSG(HA_ERR_LOCK_TABLE_FULL,        ER(ER_LOCK_TABLE_FULL));
277
  SETMSG(HA_ERR_READ_ONLY_TRANSACTION,  ER(ER_READ_ONLY_TRANSACTION));
278
  SETMSG(HA_ERR_LOCK_DEADLOCK,          ER(ER_LOCK_DEADLOCK));
279
  SETMSG(HA_ERR_CANNOT_ADD_FOREIGN,     ER(ER_CANNOT_ADD_FOREIGN));
280
  SETMSG(HA_ERR_NO_REFERENCED_ROW,      ER(ER_NO_REFERENCED_ROW_2));
281
  SETMSG(HA_ERR_ROW_IS_REFERENCED,      ER(ER_ROW_IS_REFERENCED_2));
282
  SETMSG(HA_ERR_NO_SAVEPOINT,           "No savepoint with that name");
283
  SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE,  "Non unique key block size");
284
  SETMSG(HA_ERR_NO_SUCH_TABLE,          "No such table: '%.64s'");
285
  SETMSG(HA_ERR_TABLE_EXIST,            ER(ER_TABLE_EXISTS_ERROR));
286
  SETMSG(HA_ERR_NO_CONNECTION,          "Could not connect to storage engine");
287
  SETMSG(HA_ERR_TABLE_DEF_CHANGED,      ER(ER_TABLE_DEF_CHANGED));
288
  SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY,  "FK constraint would lead to duplicate key");
289
  SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE,    ER(ER_TABLE_NEEDS_UPGRADE));
290
  SETMSG(HA_ERR_TABLE_READONLY,         ER(ER_OPEN_AS_READONLY));
291
  SETMSG(HA_ERR_AUTOINC_READ_FAILED,    ER(ER_AUTOINC_READ_FAILED));
292
  SETMSG(HA_ERR_AUTOINC_ERANGE,         ER(ER_WARN_DATA_OUT_OF_RANGE));
293
294
  /* Register the error messages for use with my_error(). */
295
  return my_error_register(errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
296
}
297
298
299
/**
300
  Unregister handler error messages.
301
302
  @retval
303
    0           OK
304
  @retval
305
    !=0         Error
306
*/
307
static int ha_finish_errors(void)
308
{
309
  const char    **errmsgs;
310
311
  /* Allocate a pointer array for the error message strings. */
312
  if (! (errmsgs= my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST)))
313
    return 1;
314
  my_free((uchar*) errmsgs, MYF(0));
315
  return 0;
316
}
317
318
319
/**
  Tear down the handlerton attached to a storage engine plugin.

  An enabled engine is removed from installed_htons[] (only if the entry
  still points at this handlerton), the engine's panic() hook is called
  with HA_PANIC_CLOSE when present, and the handlerton memory is freed.

  @param plugin      plugin whose data member holds the handlerton

  @return
    always 0
*/
int ha_finalize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton= (handlerton *)plugin->data;

  /*
    No handlerton is attached, e.g. because its allocation failed during
    initialization; there is nothing to tear down.
  */
  if (!hton)
    return(0);

  switch (hton->state)
  {
  case SHOW_OPTION_NO:
  case SHOW_OPTION_DISABLED:
    break;
  case SHOW_OPTION_YES:
    /* Release the legacy type code only if this engine still owns it. */
    if (installed_htons[hton->db_type] == hton)
      installed_htons[hton->db_type]= NULL;
    break;
  }

  if (hton->panic)
    hton->panic(hton, HA_PANIC_CLOSE);

  my_free((uchar*)hton, MYF(0));

  return(0);
}
341
342
343
/**
  Allocate and register the handlerton for a storage engine plugin.

  A zeroed handlerton is allocated and attached to plugin->data, then the
  plugin's init() hook (if any) fills it in. An engine left in state
  SHOW_OPTION_YES gets a legacy type code (its own if it is free and in
  range, otherwise the first free dynamic one), a slot in hton2plugin[],
  and its savepoint area offset; total_ha, total_ha_2pc and
  savepoint_alloc_size are updated accordingly. Any state other than
  SHOW_OPTION_YES / SHOW_OPTION_NO is turned into SHOW_OPTION_DISABLED.

  @param plugin      plugin being initialized

  @retval
    0           OK
  @retval
    1           allocation failed, the plugin's init() reported an error,
                or no legacy type code was left
*/
int ha_initialize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton;

  hton= (handlerton *)my_malloc(sizeof(handlerton),
                                MYF(MY_WME | MY_ZEROFILL));
  /* Historical Requirement */
  plugin->data= hton; // shortcut for the future
  if (!hton)
    return(1);

  /*
    Clear the whole structure explicitly. The size must be that of the
    object, not of the pointer: sizeof(hton) would clear only the first
    pointer-sized bytes and leave later members (get_backup_engine was
    the one originally noticed) dependent on the allocator alone.
  */
  bzero(hton, sizeof(*hton));

  if (plugin->plugin->init)
  {
    if (plugin->plugin->init(hton))
    {
      sql_print_error("Plugin '%s' init function returned error.",
                      plugin->name.str);
      /*
        NOTE(review): hton stays attached to plugin->data on this path; it
        is released only if ha_finalize_handlerton() is later run for this
        plugin -- confirm the plugin framework does so.
      */
      return(1);
    }
  }

  /*
    the switch below and hton->state should be removed when
    command-line options for plugins will be implemented
  */
  switch (hton->state) {
  case SHOW_OPTION_NO:
    break;
  case SHOW_OPTION_YES:
    {
      uint tmp;
      /* now check the db_type for conflict */
      if (hton->db_type <= DB_TYPE_UNKNOWN ||
          hton->db_type >= DB_TYPE_DEFAULT ||
          installed_htons[hton->db_type])
      {
        /* Pick the first free type code in the dynamic range instead. */
        int idx= (int) DB_TYPE_FIRST_DYNAMIC;

        while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
          idx++;

        if (idx == (int) DB_TYPE_DEFAULT)
        {
          sql_print_warning("Too many storage engines!");
          return(1);
        }
        if (hton->db_type != DB_TYPE_UNKNOWN)
          sql_print_warning("Storage engine '%s' has conflicting typecode. "
                            "Assigning value %d.", plugin->plugin->name, idx);
        hton->db_type= (enum legacy_db_type) idx;
      }
      installed_htons[hton->db_type]= hton;
      /*
        The value the engine left in savepoint_offset is added to the
        running total, and the field is replaced by the total as it was
        before this engine was added.
      */
      tmp= hton->savepoint_offset;
      hton->savepoint_offset= savepoint_alloc_size;
      savepoint_alloc_size+= tmp;
      hton->slot= total_ha++;
      hton2plugin[hton->slot]=plugin;
      if (hton->prepare)
        total_ha_2pc++;
      break;
    }
  default:
    hton->state= SHOW_OPTION_DISABLED;
    break;
  }

  /*
    This is entirely for legacy. We will create a new "disk based" hton and a
    "memory" hton which will be configurable longterm. We should be able to
    remove partition and myisammrg.
  */
  switch (hton->db_type) {
  case DB_TYPE_HEAP:
    heap_hton= hton;
    break;
  case DB_TYPE_MYISAM:
    myisam_hton= hton;
    break;
  default:
    break;
  }

  return(0);
}
435
436
int ha_init()
437
{
438
  int error= 0;
439
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
440
  assert(total_ha < MAX_HA);
1 by brian
clean slate
441
  /*
442
    Check if there is a transaction-capable storage engine besides the
443
    binary log (which is considered a transaction-capable storage engine in
444
    counting total_ha)
445
  */
61 by Brian Aker
Conversion of handler type.
446
  opt_using_transactions= total_ha>(uint32_t)opt_bin_log;
1 by brian
clean slate
447
  savepoint_alloc_size+= sizeof(SAVEPOINT);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
448
  return(error);
1 by brian
clean slate
449
}
450
451
int ha_end()
452
{
453
  int error= 0;
454
455
  /* 
456
    This should be eventualy based  on the graceful shutdown flag.
457
    So if flag is equal to HA_PANIC_CLOSE, the deallocate
458
    the errors.
459
  */
460
  if (ha_finish_errors())
461
    error= 1;
462
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
463
  return(error);
1 by brian
clean slate
464
}
465
77.1.15 by Monty Taylor
Bunch of warning cleanups.
466
/**
  plugin_foreach() callback: let one storage engine drop a database.

  The engine's drop_database() hook is called only when the engine is in
  state SHOW_OPTION_YES and provides the hook.

  @param unused1     not used
  @param plugin      storage engine plugin being visited
  @param path        database path, passed to the engine as char*

  @return
    always false
*/
static my_bool dropdb_handlerton(THD *unused1 __attribute__((__unused__)),
                                 plugin_ref plugin,
                                 void *path)
{
  handlerton *engine= plugin_data(plugin, handlerton *);
  const bool can_drop= engine->state == SHOW_OPTION_YES &&
                       engine->drop_database;

  if (can_drop)
    engine->drop_database(engine, (char *)path);

  return false;
}
475
476
477
void ha_drop_database(char* path)
478
{
479
  plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
480
}
481
482
483
/**
  plugin_foreach() callback: close one storage engine's connection state.

  The engine's close_connection() hook is called only when the engine is
  in state SHOW_OPTION_YES, provides the hook, and thd_get_ha_data()
  returns non-NULL for this thread and engine.

  @param thd         current thread
  @param plugin      storage engine plugin being visited
  @param unused      not used

  @return
    always false
*/
static my_bool closecon_handlerton(THD *thd, plugin_ref plugin,
                                   void *unused __attribute__((__unused__)))
{
  handlerton *engine= plugin_data(plugin, handlerton *);

  if (engine->state != SHOW_OPTION_YES || !engine->close_connection)
    return false;

  /*
    there's no need to rollback here as all transactions must
    be rolled back already
  */
  if (thd_get_ha_data(thd, engine))
    engine->close_connection(engine, thd);

  return false;
}
496
497
498
/**
499
  @note
500
    don't bother to rollback here, it's done already
501
*/
502
void ha_close_connection(THD* thd)
503
{
504
  plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
505
}
506
507
/* ========================================================================
508
 ======================= TRANSACTIONS ===================================*/
509
510
/**
511
  Transaction handling in the server
512
  ==================================
513
514
  In each client connection, MySQL maintains two transactional
515
  states:
516
  - a statement transaction,
517
  - a standard, also called normal transaction.
518
519
  Historical note
520
  ---------------
521
  "Statement transaction" is a non-standard term that comes
522
  from the times when MySQL supported BerkeleyDB storage engine.
523
524
  First of all, it should be said that in BerkeleyDB auto-commit
525
  mode auto-commits operations that are atomic to the storage
526
  engine itself, such as a write of a record, and are too
527
  high-granular to be atomic from the application perspective
528
  (MySQL). One SQL statement could involve many BerkeleyDB
529
  auto-committed operations and thus BerkeleyDB auto-commit was of
530
  little use to MySQL.
531
532
  Secondly, instead of SQL standard savepoints, BerkeleyDB
533
  provided the concept of "nested transactions". In a nutshell,
534
  transactions could be arbitrarily nested, but when the parent
535
  transaction was committed or aborted, all its child (nested)
536
  transactions were committed or aborted as well.
537
  Commit of a nested transaction, in turn, made its changes
538
  visible, but not durable: it destroyed the nested transaction,
539
  all its changes would become available to the parent and
540
  currently active nested transactions of this parent.
541
542
  So the mechanism of nested transactions was employed to
543
  provide "all or nothing" guarantee of SQL statements
544
  required by the standard.
545
  A nested transaction would be created at start of each SQL
546
  statement, and destroyed (committed or aborted) at statement
547
  end. Such nested transaction was internally referred to as
548
  a "statement transaction" and gave birth to the term.
549
550
  <Historical note ends>
551
552
  Since then a statement transaction is started for each statement
553
  that accesses transactional tables or uses the binary log.  If
554
  the statement succeeds, the statement transaction is committed.
555
  If the statement fails, the transaction is rolled back. Commits
556
  of statement transactions are not durable -- each such
557
  transaction is nested in the normal transaction, and if the
558
  normal transaction is rolled back, the effects of all enclosed
559
  statement transactions are undone as well.  Technically,
560
  a statement transaction can be viewed as a savepoint which is
561
  maintained automatically in order to make effects of one
562
  statement atomic.
563
564
  The normal transaction is started by the user and is ended
565
  usually upon a user request as well. The normal transaction
566
  encloses transactions of all statements issued between
567
  its beginning and its end.
568
  In autocommit mode, the normal transaction is equivalent
569
  to the statement transaction.
570
571
  Since MySQL supports PSEA (pluggable storage engine
572
  architecture), more than one transactional engine can be
573
  active at a time. Hence transactions, from the server
574
  point of view, are always distributed. In particular,
575
  transactional state is maintained independently for each
576
  engine. In order to commit a transaction the two phase
577
  commit protocol is employed.
578
579
  Not all statements are executed in context of a transaction.
580
  Administrative and status information statements do not modify
581
  engine data, and thus do not start a statement transaction and
582
  also have no effect on the normal transaction. Examples of such
583
  statements are SHOW STATUS and RESET SLAVE.
584
585
  Similarly DDL statements are not transactional,
586
  and therefore a transaction is [almost] never started for a DDL
587
  statement. The difference between a DDL statement and a purely
588
  administrative statement though is that a DDL statement always
589
  commits the current transaction before proceeding, if there is
590
  any.
591
592
  At last, SQL statements that work with non-transactional
593
  engines also have no effect on the transaction state of the
594
  connection. Even though they are written to the binary log,
595
  and the binary log is, overall, transactional, the writes
596
  are done in "write-through" mode, directly to the binlog
597
  file, followed with a OS cache sync, in other words,
598
  bypassing the binlog undo log (translog).
599
  They do not commit the current normal transaction.
600
  A failure of a statement that uses non-transactional tables
601
  would cause a rollback of the statement transaction, but
602
  in case there no non-transactional tables are used,
603
  no statement transaction is started.
604
605
  Data layout
606
  -----------
607
608
  The server stores its transaction-related data in
609
  thd->transaction. This structure has two members of type
610
  THD_TRANS. These members correspond to the statement and
611
  normal transactions respectively:
612
613
  - thd->transaction.stmt contains a list of engines
614
  that are participating in the given statement
615
  - thd->transaction.all contains a list of engines that
616
  have participated in any of the statement transactions started
617
  within the context of the normal transaction.
618
  Each element of the list contains a pointer to the storage
619
  engine, engine-specific transactional data, and engine-specific
620
  transaction flags.
621
622
  In autocommit mode thd->transaction.all is empty.
623
  Instead, data of thd->transaction.stmt is
624
  used to commit/rollback the normal transaction.
625
626
  The list of registered engines has a few important properties:
627
  - no engine is registered in the list twice
628
  - engines are present in the list in reverse temporal order --
629
  new participants are always added to the beginning of the list.
630
631
  Transaction life cycle
632
  ----------------------
633
634
  When a new connection is established, thd->transaction
635
  members are initialized to an empty state.
636
  If a statement uses any tables, all affected engines
637
  are registered in the statement engine list. In
638
  non-autocommit mode, the same engines are registered in
639
  the normal transaction list.
640
  At the end of the statement, the server issues a commit
641
  or a roll back for all engines in the statement list.
642
  At this point transaction flags of an engine, if any, are
643
  propagated from the statement list to the list of the normal
644
  transaction.
645
  When commit/rollback is finished, the statement list is
646
  cleared. It will be filled in again by the next statement,
647
  and emptied again at the next statement's end.
648
649
  The normal transaction is committed in a similar way
650
  (by going over all engines in thd->transaction.all list)
651
  but at different times:
652
  - when a COMMIT SQL statement is issued by the user
653
  - implicitly, by the server, at the beginning of a DDL statement
654
  or SET AUTOCOMMIT={0|1} statement.
655
656
  The normal transaction can be rolled back as well:
657
  - if the user has requested so, by issuing ROLLBACK SQL
658
  statement
659
  - if one of the storage engines requested a rollback
660
  by setting thd->transaction_rollback_request. This may
661
  happen in case, e.g., when the transaction in the engine was
662
  chosen as a victim of the internal deadlock resolution algorithm
663
  and rolled back internally. When such a situation happens, there
664
  is little the server can do and the only option is to rollback
665
  transactions in all other participating engines.  In this case
666
  the rollback is accompanied by an error sent to the user.
667
668
  As follows from the use cases above, the normal transaction
669
  is never committed when there is an outstanding statement
670
  transaction. In most cases there is no conflict, since
671
  commits of the normal transaction are issued by a stand-alone
672
  administrative or DDL statement, thus no outstanding statement
673
  transaction of the previous statement exists. Besides,
674
  all statements that manipulate with the normal transaction
675
  are prohibited in stored functions and triggers, therefore
676
  no conflicting situation can occur in a sub-statement either.
677
  The remaining rare cases when the server explicitly has
678
  to commit the statement transaction prior to committing the normal
679
  one cover error-handling scenarios (see for example
680
  SQLCOM_LOCK_TABLES).
681
682
  When committing a statement or a normal transaction, the server
683
  either uses the two-phase commit protocol, or issues a commit
684
  in each engine independently. The two-phase commit protocol
685
  is used only if:
686
  - all participating engines support two-phase commit (provide
687
    handlerton::prepare PSEA API call) and
688
  - transactions in at least two engines modify data (i.e. are
689
  not read-only).
690
691
  Note that the two phase commit is used for
692
  statement transactions, even though they are not durable anyway.
693
  This is done to ensure logical consistency of data in a multiple-
694
  engine transaction.
695
  For example, imagine that some day MySQL supports unique
696
  constraint checks deferred till the end of statement. In such
697
  case a commit in one of the engines may yield ER_DUP_KEY,
698
  and MySQL should be able to gracefully abort statement
699
  transactions of other participants.
700
701
  After the normal transaction has been committed,
702
  thd->transaction.all list is cleared.
703
704
  When a connection is closed, the current normal transaction, if
705
  any, is rolled back.
706
707
  Roles and responsibilities
708
  --------------------------
709
710
  The server has no way to know that an engine participates in
711
  the statement and a transaction has been started
712
  in it unless the engine says so. Thus, in order to be
713
  a part of a transaction, the engine must "register" itself.
714
  This is done by invoking trans_register_ha() server call.
715
  Normally the engine registers itself whenever handler::external_lock()
716
  is called. trans_register_ha() can be invoked many times: if
717
  an engine is already registered, the call does nothing.
718
  In case autocommit is not set, the engine must register itself
719
  twice -- both in the statement list and in the normal transaction
720
  list.
721
  In which list to register is a parameter of trans_register_ha().
722
723
  Note, that although the registration interface in itself is
724
  fairly clear, the current usage practice often leads to undesired
725
  effects. E.g. since a call to trans_register_ha() in most engines
726
  is embedded into implementation of handler::external_lock(), some
727
  DDL statements start a transaction (at least from the server
728
  point of view) even though they are not expected to. E.g.
729
  CREATE TABLE does not start a transaction, since
730
  handler::external_lock() is never called during CREATE TABLE. But
731
  CREATE TABLE ... SELECT does, since handler::external_lock() is
732
  called for the table that is being selected from. This has no
733
  practical effects currently, but must be kept in mind
734
  nevertheless.
735
736
  Once an engine is registered, the server will do the rest
737
  of the work.
738
739
  During statement execution, whenever any of data-modifying
740
  PSEA API methods is used, e.g. handler::write_row() or
741
  handler::update_row(), the read-write flag is raised in the
742
  statement transaction for the involved engine.
743
  Currently all PSEA calls are "traced", and the data can not be
744
  changed in a way other than issuing a PSEA call. Important:
745
  unless this invariant is preserved the server will not know that
746
  a transaction in a given engine is read-write and will not
747
  involve the two-phase commit protocol!
748
749
  At the end of a statement, server call
750
  ha_autocommit_or_rollback() is invoked. This call in turn
751
  invokes handlerton::prepare() for every involved engine.
752
  Prepare is followed by a call to handlerton::commit_one_phase()
753
  If a one-phase commit will suffice, handlerton::prepare() is not
754
  invoked and the server only calls handlerton::commit_one_phase().
755
  At statement commit, the statement-related read-write engine
756
  flag is propagated to the corresponding flag in the normal
757
  transaction.  When the commit is complete, the list of registered
758
  engines is cleared.
759
760
  Rollback is handled in a similar fashion.
761
762
  Additional notes on DDL and the normal transaction.
763
  ---------------------------------------------------
764
765
  DDLs and operations with non-transactional engines
766
  do not "register" in thd->transaction lists, and thus do not
767
  modify the transaction state. Besides, each DDL in
768
  MySQL is prefixed with an implicit normal transaction commit
769
  (a call to end_active_trans()), and thus leaves nothing
770
  to modify.
771
  However, as it has been pointed out with CREATE TABLE .. SELECT,
772
  some DDL statements can start a *new* transaction.
773
774
  Behaviour of the server in this case is currently badly
775
  defined.
776
  DDL statements use a form of "semantic" logging
777
  to maintain atomicity: if CREATE TABLE .. SELECT failed,
778
  the newly created table is deleted.
779
  In addition, some DDL statements issue interim transaction
780
  commits: e.g. ALTER TABLE issues a commit after data is copied
781
  from the original table to the internal temporary table. Other
782
  statements, e.g. CREATE TABLE ... SELECT do not always commit
783
  after itself.
784
  And finally there is a group of DDL statements such as
785
  RENAME/DROP TABLE that doesn't start a new transaction
786
  and doesn't commit.
787
788
  This diversity makes it hard to say what will happen if
789
  by chance a stored function is invoked during a DDL --
790
  whether any modifications it makes will be committed or not
791
  is not clear. Fortunately, SQL grammar of few DDLs allows
792
  invocation of a stored function.
793
794
  A consistent behaviour is perhaps to always commit the normal
795
  transaction after all DDLs, just like the statement transaction
796
  is always committed at the end of all statements.
797
*/
798
799
/**
800
  Register a storage engine for a transaction.
801
802
  Every storage engine MUST call this function when it starts
803
  a transaction or a statement (that is it must be called both for the
804
  "beginning of transaction" and "beginning of statement").
805
  Only storage engines registered for the transaction/statement
806
  will know when to commit/rollback it.
807
808
  @note
809
    trans_register_ha is idempotent - storage engine may register many
810
    times per transaction.
811
812
*/
813
void trans_register_ha(THD *thd, bool all, handlerton *ht_arg)
814
{
815
  THD_TRANS *trans;
816
  Ha_trx_info *ha_info;
817
818
  if (all)
819
  {
820
    trans= &thd->transaction.all;
821
    thd->server_status|= SERVER_STATUS_IN_TRANS;
822
  }
823
  else
824
    trans= &thd->transaction.stmt;
825
826
  ha_info= thd->ha_data[ht_arg->slot].ha_info + static_cast<unsigned>(all);
827
828
  if (ha_info->is_started())
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
829
    return; /* already registered, return */
1 by brian
clean slate
830
831
  ha_info->register_ha(trans, ht_arg);
832
833
  trans->no_2pc|=(ht_arg->prepare==0);
834
  if (thd->transaction.xid_state.xid.is_null())
835
    thd->transaction.xid_state.xid.set(thd->query_id);
836
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
837
  return;
1 by brian
clean slate
838
}
839
840
/**
841
  @retval
842
    0   ok
843
  @retval
844
    1   error, transaction was rolled back
845
*/
846
int ha_prepare(THD *thd)
847
{
848
  int error=0, all=1;
849
  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
850
  Ha_trx_info *ha_info= trans->ha_list;
851
  if (ha_info)
852
  {
853
    for (; ha_info; ha_info= ha_info->next())
854
    {
855
      int err;
856
      handlerton *ht= ha_info->ht();
857
      status_var_increment(thd->status_var.ha_prepare_count);
858
      if (ht->prepare)
859
      {
860
        if ((err= ht->prepare(ht, thd, all)))
861
        {
862
          my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
863
          ha_rollback_trans(thd, all);
864
          error=1;
865
          break;
866
        }
867
      }
868
      else
869
      {
870
        push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
871
                            ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
872
                            ha_resolve_storage_engine_name(ht));
873
      }
874
    }
875
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
876
  return(error);
1 by brian
clean slate
877
}
878
879
/**
880
  Check if we can skip the two-phase commit.
881
882
  A helper function to evaluate if two-phase commit is mandatory.
883
  As a side effect, propagates the read-only/read-write flags
884
  of the statement transaction to its enclosing normal transaction.
885
56 by brian
Next pass of true/false update.
886
  @retval true   we must run a two-phase commit. Returned
1 by brian
clean slate
887
                 if we have at least two engines with read-write changes.
56 by brian
Next pass of true/false update.
888
  @retval false  Don't need two-phase commit. Even if we have two
1 by brian
clean slate
889
                 transactional engines, we can run two independent
890
                 commits if changes in one of the engines are read-only.
891
*/
892
893
static
894
bool
895
ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
896
                                    bool all)
897
{
898
  /* The number of storage engines that have actual changes. */
899
  unsigned rw_ha_count= 0;
900
  Ha_trx_info *ha_info;
901
902
  for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
903
  {
904
    if (ha_info->is_trx_read_write())
905
      ++rw_ha_count;
906
907
    if (! all)
908
    {
909
      Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
910
      assert(ha_info != ha_info_all);
1 by brian
clean slate
911
      /*
912
        Merge read-only/read-write information about statement
913
        transaction to its enclosing normal transaction. Do this
914
        only if in a real transaction -- that is, if we know
915
        that ha_info_all is registered in thd->transaction.all.
916
        Since otherwise we only clutter the normal transaction flags.
917
      */
56 by brian
Next pass of true/false update.
918
      if (ha_info_all->is_started()) /* false if autocommit. */
1 by brian
clean slate
919
        ha_info_all->coalesce_trx_with(ha_info);
920
    }
921
    else if (rw_ha_count > 1)
922
    {
923
      /*
924
        It is a normal transaction, so we don't need to merge read/write
925
        information up, and the need for two-phase commit has been
926
        already established. Break the loop prematurely.
927
      */
928
      break;
929
    }
930
  }
931
  return rw_ha_count > 1;
932
}
933
934
935
/**
936
  @retval
937
    0   ok
938
  @retval
939
    1   transaction was rolled back
940
  @retval
941
    2   error during commit, data may be inconsistent
942
943
  @todo
944
    Since we don't support nested statement transactions in 5.0,
945
    we can't commit or rollback stmt transactions while we are inside
946
    stored functions or triggers. So we simply do nothing now.
947
    TODO: This should be fixed in later ( >= 5.1) releases.
948
*/
949
int ha_commit_trans(THD *thd, bool all)
950
{
951
  int error= 0, cookie= 0;
952
  /*
953
    'all' means that this is either an explicit commit issued by
954
    user, or an implicit commit issued by a DDL.
955
  */
956
  THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
957
  bool is_real_trans= all || thd->transaction.all.ha_list == 0;
958
  Ha_trx_info *ha_info= trans->ha_list;
959
  my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
960
961
  /*
962
    We must not commit the normal transaction if a statement
963
    transaction is pending. Otherwise statement transaction
964
    flags will not get propagated to its normal transaction's
965
    counterpart.
966
  */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
967
  assert(thd->transaction.stmt.ha_list == NULL ||
1 by brian
clean slate
968
              trans == &thd->transaction.stmt);
969
970
  if (thd->in_sub_stmt)
971
  {
972
    /*
973
      Since we don't support nested statement transactions in 5.0,
974
      we can't commit or rollback stmt transactions while we are inside
975
      stored functions or triggers. So we simply do nothing now.
976
      TODO: This should be fixed in later ( >= 5.1) releases.
977
    */
978
    if (!all)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
979
      return(0);
1 by brian
clean slate
980
    /*
981
      We assume that all statements which commit or rollback main transaction
982
      are prohibited inside of stored functions or triggers. So they should
983
      bail out with error even before ha_commit_trans() call. To be 100% safe
984
      let us throw error in non-debug builds.
985
    */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
986
    assert(0);
1 by brian
clean slate
987
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
988
    return(2);
1 by brian
clean slate
989
  }
990
  if (ha_info)
991
  {
992
    bool must_2pc;
993
994
    if (is_real_trans && wait_if_global_read_lock(thd, 0, 0))
995
    {
996
      ha_rollback_trans(thd, all);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
997
      return(1);
1 by brian
clean slate
998
    }
999
1000
    if (   is_real_trans
1001
        && opt_readonly
1002
        && ! thd->slave_thread
1003
       )
1004
    {
1005
      my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
1006
      ha_rollback_trans(thd, all);
1007
      error= 1;
1008
      goto end;
1009
    }
1010
1011
    must_2pc= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
1012
1013
    if (!trans->no_2pc && must_2pc)
1014
    {
1015
      for (; ha_info && !error; ha_info= ha_info->next())
1016
      {
1017
        int err;
1018
        handlerton *ht= ha_info->ht();
1019
        /*
1020
          Do not call two-phase commit if this particular
1021
          transaction is read-only. This allows for simpler
1022
          implementation in engines that are always read-only.
1023
        */
1024
        if (! ha_info->is_trx_read_write())
1025
          continue;
1026
        /*
1027
          Sic: we know that prepare() is not NULL since otherwise
1028
          trans->no_2pc would have been set.
1029
        */
1030
        if ((err= ht->prepare(ht, thd, all)))
1031
        {
1032
          my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1033
          error= 1;
1034
        }
1035
        status_var_increment(thd->status_var.ha_prepare_count);
1036
      }
1037
      if (error || (is_real_trans && xid &&
1038
                    (error= !(cookie= tc_log->log_xid(thd, xid)))))
1039
      {
1040
        ha_rollback_trans(thd, all);
1041
        error= 1;
1042
        goto end;
1043
      }
1044
    }
1045
    error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0;
1046
    if (cookie)
1047
      tc_log->unlog(cookie, xid);
1048
end:
1049
    if (is_real_trans)
1050
      start_waiting_global_read_lock(thd);
1051
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1052
  return(error);
1 by brian
clean slate
1053
}
1054
1055
/**
1056
  @note
1057
  This function does not care about global read lock. A caller should.
1058
*/
1059
int ha_commit_one_phase(THD *thd, bool all)
1060
{
1061
  int error=0;
1062
  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1063
  bool is_real_trans=all || thd->transaction.all.ha_list == 0;
1064
  Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1065
  if (ha_info)
1066
  {
1067
    for (; ha_info; ha_info= ha_info_next)
1068
    {
1069
      int err;
1070
      handlerton *ht= ha_info->ht();
1071
      if ((err= ht->commit(ht, thd, all)))
1072
      {
1073
        my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1074
        error=1;
1075
      }
1076
      status_var_increment(thd->status_var.ha_commit_count);
1077
      ha_info_next= ha_info->next();
1078
      ha_info->reset(); /* keep it conveniently zero-filled */
1079
    }
1080
    trans->ha_list= 0;
1081
    trans->no_2pc=0;
1082
    if (is_real_trans)
1083
      thd->transaction.xid_state.xid.null();
1084
    if (all)
1085
    {
1086
      thd->variables.tx_isolation=thd->session_tx_isolation;
1087
      thd->transaction.cleanup();
1088
    }
1089
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1090
  return(error);
1 by brian
clean slate
1091
}
1092
1093
1094
/**
  Call rollback() in every engine registered in the chosen transaction
  and reset the registration list.

  @param thd  connection whose transaction is rolled back
  @param all  true for the normal transaction, false for the statement
              transaction

  @return 0 on success, 1 if an engine reported an error or if a
          rollback of the normal transaction was requested inside a
          sub-statement
*/
int ha_rollback_trans(THD *thd, bool all)
{
  THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
  Ha_trx_info *ha_info= trans->ha_list;
  const bool is_real_trans= all || thd->transaction.all.ha_list == 0;
  int error= 0;

  /*
    The normal transaction must not be rolled back while a statement
    transaction is pending.
  */
  assert(thd->transaction.stmt.ha_list == NULL ||
         trans == &thd->transaction.stmt);

  if (thd->in_sub_stmt)
  {
    /*
      Inside a stored function or trigger the current statement
      transaction is neither committed nor rolled back; the comment in
      ha_commit_trans() has the details.
    */
    if (!all)
      return 0;
    assert(0);
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
    return 1;
  }

  if (ha_info)
  {
    do
    {
      handlerton *ht= ha_info->ht();
      int err= ht->rollback(ht, thd, all);
      if (err)
      { // cannot happen
        my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
        error= 1;
      }
      status_var_increment(thd->status_var.ha_rollback_count);

      /* Fetch the successor before the entry is wiped. */
      Ha_trx_info *following= ha_info->next();
      ha_info->reset(); /* keep it conveniently zero-filled */
      ha_info= following;
    } while (ha_info);

    trans->ha_list= 0;
    trans->no_2pc= 0;
    if (is_real_trans)
      thd->transaction.xid_state.xid.null();
    if (all)
    {
      thd->variables.tx_isolation= thd->session_tx_isolation;
      thd->transaction.cleanup();
    }
  }

  if (all)
    thd->transaction_rollback_request= false;

  /*
    Warn when a non-transactional table was updated. A slave thread is
    exempt: a ROLLBACK it executes was read from the binary log, so
    ER_WARNING_NOT_COMPLETE_ROLLBACK is expected there. The warning
    would not stop the slave SQL thread, only appear in the error log,
    and we do not want users to wonder about it.
  */
  const bool warn_incomplete= is_real_trans &&
                              thd->transaction.all.modified_non_trans_table &&
                              !thd->slave_thread &&
                              thd->killed != THD::KILL_CONNECTION;
  if (warn_incomplete)
    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                 ER_WARNING_NOT_COMPLETE_ROLLBACK,
                 ER(ER_WARNING_NOT_COMPLETE_ROLLBACK));
  return error;
}
1165
1166
/**
1167
  This is used to commit or rollback a single statement depending on
1168
  the value of error.
1169
1170
  @note
1171
    Note that if the autocommit is on, then the following call inside
1172
    InnoDB will commit or rollback the whole transaction (= the statement). The
1173
    autocommit mechanism built into InnoDB is based on counting locks, but if
1174
    the user has used LOCK TABLES then that mechanism does not know to do the
1175
    commit.
1176
*/
1177
int ha_autocommit_or_rollback(THD *thd, int error)
1178
{
1179
  if (thd->transaction.stmt.ha_list)
1180
  {
1181
    if (!error)
1182
    {
1183
      if (ha_commit_trans(thd, 0))
1184
	error=1;
1185
    }
1186
    else 
1187
    {
1188
      (void) ha_rollback_trans(thd, 0);
1189
      if (thd->transaction_rollback_request && !thd->in_sub_stmt)
1190
        (void) ha_rollback(thd);
1191
    }
1192
1193
    thd->variables.tx_isolation=thd->session_tx_isolation;
1194
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1195
  return(error);
1 by brian
clean slate
1196
}
1197
1198
1199
struct xahton_st {
1200
  XID *xid;
1201
  int result;
1202
};
1203
77.1.15 by Monty Taylor
Bunch of warning cleanups.
1204
/**
  plugin_foreach() callback: commit an XID in one storage engine.

  Engines that are not enabled or have no recover() method are skipped.

  @param unused1  not used
  @param plugin   storage engine plugin being visited
  @param arg      pointer to a struct xahton_st; its result is set to 0
                  when the engine was asked to commit

  @return always false, so that every plugin is visited
*/
static my_bool xacommit_handlerton(THD *unused1 __attribute__((__unused__)),
                                   plugin_ref plugin,
                                   void *arg)
{
  handlerton *hton= plugin_data(plugin, handlerton *);
  struct xahton_st *xaop= (struct xahton_st *) arg;

  if (hton->state != SHOW_OPTION_YES || !hton->recover)
    return false;

  hton->commit_by_xid(hton, xaop->xid);
  xaop->result= 0;
  return false;
}
1216
77.1.15 by Monty Taylor
Bunch of warning cleanups.
1217
/**
  plugin_foreach() callback: roll back an XID in one storage engine.

  Engines that are not enabled or have no recover() method are skipped.

  @param unused1  not used
  @param plugin   storage engine plugin being visited
  @param arg      pointer to a struct xahton_st; its result is set to 0
                  when the engine was asked to roll back

  @return always false, so that every plugin is visited
*/
static my_bool xarollback_handlerton(THD *unused1 __attribute__((__unused__)),
                                     plugin_ref plugin,
                                     void *arg)
{
  handlerton *hton= plugin_data(plugin, handlerton *);
  struct xahton_st *xaop= (struct xahton_st *) arg;

  if (hton->state != SHOW_OPTION_YES || !hton->recover)
    return false;

  hton->rollback_by_xid(hton, xaop->xid);
  xaop->result= 0;
  return false;
}
1229
1230
1231
/**
  Commit or roll back a transaction identified by XID in every storage
  engine plugin that is enabled and provides recover().

  @param xid     transaction to finish
  @param commit  true to commit, false to roll back

  @return 0 if at least one engine processed the XID, 1 otherwise
*/
int ha_commit_or_rollback_by_xid(XID *xid, bool commit)
{
  struct xahton_st xaop;
  xaop.xid= xid;
  xaop.result= 1;  /* stays 1 unless some engine handles the XID */

  if (commit)
    plugin_foreach(NULL, xacommit_handlerton,
                   MYSQL_STORAGE_ENGINE_PLUGIN, &xaop);
  else
    plugin_foreach(NULL, xarollback_handlerton,
                   MYSQL_STORAGE_ENGINE_PLUGIN, &xaop);

  return xaop.result;
}
1242
1243
/**
1244
  recover() step of xa.
1245
1246
  @note
1247
    there are three modes of operation:
1248
    - automatic recover after a crash
1249
    in this case commit_list != 0, tc_heuristic_recover==0
1250
    all xids from commit_list are committed, others are rolled back
1251
    - manual (heuristic) recover
1252
    in this case commit_list==0, tc_heuristic_recover != 0
1253
    DBA has explicitly specified that all prepared transactions should
1254
    be committed (or rolled back).
1255
    - no recovery (MySQL did not detect a crash)
1256
    in this case commit_list==0, tc_heuristic_recover == 0
1257
    there should be no prepared transactions in this case.
1258
*/
1259
struct xarecover_st
1260
{
1261
  int len, found_foreign_xids, found_my_xids;
1262
  XID *list;
1263
  HASH *commit_list;
1264
  bool dry_run;
1265
};
1266
77.1.15 by Monty Taylor
Bunch of warning cleanups.
1267
/**
  plugin_foreach() callback: recover prepared transactions of one
  storage engine.

  Repeatedly asks the engine for up to info->len prepared XIDs. XIDs
  that were not generated by this server are put into the XID cache as
  XA_PREPARED; own XIDs are counted (dry run), committed or rolled back
  according to the recovery mode.

  @param unused  not used
  @param plugin  storage engine plugin being visited
  @param arg     pointer to the struct xarecover_st describing the run

  @return always false, so that every plugin is visited
*/
static my_bool xarecover_handlerton(THD *unused __attribute__((__unused__)),
                                    plugin_ref plugin,
                                    void *arg)
{
  handlerton *hton= plugin_data(plugin, handlerton *);
  struct xarecover_st *info= (struct xarecover_st *) arg;

  if (hton->state != SHOW_OPTION_YES || !hton->recover)
    return false;

  for (;;)
  {
    int got= hton->recover(hton, info->list, info->len);
    if (got <= 0)
      break;

    sql_print_information("Found %d prepared transaction(s) in %s",
                          got, ha_resolve_storage_engine_name(hton));

    for (int i= 0; i < got; i++)
    {
      XID *xid= info->list + i;
      my_xid x= xid->get_my_xid();

      if (!x) // not "mine" - that is generated by external TM
      {
        xid_cache_insert(xid, XA_PREPARED);
        info->found_foreign_xids++;
        continue;
      }
      if (info->dry_run)
      {
        info->found_my_xids++;
        continue;
      }

      // recovery mode
      bool do_commit;
      if (info->commit_list)
        do_commit= hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0;
      else
        do_commit= tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT;

      if (do_commit)
        hton->commit_by_xid(hton, xid);
      else
        hton->rollback_by_xid(hton, xid);
    }

    /* A short batch means the engine has nothing more to report. */
    if (got < info->len)
      break;
  }
  return false;
}
1313
1314
/**
  Run the recover() step of XA over all storage engines.

  @param commit_list  hash of XIDs that must be committed (automatic
                      crash recovery), or NULL

  @retval 0  ok, or nothing to do because no storage engine besides the
             optional binary log takes part in two-phase commit
  @retval 1  out of memory, or prepared transactions were found during
             a dry run
*/
int ha_recover(HASH *commit_list)
{
  struct xarecover_st info;
  info.found_foreign_xids= info.found_my_xids= 0;
  info.commit_list= commit_list;
  info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
  info.list= NULL;
  info.len= 0;

  /* commit_list and tc_heuristic_recover cannot be set both */
  assert(info.commit_list==0 || tc_heuristic_recover==0);
  /* if either is set, total_ha_2pc must be set too */
  assert(info.dry_run || total_ha_2pc>(uint32_t)opt_bin_log);

  if (total_ha_2pc <= (uint32_t)opt_bin_log)
    return(0);

  if (info.commit_list)
    sql_print_information("Starting crash recovery...");


#ifndef WILL_BE_DELETED_LATER

  /*
    for now, only InnoDB supports 2pc. It means we can always safely
    rollback all pending transactions, without risking inconsistent data
  */

  assert(total_ha_2pc == (uint32_t) opt_bin_log+1); // only InnoDB and binlog
  tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK
  info.dry_run=false;
#endif


  /*
    Try progressively smaller XID buffers until an allocation succeeds.
    info.len always holds the size of the most recent attempt, so after
    the loop it is either the capacity of the buffer actually obtained
    or the size of the last request that failed.
  */
  for (int attempt_len= MAX_XID_LIST_SIZE;
       attempt_len > MIN_XID_LIST_SIZE;
       attempt_len/= 2)
  {
    info.len= attempt_len;
    info.list= (XID *)my_malloc(info.len*sizeof(XID), MYF(0));
    if (info.list)
      break;
  }
  if (!info.list)
  {
    /*
      The byte count is a size_t; the message is formatted with an
      int-sized conversion, so pass an int explicitly.
    */
    sql_print_error(ER(ER_OUTOFMEMORY),
                    static_cast<int>(info.len*sizeof(XID)));
    return(1);
  }

  plugin_foreach(NULL, xarecover_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &info);

  my_free((uchar*)info.list, MYF(0));
  if (info.found_foreign_xids)
    sql_print_warning("Found %d prepared XA transactions",
                      info.found_foreign_xids);
  if (info.dry_run && info.found_my_xids)
  {
    sql_print_error("Found %d prepared transactions! It means that mysqld was "
                    "not shut down properly last time and critical recovery "
                    "information (last binlog or %s file) was manually deleted "
                    "after a crash. You have to start mysqld with "
                    "--tc-heuristic-recover switch to commit or rollback "
                    "pending transactions.",
                    info.found_my_xids, opt_tc_log_file);
    return(1);
  }
  if (info.commit_list)
    sql_print_information("Crash recovery finished.");
  return(0);
}
1380
1381
/**
1382
  return the list of XID's to a client, the same way SHOW commands do.
1383
1384
  @note
1385
    I didn't find in XA specs that an RM cannot return the same XID twice,
1386
    so mysql_xa_recover does not filter XID's to ensure uniqueness.
1387
    It can be easily fixed later, if necessary.
1388
*/
1389
bool mysql_xa_recover(THD *thd)
1390
{
1391
  List<Item> field_list;
1392
  Protocol *protocol= thd->protocol;
1393
  int i=0;
1394
  XID_STATE *xs;
1395
1396
  field_list.push_back(new Item_int("formatID", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1397
  field_list.push_back(new Item_int("gtrid_length", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1398
  field_list.push_back(new Item_int("bqual_length", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1399
  field_list.push_back(new Item_empty_string("data",XIDDATASIZE));
1400
1401
  if (protocol->send_fields(&field_list,
1402
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1403
    return(1);
1 by brian
clean slate
1404
1405
  pthread_mutex_lock(&LOCK_xid_cache);
1406
  while ((xs= (XID_STATE*)hash_element(&xid_cache, i++)))
1407
  {
1408
    if (xs->xa_state==XA_PREPARED)
1409
    {
1410
      protocol->prepare_for_resend();
56 by brian
Next pass of true/false update.
1411
      protocol->store_longlong((longlong)xs->xid.formatID, false);
1412
      protocol->store_longlong((longlong)xs->xid.gtrid_length, false);
1413
      protocol->store_longlong((longlong)xs->xid.bqual_length, false);
1 by brian
clean slate
1414
      protocol->store(xs->xid.data, xs->xid.gtrid_length+xs->xid.bqual_length,
1415
                      &my_charset_bin);
1416
      if (protocol->write())
1417
      {
1418
        pthread_mutex_unlock(&LOCK_xid_cache);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1419
        return(1);
1 by brian
clean slate
1420
      }
1421
    }
1422
  }
1423
1424
  pthread_mutex_unlock(&LOCK_xid_cache);
1425
  my_eof(thd);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1426
  return(0);
1 by brian
clean slate
1427
}
1428
1429
/**
1430
  @details
1431
  This function should be called when MySQL sends rows of a SELECT result set
1432
  or the EOF mark to the client. It releases a possible adaptive hash index
1433
  S-latch held by thd in InnoDB and also releases a possible InnoDB query
1434
  FIFO ticket to enter InnoDB. To save CPU time, InnoDB allows a thd to
1435
  keep them over several calls of the InnoDB handler interface when a join
1436
  is executed. But when we let the control to pass to the client they have
1437
  to be released because if the application program uses mysql_use_result(),
1438
  it may deadlock on the S-latch if the application on another connection
1439
  performs another SQL query. In MySQL-4.1 this is even more important because
1440
  there a connection can have several SELECT queries open at the same time.
1441
1442
  @param thd           the thread handle of the current connection
1443
1444
  @return
1445
    always 0
1446
*/
1447
static my_bool release_temporary_latches(THD *thd, plugin_ref plugin,
77.1.15 by Monty Taylor
Bunch of warning cleanups.
1448
                                         void *unused __attribute__((__unused__)))
1 by brian
clean slate
1449
{
1450
  handlerton *hton= plugin_data(plugin, handlerton *);
1451
1452
  if (hton->state == SHOW_OPTION_YES && hton->release_temporary_latches)
1453
    hton->release_temporary_latches(hton, thd);
1454
56 by brian
Next pass of true/false update.
1455
  return false;
1 by brian
clean slate
1456
}
1457
1458
1459
/**
  Ask every storage engine plugin to release the temporary latches it
  holds on behalf of a connection.

  @param thd  the thread handle of the current connection

  @return always 0
*/
int ha_release_temporary_latches(THD *thd)
{
  plugin_foreach(thd,
                 release_temporary_latches,
                 MYSQL_STORAGE_ENGINE_PLUGIN,
                 NULL);
  return 0;
}
1466
1467
/**
  Roll the current transaction back to a savepoint.

  Engines that were already part of the transaction when the savepoint
  was set are rolled back to it; engines that joined afterwards are
  rolled back completely and removed from the transaction.

  @param thd  connection owning the transaction
  @param sv   savepoint to return to; the engines' private savepoint
              data is located right after the SAVEPOINT structure

  @return 0 on success, 1 if any engine reported an error
*/
int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
{
  int error= 0;
  THD_TRANS *trans= thd->in_sub_stmt ? &thd->transaction.stmt
                                     : &thd->transaction.all;

  /* Recomputed below from the engines that stay in the transaction. */
  trans->no_2pc= 0;

  /*
    Step 1: engines that were registered when the savepoint was set
    roll back to it.
  */
  for (Ha_trx_info *kept= sv->ha_list; kept; kept= kept->next())
  {
    handlerton *ht= kept->ht();
    assert(ht);
    assert(ht->savepoint_set != 0);
    int err= ht->savepoint_rollback(ht, thd,
                                    (uchar *)(sv+1)+ht->savepoint_offset);
    if (err)
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
      error= 1;
    }
    status_var_increment(thd->status_var.ha_savepoint_rollback_count);
    trans->no_2pc|= ht->prepare == 0;
  }

  /*
    Step 2: engines registered after the savepoint sit in front of
    sv->ha_list; they roll back entirely and are unregistered.
  */
  Ha_trx_info *late= trans->ha_list;
  while (late != sv->ha_list)
  {
    handlerton *ht= late->ht();
    int err= ht->rollback(ht, thd, !thd->in_sub_stmt);
    if (err)
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
      error= 1;
    }
    status_var_increment(thd->status_var.ha_rollback_count);

    /* Fetch the successor before the entry is wiped. */
    Ha_trx_info *following= late->next();
    late->reset(); /* keep it conveniently zero-filled */
    late= following;
  }

  trans->ha_list= sv->ha_list;
  return error;
}
1515
1516
/**
1517
  @note
1518
  according to the sql standard (ISO/IEC 9075-2:2003)
1519
  section "4.33.4 SQL-statements and transaction states",
1520
  SAVEPOINT is *not* transaction-initiating SQL-statement
1521
*/
1522
int ha_savepoint(THD *thd, SAVEPOINT *sv)
1523
{
1524
  int error=0;
1525
  THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
1526
                                        &thd->transaction.all);
1527
  Ha_trx_info *ha_info= trans->ha_list;
1528
  for (; ha_info; ha_info= ha_info->next())
1529
  {
1530
    int err;
1531
    handlerton *ht= ha_info->ht();
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1532
    assert(ht);
1 by brian
clean slate
1533
    if (! ht->savepoint_set)
1534
    {
1535
      my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
1536
      error=1;
1537
      break;
1538
    }
1539
    if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
1540
    { // cannot happen
1541
      my_error(ER_GET_ERRNO, MYF(0), err);
1542
      error=1;
1543
    }
1544
    status_var_increment(thd->status_var.ha_savepoint_count);
1545
  }
1546
  /*
1547
    Remember the list of registered storage engines. All new
1548
    engines are prepended to the beginning of the list.
1549
  */
1550
  sv->ha_list= trans->ha_list;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1551
  return(error);
1 by brian
clean slate
1552
}
1553
1554
/**
  Release a savepoint in every engine that was registered when the
  savepoint was set.

  Engines without a savepoint_release() method are skipped.

  @param thd  connection owning the transaction
  @param sv   savepoint being released; the engines' private savepoint
              data is located right after the SAVEPOINT structure

  @return 0 on success, 1 if any engine reported an error
*/
int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
{
  int error= 0;

  for (Ha_trx_info *cur= sv->ha_list; cur; cur= cur->next())
  {
    handlerton *ht= cur->ht();
    /* Savepoint life time is enclosed into transaction life time. */
    assert(ht);

    if (ht->savepoint_release)
    {
      int err= ht->savepoint_release(ht, thd,
                                     (uchar *)(sv+1) + ht->savepoint_offset);
      if (err)
      { // cannot happen
        my_error(ER_GET_ERRNO, MYF(0), err);
        error= 1;
      }
    }
  }
  return error;
}
1576
1577
1578
/**
  plugin_foreach() callback: start a consistent snapshot in one storage
  engine.

  @param thd     connection for which the snapshot is started
  @param plugin  storage engine plugin being visited
  @param arg     pointer to a bool that is cleared to false when the
                 engine supports consistent snapshots

  @return always false, so that every plugin is visited
*/
static my_bool snapshot_handlerton(THD *thd, plugin_ref plugin,
                                   void *arg)
{
  handlerton *hton= plugin_data(plugin, handlerton *);

  if (hton->state != SHOW_OPTION_YES || !hton->start_consistent_snapshot)
    return false;

  hton->start_consistent_snapshot(hton, thd);
  *((bool *)arg)= false;
  return false;
}
1590
1591
/**
  Start a consistent snapshot in every storage engine that supports it.

  A warning is pushed when no enabled engine provides
  start_consistent_snapshot().

  @param thd  connection for which the snapshot is started

  @return always 0
*/
int ha_start_consistent_snapshot(THD *thd)
{
  /* Cleared by snapshot_handlerton() as soon as one engine takes part. */
  bool no_engine_took_part= true;

  plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN,
                 &no_engine_took_part);

  /*
    Same idea as when one wants to CREATE TABLE in one engine which does not
    exist:
  */
  if (no_engine_took_part)
    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
                 "This MySQL server does not support any "
                 "consistent-read capable storage engine");
  return 0;
}
1607
1608
77.1.15 by Monty Taylor
Bunch of warning cleanups.
1609
/**
  plugin_foreach() callback: flush the logs of one storage engine.

  @param thd     not used
  @param plugin  storage engine plugin being visited
  @param arg     not used

  @return true if the engine is enabled, implements flush_logs() and
          that call reported a failure; false otherwise
*/
static my_bool flush_handlerton(THD *thd __attribute__((__unused__)),
                                plugin_ref plugin,
                                void *arg __attribute__((__unused__)))
{
  handlerton *hton= plugin_data(plugin, handlerton *);

  if (hton->state != SHOW_OPTION_YES)
    return false;
  if (!hton->flush_logs)
    return false;
  if (hton->flush_logs(hton))
    return true;
  return false;
}
1619
1620
1621
/**
  Flush the logs of one storage engine, or of all of them.

  @param db_type  engine to flush; NULL means visit every storage engine
                  plugin through flush_handlerton()

  @return true on failure (for a specific engine this includes the engine
          not being in state SHOW_OPTION_YES), false otherwise
*/
bool ha_flush_logs(handlerton *db_type)
{
  if (db_type != NULL)
  {
    /* A specific engine was requested: it has to be enabled. */
    if (db_type->state != SHOW_OPTION_YES)
      return true;
    if (db_type->flush_logs && db_type->flush_logs(db_type))
      return true;
    return false;
  }

  /* No engine given: walk all storage engine plugins. */
  if (plugin_foreach(NULL, flush_handlerton,
                     MYSQL_STORAGE_ENGINE_PLUGIN, 0))
    return true;
  return false;
}
1637
1638
static const char *check_lowercase_names(handler *file, const char *path,
1639
                                         char *tmp_path)
1640
{
1641
  if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
1642
    return path;
1643
1644
  /* Ensure that table handler get path in lower case */
1645
  if (tmp_path != path)
1646
    strmov(tmp_path, path);
1647
1648
  /*
1649
    we only should turn into lowercase database/table part
1650
    so start the process after homedirectory
1651
  */
1652
  my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
1653
  return tmp_path;
1654
}
1655
1656
1657
/**
1658
  An interceptor to hijack the text of the error message without
1659
  setting an error in the thread. We need the text to present it
1660
  in the form of a warning to the user.
1661
*/
1662
1663
struct Ha_delete_table_error_handler: public Internal_error_handler
1664
{
1665
public:
1666
  virtual bool handle_error(uint sql_errno,
1667
                            const char *message,
1668
                            MYSQL_ERROR::enum_warning_level level,
1669
                            THD *thd);
1670
  char buff[MYSQL_ERRMSG_SIZE];
1671
};
1672
1673
1674
bool
1675
Ha_delete_table_error_handler::
77.1.15 by Monty Taylor
Bunch of warning cleanups.
1676
handle_error(uint sql_errno  __attribute__((__unused__)),
1 by brian
clean slate
1677
             const char *message,
77.1.15 by Monty Taylor
Bunch of warning cleanups.
1678
             MYSQL_ERROR::enum_warning_level level __attribute__((__unused__)),
1679
             THD *thd __attribute__((__unused__)))
1 by brian
clean slate
1680
{
1681
  /* Grab the error message */
1682
  strmake(buff, message, sizeof(buff)-1);
56 by brian
Next pass of true/false update.
1683
  return true;
1 by brian
clean slate
1684
}
1685
1686
1687
/**
1688
  This should return ENOENT if the file doesn't exists.
1689
  The .frm file will be deleted only if we return 0 or ENOENT
1690
*/
1691
int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
1692
                    const char *db, const char *alias, bool generate_warning)
1693
{
1694
  handler *file;
1695
  char tmp_path[FN_REFLEN];
1696
  int error;
1697
  TABLE dummy_table;
1698
  TABLE_SHARE dummy_share;
1699
1700
  bzero((char*) &dummy_table, sizeof(dummy_table));
1701
  bzero((char*) &dummy_share, sizeof(dummy_share));
1702
  dummy_table.s= &dummy_share;
1703
1704
  /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
1705
  if (table_type == NULL ||
1706
      ! (file=get_new_handler((TABLE_SHARE*)0, thd->mem_root, table_type)))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1707
    return(ENOENT);
1 by brian
clean slate
1708
1709
  path= check_lowercase_names(file, path, tmp_path);
1710
  if ((error= file->ha_delete_table(path)) && generate_warning)
1711
  {
1712
    /*
1713
      Because file->print_error() use my_error() to generate the error message
1714
      we use an internal error handler to intercept it and store the text
1715
      in a temporary buffer. Later the message will be presented to user
1716
      as a warning.
1717
    */
1718
    Ha_delete_table_error_handler ha_delete_table_error_handler;
1719
1720
    /* Fill up strucutures that print_error may need */
1721
    dummy_share.path.str= (char*) path;
1722
    dummy_share.path.length= strlen(path);
1723
    dummy_share.db.str= (char*) db;
1724
    dummy_share.db.length= strlen(db);
1725
    dummy_share.table_name.str= (char*) alias;
1726
    dummy_share.table_name.length= strlen(alias);
1727
    dummy_table.alias= alias;
1728
1729
    file->change_table_ptr(&dummy_table, &dummy_share);
1730
1731
    thd->push_internal_handler(&ha_delete_table_error_handler);
1732
    file->print_error(error, 0);
1733
1734
    thd->pop_internal_handler();
1735
1736
    /*
1737
      XXX: should we convert *all* errors to warnings here?
1738
      What if the error is fatal?
1739
    */
1740
    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, error,
1741
                ha_delete_table_error_handler.buff);
1742
  }
1743
  delete file;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1744
  return(error);
1 by brian
clean slate
1745
}
1746
1747
/****************************************************************************
1748
** General handler functions
1749
****************************************************************************/
1750
/**
  Create a second, opened handler for the table this handler is attached to.

  @param mem_root  memory root passed to get_new_handler() and used to
                   allocate the clone's ref buffer

  @return the opened clone, or NULL if the handler could not be created,
          the ref buffer could not be allocated, or ha_open() failed
*/
handler *handler::clone(MEM_ROOT *mem_root)
{
  handler *new_handler= get_new_handler(table->s, mem_root, table->s->db_type());

  /* Must be checked before new_handler->ref is touched below. */
  if (!new_handler)
    return NULL;

  /*
    Allocate handler->ref here because otherwise ha_open will allocate it
    on this->table->mem_root and we will not be able to reclaim that memory
    when the clone handler object is destroyed.
  */
  new_handler->ref= (uchar*) alloc_root(mem_root, ALIGN_SIZE(ref_length)*2);
  if (!new_handler->ref)
    return NULL;

  if (new_handler->ha_open(table,
                           table->s->normalized_path.str,
                           table->db_stat,
                           HA_OPEN_IGNORE_IF_LOCKED))
    return NULL;

  return new_handler;
}
1767
1768
1769
1770
/**
  Increment one status counter of the session that is using the table.

  @param offset  pointer-to-member selecting the counter inside the
                 session's status_var
*/
void handler::ha_statistic_increment(ulong SSV::*offset) const
{
  ulong &counter= table->in_use->status_var.*offset;
  status_var_increment(counter);
}
1774
1775
/**
  Return what thd_ha_data() yields for the given session and this
  handler's handlerton.
*/
void **handler::ha_data(THD *thd) const
{
  void **engine_slot= thd_ha_data(thd, ht);
  return engine_slot;
}
1779
1780
/**
  Return the session associated with this handler.

  @return table->in_use when a table is attached and has a session set,
          current_thd otherwise
*/
THD *handler::ha_thd(void) const
{
  THD *owner= table ? table->in_use : NULL;

  /* When the table records a session it is expected to be the current one. */
  assert(!owner || owner == current_thd);

  if (owner)
    return owner;
  return current_thd;
}
1785
1786
/**
1787
  Open database-handler.
1788
1789
  Try O_RDONLY if cannot open as O_RDWR
1790
  Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set
1791
*/
1792
int handler::ha_open(TABLE *table_arg, const char *name, int mode,
1793
                     int test_if_locked)
1794
{
1795
  int error;
1796
1797
  table= table_arg;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1798
  assert(table->s == table_share);
1799
  assert(alloc_root_inited(&table->mem_root));
1 by brian
clean slate
1800
1801
  if ((error=open(name,mode,test_if_locked)))
1802
  {
1803
    if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
1804
	(table->db_stat & HA_TRY_READ_ONLY))
1805
    {
1806
      table->db_stat|=HA_READ_ONLY;
1807
      error=open(name,O_RDONLY,test_if_locked);
1808
    }
1809
  }
1810
  if (error)
1811
  {
1812
    my_errno= error;                            /* Safeguard */
1813
  }
1814
  else
1815
  {
1816
    if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
1817
      table->db_stat|=HA_READ_ONLY;
1818
    (void) extra(HA_EXTRA_NO_READCHECK);	// Not needed in SQL
1819
1820
    /* ref is already allocated for us if we're called from handler::clone() */
1821
    if (!ref && !(ref= (uchar*) alloc_root(&table->mem_root, 
1822
                                          ALIGN_SIZE(ref_length)*2)))
1823
    {
1824
      close();
1825
      error=HA_ERR_OUT_OF_MEM;
1826
    }
1827
    else
1828
      dup_ref=ref+ALIGN_SIZE(ref_length);
1829
    cached_table_flags= table_flags();
1830
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1831
  return(error);
1 by brian
clean slate
1832
}
1833
1834
/**
1835
  one has to use this method when to find
1836
  random position by record as the plain
1837
  position() call doesn't work for some
1838
  handlers for random position
1839
*/
1840
1841
int handler::rnd_pos_by_record(uchar *record)
1842
{
1843
  register int error;
1844
1845
  position(record);
1846
  if (inited && (error= ha_index_end()))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1847
    return(error);
56 by brian
Next pass of true/false update.
1848
  if ((error= ha_rnd_init(false)))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1849
    return(error);
1 by brian
clean slate
1850
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1851
  return(rnd_pos(record, ref));
1 by brian
clean slate
1852
}
1853
1854
/**
1855
  Read first row (only) from a table.
1856
1857
  This is never called for InnoDB tables, as these table types
1858
  has the HA_STATS_RECORDS_IS_EXACT set.
1859
*/
1860
int handler::read_first_row(uchar * buf, uint primary_key)
1861
{
1862
  register int error;
1863
1864
  ha_statistic_increment(&SSV::ha_read_first_count);
1865
1866
  /*
1867
    If there is very few deleted rows in the table, find the first row by
1868
    scanning the table.
1869
    TODO remove the test for HA_READ_ORDER
1870
  */
1871
  if (stats.deleted < 10 || primary_key >= MAX_KEY ||
1872
      !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
1873
  {
1874
    (void) ha_rnd_init(1);
1875
    while ((error= rnd_next(buf)) == HA_ERR_RECORD_DELETED) ;
1876
    (void) ha_rnd_end();
1877
  }
1878
  else
1879
  {
1880
    /* Find the first row through the primary key */
1881
    (void) ha_index_init(primary_key, 0);
1882
    error=index_first(buf);
1883
    (void) ha_index_end();
1884
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1885
  return(error);
1 by brian
clean slate
1886
}
1887
1888
/**
1889
  Generate the next auto-increment number based on increment and offset.
1890
  computes the lowest number
1891
  - strictly greater than "nr"
1892
  - of the form: auto_increment_offset + N * auto_increment_increment
1893
1894
  In most cases increment= offset= 1, in which case we get:
1895
  @verbatim 1,2,3,4,5,... @endverbatim
1896
    If increment=10 and offset=5 and previous number is 1, we get:
1897
  @verbatim 1,5,15,25,35,... @endverbatim
1898
*/
1899
inline uint64_t
1900
compute_next_insert_id(uint64_t nr,struct system_variables *variables)
1901
{
1902
  if (variables->auto_increment_increment == 1)
1903
    return (nr+1); // optimization of the formula below
1904
  nr= (((nr+ variables->auto_increment_increment -
1905
         variables->auto_increment_offset)) /
1906
       (uint64_t) variables->auto_increment_increment);
1907
  return (nr* (uint64_t) variables->auto_increment_increment +
1908
          variables->auto_increment_offset);
1909
}
1910
1911
1912
void handler::adjust_next_insert_id_after_explicit_value(uint64_t nr)
1913
{
1914
  /*
1915
    If we have set THD::next_insert_id previously and plan to insert an
1916
    explicitely-specified value larger than this, we need to increase
1917
    THD::next_insert_id to be greater than the explicit value.
1918
  */
1919
  if ((next_insert_id > 0) && (nr >= next_insert_id))
1920
    set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
1921
}
1922
1923
1924
/**
1925
  Compute a previous insert id
1926
1927
  Computes the largest number X:
1928
  - smaller than or equal to "nr"
1929
  - of the form: auto_increment_offset + N * auto_increment_increment
1930
    where N>=0.
1931
1932
  @param nr            Number to "round down"
1933
  @param variables     variables struct containing auto_increment_increment and
1934
                       auto_increment_offset
1935
1936
  @return
1937
    The number X if it exists, "nr" otherwise.
1938
*/
1939
inline uint64_t
1940
prev_insert_id(uint64_t nr, struct system_variables *variables)
1941
{
1942
  if (unlikely(nr < variables->auto_increment_offset))
1943
  {
1944
    /*
1945
      There's nothing good we can do here. That is a pathological case, where
1946
      the offset is larger than the column's max possible value, i.e. not even
1947
      the first sequence value may be inserted. User will receive warning.
1948
    */
1949
    return nr;
1950
  }
1951
  if (variables->auto_increment_increment == 1)
1952
    return nr; // optimization of the formula below
1953
  nr= (((nr - variables->auto_increment_offset)) /
1954
       (uint64_t) variables->auto_increment_increment);
1955
  return (nr * (uint64_t) variables->auto_increment_increment +
1956
          variables->auto_increment_offset);
1957
}
1958
1959
1960
/**
1961
  Update the auto_increment field if necessary.
1962
1963
  Updates columns with type NEXT_NUMBER if:
1964
1965
  - If column value is set to NULL (in which case
1966
    auto_increment_field_not_null is 0)
1967
  - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
1968
    set. In the future we will only set NEXT_NUMBER fields if one sets them
1969
    to NULL (or they are not included in the insert list).
1970
1971
    In those cases, we check if the currently reserved interval still has
1972
    values we have not used. If yes, we pick the smallest one and use it.
1973
    Otherwise:
1974
1975
  - If a list of intervals has been provided to the statement via SET
1976
    INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
1977
    first unused interval from this list, consider it as reserved.
1978
1979
  - Otherwise we set the column for the first row to the value
1980
    next_insert_id(get_auto_increment(column)) which is usually
1981
    max-used-column-value+1.
1982
    We call get_auto_increment() for the first row in a multi-row
1983
    statement. get_auto_increment() will tell us the interval of values it
1984
    reserved for us.
1985
1986
  - In both cases, for the following rows we use those reserved values without
1987
    calling the handler again (we just progress in the interval, computing
1988
    each new value from the previous one). Until we have exhausted them, then
1989
    we either take the next provided interval or call get_auto_increment()
1990
    again to reserve a new interval.
1991
1992
  - In both cases, the reserved intervals are remembered in
1993
    thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
1994
    binlogging; the last reserved interval is remembered in
1995
    auto_inc_interval_for_cur_row.
1996
1997
    The idea is that generated auto_increment values are predictable and
1998
    independent of the column values in the table.  This is needed to be
1999
    able to replicate into a table that already has rows with a higher
2000
    auto-increment value than the one that is inserted.
2001
2002
    After we have already generated an auto-increment number and the user
2003
    inserts a column with a higher value than the last used one, we will
2004
    start counting from the inserted value.
2005
2006
    This function's "outputs" are: the table's auto_increment field is filled
2007
    with a value, thd->next_insert_id is filled with the value to use for the
2008
    next row, if a value was autogenerated for the current row it is stored in
2009
    thd->insert_id_for_cur_row, if get_auto_increment() was called
2010
    thd->auto_inc_interval_for_cur_row is modified, if that interval is not
2011
    present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
2012
    this list.
2013
2014
  @todo
2015
    Replace all references to "next number" or NEXT_NUMBER to
2016
    "auto_increment", everywhere (see below: there is
2017
    table->auto_increment_field_not_null, and there also exists
2018
    table->next_number_field, it's not consistent).
2019
2020
  @retval
2021
    0	ok
2022
  @retval
2023
    HA_ERR_AUTOINC_READ_FAILED  get_auto_increment() was called and
2024
    returned ~(uint64_t) 0
2025
  @retval
2026
    HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
2027
    failure.
2028
*/
2029
2030
#define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
2031
#define AUTO_INC_DEFAULT_NB_MAX_BITS 16
2032
#define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)
2033
2034
int handler::update_auto_increment()
2035
{
2036
  uint64_t nr, nb_reserved_values;
56 by brian
Next pass of true/false update.
2037
  bool append= false;
1 by brian
clean slate
2038
  THD *thd= table->in_use;
2039
  struct system_variables *variables= &thd->variables;
2040
2041
  /*
2042
    next_insert_id is a "cursor" into the reserved interval, it may go greater
2043
    than the interval, but not smaller.
2044
  */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2045
  assert(next_insert_id >= auto_inc_interval_for_cur_row.minimum());
1 by brian
clean slate
2046
2047
  if (((nr= table->next_number_field->val_int()) != 0) || 
2048
      (table->auto_increment_field_not_null && (thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO)))
2049
  {
2050
    /*
2051
      Update next_insert_id if we had already generated a value in this
2052
      statement (case of INSERT VALUES(null),(3763),(null):
2053
      the last NULL needs to insert 3764, not the value of the first NULL plus
2054
      1).
2055
    */
2056
    adjust_next_insert_id_after_explicit_value(nr);
2057
    insert_id_for_cur_row= 0; // didn't generate anything
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2058
    return(0);
1 by brian
clean slate
2059
  }
2060
2061
  if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
2062
  {
2063
    /* next_insert_id is beyond what is reserved, so we reserve more. */
2064
    const Discrete_interval *forced=
2065
      thd->auto_inc_intervals_forced.get_next();
2066
    if (forced != NULL)
2067
    {
2068
      nr= forced->minimum();
2069
      nb_reserved_values= forced->values();
2070
    }
2071
    else
2072
    {
2073
      /*
2074
        handler::estimation_rows_to_insert was set by
2075
        handler::ha_start_bulk_insert(); if 0 it means "unknown".
2076
      */
2077
      uint nb_already_reserved_intervals=
2078
        thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements();
2079
      uint64_t nb_desired_values;
2080
      /*
2081
        If an estimation was given to the engine:
2082
        - use it.
2083
        - if we already reserved numbers, it means the estimation was
2084
        not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
2085
        time, twice that the 3rd time etc.
2086
        If no estimation was given, use those increasing defaults from the
2087
        start, starting from AUTO_INC_DEFAULT_NB_ROWS.
2088
        Don't go beyond a max to not reserve "way too much" (because
2089
        reservation means potentially losing unused values).
2090
      */
2091
      if (nb_already_reserved_intervals == 0 &&
2092
          (estimation_rows_to_insert > 0))
2093
        nb_desired_values= estimation_rows_to_insert;
2094
      else /* go with the increasing defaults */
2095
      {
2096
        /* avoid overflow in formula, with this if() */
2097
        if (nb_already_reserved_intervals <= AUTO_INC_DEFAULT_NB_MAX_BITS)
2098
        {
2099
          nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS * 
2100
            (1 << nb_already_reserved_intervals);
2101
          set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
2102
        }
2103
        else
2104
          nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
2105
      }
2106
      /* This call ignores all its parameters but nr, currently */
2107
      get_auto_increment(variables->auto_increment_offset,
2108
                         variables->auto_increment_increment,
2109
                         nb_desired_values, &nr,
2110
                         &nb_reserved_values);
2111
      if (nr == ~(uint64_t) 0)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2112
        return(HA_ERR_AUTOINC_READ_FAILED);  // Mark failure
1 by brian
clean slate
2113
      
2114
      /*
2115
        That rounding below should not be needed when all engines actually
2116
        respect offset and increment in get_auto_increment(). But they don't
2117
        so we still do it. Wonder if for the not-first-in-index we should do
2118
        it. Hope that this rounding didn't push us out of the interval; even
2119
        if it did we cannot do anything about it (calling the engine again
2120
        will not help as we inserted no row).
2121
      */
2122
      nr= compute_next_insert_id(nr-1, variables);
2123
    }
2124
    
2125
    if (table->s->next_number_keypart == 0)
2126
    {
2127
      /* We must defer the appending until "nr" has been possibly truncated */
56 by brian
Next pass of true/false update.
2128
      append= true;
1 by brian
clean slate
2129
    }
2130
  }
2131
56 by brian
Next pass of true/false update.
2132
  if (unlikely(table->next_number_field->store((longlong) nr, true)))
1 by brian
clean slate
2133
  {
2134
    /*
2135
      first test if the query was aborted due to strict mode constraints
2136
    */
2137
    if (thd->killed == THD::KILL_BAD_DATA)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2138
      return(HA_ERR_AUTOINC_ERANGE);
1 by brian
clean slate
2139
2140
    /*
2141
      field refused this value (overflow) and truncated it, use the result of
2142
      the truncation (which is going to be inserted); however we try to
2143
      decrease it to honour auto_increment_* variables.
2144
      That will shift the left bound of the reserved interval, we don't
2145
      bother shifting the right bound (anyway any other value from this
2146
      interval will cause a duplicate key).
2147
    */
2148
    nr= prev_insert_id(table->next_number_field->val_int(), variables);
56 by brian
Next pass of true/false update.
2149
    if (unlikely(table->next_number_field->store((longlong) nr, true)))
1 by brian
clean slate
2150
      nr= table->next_number_field->val_int();
2151
  }
2152
  if (append)
2153
  {
2154
    auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
2155
                                          variables->auto_increment_increment);
2156
    /* Row-based replication does not need to store intervals in binlog */
2157
    if (!thd->current_stmt_binlog_row_based)
2158
        thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(),
2159
                                                              auto_inc_interval_for_cur_row.values(),
2160
                                                              variables->auto_increment_increment);
2161
  }
2162
2163
  /*
2164
    Record this autogenerated value. If the caller then
2165
    succeeds to insert this value, it will call
2166
    record_first_successful_insert_id_in_cur_stmt()
2167
    which will set first_successful_insert_id_in_cur_stmt if it's not
2168
    already set.
2169
  */
2170
  insert_id_for_cur_row= nr;
2171
  /*
2172
    Set next insert id to point to next auto-increment value to be able to
2173
    handle multi-row statements.
2174
  */
2175
  set_next_insert_id(compute_next_insert_id(nr, variables));
2176
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2177
  return(0);
1 by brian
clean slate
2178
}
2179
2180
2181
/**
2182
  MySQL signal that it changed the column bitmap
2183
2184
  This is for handlers that needs to setup their own column bitmaps.
2185
  Normally the handler should set up their own column bitmaps in
2186
  index_init() or rnd_init() and in any column_bitmaps_signal() call after
2187
  this.
2188
2189
  The handler is allowed to do changes to the bitmap after a index_init or
2190
  rnd_init() call is made as after this, MySQL will not use the bitmap
2191
  for any program logic checking.
2192
*/
2193
void handler::column_bitmaps_signal()
2194
{
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2195
  return;
1 by brian
clean slate
2196
}
2197
2198
2199
/**
2200
  Reserves an interval of auto_increment values from the handler.
2201
2202
  offset and increment means that we want values to be of the form
2203
  offset + N * increment, where N>=0 is integer.
2204
  If the function sets *first_value to ~(uint64_t)0 it means an error.
2205
  If the function sets *nb_reserved_values to ULONGLONG_MAX it means it has
2206
  reserved to "positive infinite".
2207
2208
  @param offset
2209
  @param increment
2210
  @param nb_desired_values   how many values we want
2211
  @param first_value         (OUT) the first value reserved by the handler
2212
  @param nb_reserved_values  (OUT) how many values the handler reserved
2213
*/
77.1.15 by Monty Taylor
Bunch of warning cleanups.
2214
void handler::get_auto_increment(uint64_t offset __attribute__((__unused__)),
2215
                                 uint64_t increment __attribute__((__unused__)),
2216
                                 uint64_t nb_desired_values __attribute__((__unused__)),
1 by brian
clean slate
2217
                                 uint64_t *first_value,
2218
                                 uint64_t *nb_reserved_values)
2219
{
2220
  uint64_t nr;
2221
  int error;
2222
2223
  (void) extra(HA_EXTRA_KEYREAD);
2224
  table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
2225
                                        table->read_set);
2226
  column_bitmaps_signal();
2227
  index_init(table->s->next_number_index, 1);
2228
  if (table->s->next_number_keypart == 0)
2229
  {						// Autoincrement at key-start
2230
    error=index_last(table->record[1]);
2231
    /*
2232
      MySQL implicitely assumes such method does locking (as MySQL decides to
2233
      use nr+increment without checking again with the handler, in
2234
      handler::update_auto_increment()), so reserves to infinite.
2235
    */
2236
    *nb_reserved_values= ULONGLONG_MAX;
2237
  }
2238
  else
2239
  {
2240
    uchar key[MAX_KEY_LENGTH];
2241
    key_copy(key, table->record[0],
2242
             table->key_info + table->s->next_number_index,
2243
             table->s->next_number_key_offset);
2244
    error= index_read_map(table->record[1], key,
2245
                          make_prev_keypart_map(table->s->next_number_keypart),
2246
                          HA_READ_PREFIX_LAST);
2247
    /*
2248
      MySQL needs to call us for next row: assume we are inserting ("a",null)
2249
      here, we return 3, and next this statement will want to insert
2250
      ("b",null): there is no reason why ("b",3+1) would be the good row to
2251
      insert: maybe it already exists, maybe 3+1 is too large...
2252
    */
2253
    *nb_reserved_values= 1;
2254
  }
2255
2256
  if (error)
2257
    nr=1;
2258
  else
2259
    nr= ((uint64_t) table->next_number_field->
2260
         val_int_offset(table->s->rec_buff_length)+1);
2261
  index_end();
2262
  (void) extra(HA_EXTRA_NO_KEYREAD);
2263
  *first_value= nr;
2264
}
2265
2266
2267
void handler::ha_release_auto_increment()
2268
{
2269
  release_auto_increment();
2270
  insert_id_for_cur_row= 0;
2271
  auto_inc_interval_for_cur_row.replace(0, 0, 0);
2272
  if (next_insert_id > 0)
2273
  {
2274
    next_insert_id= 0;
2275
    /*
2276
      this statement used forced auto_increment values if there were some,
2277
      wipe them away for other statements.
2278
    */
2279
    table->in_use->auto_inc_intervals_forced.empty();
2280
  }
2281
}
2282
2283
2284
/**
  Report a duplicate-key error, writing the duplicated key value and the
  key name into the message.

  @param key_nr  number of the offending key; MAX_KEY means "unknown"
  @param msg     format string handed to my_printf_error()
*/
void handler::print_keydup_error(uint key_nr, const char *msg)
{
  /* Write the duplicated key in the error message */
  char key[MAX_KEY_LENGTH];
  String str(key,sizeof(key),system_charset_info);
  const char *key_name;

  if (key_nr != MAX_KEY)
  {
    /* Table is opened and defined at this point */
    key_unpack(&str,table,(uint) key_nr);

    /* Cut the key text short and mark the cut with "..." if it is too long. */
    uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(msg);
    if (str.length() >= max_length)
    {
      str.length(max_length-4);
      str.append(STRING_WITH_LEN("..."));
    }
    key_name= table->key_info[key_nr].name;
  }
  else
  {
    /* Key is unknown */
    str.copy("", 0, system_charset_info);
    key_name= "*UNKNOWN*";
  }

  my_printf_error(ER_DUP_ENTRY, msg, MYF(0), str.c_ptr(), key_name);
}
2310
2311
2312
/**
2313
  Print error that we got from handler function.
2314
2315
  @note
2316
    In case of delete table it's only safe to use the following parts of
2317
    the 'table' structure:
2318
    - table->s->path
2319
    - table->alias
2320
*/
2321
void handler::print_error(int error, myf errflag)
2322
{
2323
  int textno=ER_GET_ERRNO;
2324
  switch (error) {
2325
  case EACCES:
2326
    textno=ER_OPEN_AS_READONLY;
2327
    break;
2328
  case EAGAIN:
2329
    textno=ER_FILE_USED;
2330
    break;
2331
  case ENOENT:
2332
    textno=ER_FILE_NOT_FOUND;
2333
    break;
2334
  case HA_ERR_KEY_NOT_FOUND:
2335
  case HA_ERR_NO_ACTIVE_RECORD:
2336
  case HA_ERR_END_OF_FILE:
2337
    textno=ER_KEY_NOT_FOUND;
2338
    break;
2339
  case HA_ERR_WRONG_MRG_TABLE_DEF:
2340
    textno=ER_WRONG_MRG_TABLE;
2341
    break;
2342
  case HA_ERR_FOUND_DUPP_KEY:
2343
  {
2344
    uint key_nr=get_dup_key(error);
2345
    if ((int) key_nr >= 0)
2346
    {
2347
      print_keydup_error(key_nr, ER(ER_DUP_ENTRY_WITH_KEY_NAME));
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2348
      return;
1 by brian
clean slate
2349
    }
2350
    textno=ER_DUP_KEY;
2351
    break;
2352
  }
2353
  case HA_ERR_FOREIGN_DUPLICATE_KEY:
2354
  {
2355
    uint key_nr= get_dup_key(error);
2356
    if ((int) key_nr >= 0)
2357
    {
2358
      uint max_length;
2359
      /* Write the key in the error message */
2360
      char key[MAX_KEY_LENGTH];
2361
      String str(key,sizeof(key),system_charset_info);
2362
      /* Table is opened and defined at this point */
2363
      key_unpack(&str,table,(uint) key_nr);
2364
      max_length= (MYSQL_ERRMSG_SIZE-
2365
                   (uint) strlen(ER(ER_FOREIGN_DUPLICATE_KEY)));
2366
      if (str.length() >= max_length)
2367
      {
2368
        str.length(max_length-4);
2369
        str.append(STRING_WITH_LEN("..."));
2370
      }
2371
      my_error(ER_FOREIGN_DUPLICATE_KEY, MYF(0), table_share->table_name.str,
2372
        str.c_ptr(), key_nr+1);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2373
      return;
1 by brian
clean slate
2374
    }
2375
    textno= ER_DUP_KEY;
2376
    break;
2377
  }
2378
  case HA_ERR_FOUND_DUPP_UNIQUE:
2379
    textno=ER_DUP_UNIQUE;
2380
    break;
2381
  case HA_ERR_RECORD_CHANGED:
2382
    textno=ER_CHECKREAD;
2383
    break;
2384
  case HA_ERR_CRASHED:
2385
    textno=ER_NOT_KEYFILE;
2386
    break;
2387
  case HA_ERR_WRONG_IN_RECORD:
2388
    textno= ER_CRASHED_ON_USAGE;
2389
    break;
2390
  case HA_ERR_CRASHED_ON_USAGE:
2391
    textno=ER_CRASHED_ON_USAGE;
2392
    break;
2393
  case HA_ERR_NOT_A_TABLE:
2394
    textno= error;
2395
    break;
2396
  case HA_ERR_CRASHED_ON_REPAIR:
2397
    textno=ER_CRASHED_ON_REPAIR;
2398
    break;
2399
  case HA_ERR_OUT_OF_MEM:
2400
    textno=ER_OUT_OF_RESOURCES;
2401
    break;
2402
  case HA_ERR_WRONG_COMMAND:
2403
    textno=ER_ILLEGAL_HA;
2404
    break;
2405
  case HA_ERR_OLD_FILE:
2406
    textno=ER_OLD_KEYFILE;
2407
    break;
2408
  case HA_ERR_UNSUPPORTED:
2409
    textno=ER_UNSUPPORTED_EXTENSION;
2410
    break;
2411
  case HA_ERR_RECORD_FILE_FULL:
2412
  case HA_ERR_INDEX_FILE_FULL:
2413
    textno=ER_RECORD_FILE_FULL;
2414
    break;
2415
  case HA_ERR_LOCK_WAIT_TIMEOUT:
2416
    textno=ER_LOCK_WAIT_TIMEOUT;
2417
    break;
2418
  case HA_ERR_LOCK_TABLE_FULL:
2419
    textno=ER_LOCK_TABLE_FULL;
2420
    break;
2421
  case HA_ERR_LOCK_DEADLOCK:
2422
    textno=ER_LOCK_DEADLOCK;
2423
    break;
2424
  case HA_ERR_READ_ONLY_TRANSACTION:
2425
    textno=ER_READ_ONLY_TRANSACTION;
2426
    break;
2427
  case HA_ERR_CANNOT_ADD_FOREIGN:
2428
    textno=ER_CANNOT_ADD_FOREIGN;
2429
    break;
2430
  case HA_ERR_ROW_IS_REFERENCED:
2431
  {
2432
    String str;
2433
    get_error_message(error, &str);
2434
    my_error(ER_ROW_IS_REFERENCED_2, MYF(0), str.c_ptr_safe());
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2435
    return;
1 by brian
clean slate
2436
  }
2437
  case HA_ERR_NO_REFERENCED_ROW:
2438
  {
2439
    String str;
2440
    get_error_message(error, &str);
2441
    my_error(ER_NO_REFERENCED_ROW_2, MYF(0), str.c_ptr_safe());
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2442
    return;
1 by brian
clean slate
2443
  }
2444
  case HA_ERR_TABLE_DEF_CHANGED:
2445
    textno=ER_TABLE_DEF_CHANGED;
2446
    break;
2447
  case HA_ERR_NO_SUCH_TABLE:
2448
    my_error(ER_NO_SUCH_TABLE, MYF(0), table_share->db.str,
2449
             table_share->table_name.str);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2450
    return;
1 by brian
clean slate
2451
  case HA_ERR_RBR_LOGGING_FAILED:
2452
    textno= ER_BINLOG_ROW_LOGGING_FAILED;
2453
    break;
2454
  case HA_ERR_DROP_INDEX_FK:
2455
  {
2456
    const char *ptr= "???";
2457
    uint key_nr= get_dup_key(error);
2458
    if ((int) key_nr >= 0)
2459
      ptr= table->key_info[key_nr].name;
2460
    my_error(ER_DROP_INDEX_FK, MYF(0), ptr);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2461
    return;
1 by brian
clean slate
2462
  }
2463
  case HA_ERR_TABLE_NEEDS_UPGRADE:
2464
    textno=ER_TABLE_NEEDS_UPGRADE;
2465
    break;
2466
  case HA_ERR_TABLE_READONLY:
2467
    textno= ER_OPEN_AS_READONLY;
2468
    break;
2469
  case HA_ERR_AUTOINC_READ_FAILED:
2470
    textno= ER_AUTOINC_READ_FAILED;
2471
    break;
2472
  case HA_ERR_AUTOINC_ERANGE:
2473
    textno= ER_WARN_DATA_OUT_OF_RANGE;
2474
    break;
2475
  case HA_ERR_LOCK_OR_ACTIVE_TRANSACTION:
2476
    my_message(ER_LOCK_OR_ACTIVE_TRANSACTION,
2477
               ER(ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0));
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2478
    return;
1 by brian
clean slate
2479
    break;
2480
  default:
2481
    {
2482
      /* The error was "unknown" to this function.
2483
	 Ask handler if it has got a message for this error */
56 by brian
Next pass of true/false update.
2484
      bool temporary= false;
1 by brian
clean slate
2485
      String str;
2486
      temporary= get_error_message(error, &str);
2487
      if (!str.is_empty())
2488
      {
2489
	const char* engine= table_type();
2490
	if (temporary)
2491
	  my_error(ER_GET_TEMPORARY_ERRMSG, MYF(0), error, str.ptr(), engine);
2492
	else
2493
	  my_error(ER_GET_ERRMSG, MYF(0), error, str.ptr(), engine);
2494
      }
2495
      else
2496
	my_error(ER_GET_ERRNO,errflag,error);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2497
      return;
1 by brian
clean slate
2498
    }
2499
  }
2500
  my_error(textno, errflag, table_share->table_name.str, error);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2501
  return;
1 by brian
clean slate
2502
}
2503
2504
2505
/**
2506
  Return an error message specific to this handler.
2507
2508
  @param error  error code previously returned by handler
2509
  @param buf    pointer to String where to add error message
2510
2511
  @return
2512
    Returns true if this is a temporary error
2513
*/
77.1.15 by Monty Taylor
Bunch of warning cleanups.
2514
bool handler::get_error_message(int error __attribute__((__unused__)),
2515
                                String* buf __attribute__((__unused__)))
1 by brian
clean slate
2516
{
56 by brian
Next pass of true/false update.
2517
  return false;
1 by brian
clean slate
2518
}
2519
2520
2521
/**
  Check whether the table needs a check/upgrade.

  When the table share records no version (mysql_version == 0), every key
  part is inspected; if any key part refers to a MYSQL_TYPE_BLOB field the
  function returns HA_ADMIN_NEEDS_CHECK (and, when TT_FOR_UPGRADE is set in
  check_opt->sql_flags, sets check_opt->flags to T_MEDIUM). Otherwise the
  decision is delegated to check_for_upgrade().

  @param check_opt  check options; 'flags' may be modified as described

  @return HA_ADMIN_NEEDS_CHECK, or the result of check_for_upgrade()
*/
int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
{
  if (table->s->mysql_version)
    return check_for_upgrade(check_opt);

  /* check for blob-in-key error */
  KEY *const keys_end= table->key_info + table->s->keys;
  for (KEY *cur_key= table->key_info; cur_key < keys_end; cur_key++)
  {
    KEY_PART_INFO *const parts_end= cur_key->key_part + cur_key->key_parts;
    for (KEY_PART_INFO *part= cur_key->key_part; part < parts_end; part++)
    {
      /* Key parts with fieldnr == 0 have no field to look up. */
      if (!part->fieldnr)
        continue;

      Field *field= table->field[part->fieldnr-1];
      if (field->type() != MYSQL_TYPE_BLOB)
        continue;

      if (check_opt->sql_flags & TT_FOR_UPGRADE)
        check_opt->flags= T_MEDIUM;
      return HA_ADMIN_NEEDS_CHECK;
    }
  }
  return check_for_upgrade(check_opt);
}
2551
2552
2553
/* Code left, but Drizzle has no legacy yet (while MySQL did) */
2554
int handler::check_old_types()
2555
{
2556
  return 0;
2557
}
2558
2559
2560
/**
  Stamp the running server's version id into a table's definition file.

  Writes MYSQL_VERSION_ID as a 4-byte value at byte offset 51 of the file
  "<normalized_path><reg_ext>" and, once the write has succeeded, updates
  mysql_version of every share found in open_cache under the table's cache
  key.

  No update is needed when the table was created or checked by a server of
  the same version. This also ensures that we do not update the version
  for temporary tables, which this code does not support.

  @param table  table whose definition file should be updated

  @retval false  nothing to do, or the version was written successfully
  @retval true   the file could not be opened or the version could not be
                 written completely
*/
static bool update_frm_version(TABLE *table)
{
  char path[FN_REFLEN];
  File file;
  bool result= true;                            // Failure until proven otherwise

  if (table->s->mysql_version == MYSQL_VERSION_ID)
    return false;

  strxmov(path, table->s->normalized_path.str, reg_ext, NullS);

  if ((file= my_open(path, O_RDWR|O_BINARY, MYF(MY_WME))) < 0)
    return result;

  {
    uchar version[4];
    int4store(version, MYSQL_VERSION_ID);

    /*
      pwrite() returns the number of bytes written, or -1 on error. Only a
      complete write counts as success; the cached shares must not be
      stamped when the file itself was not updated.
    */
    if (pwrite(file, (uchar*) version, sizeof(version), 51L) ==
        (ssize_t) sizeof(version))
    {
      char *key= table->s->table_cache_key.str;
      uint key_length= table->s->table_cache_key.length;
      TABLE *entry;
      HASH_SEARCH_STATE state;

      for (entry= (TABLE*) hash_first(&open_cache, (uchar*) key, key_length,
                                      &state);
           entry;
           entry= (TABLE*) hash_next(&open_cache, (uchar*) key, key_length,
                                     &state))
        entry->s->mysql_version= MYSQL_VERSION_ID;

      result= false;
    }
  }

  VOID(my_close(file, MYF(MY_WME)));
  return result;
}
2603
2604
2605
2606
/**
2607
  @return
2608
    key if error because of duplicated keys
2609
*/
2610
uint handler::get_dup_key(int error)
2611
{
2612
  table->file->errkey  = (uint) -1;
2613
  if (error == HA_ERR_FOUND_DUPP_KEY || error == HA_ERR_FOREIGN_DUPLICATE_KEY ||
2614
      error == HA_ERR_FOUND_DUPP_UNIQUE ||
2615
      error == HA_ERR_DROP_INDEX_FK)
2616
    info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2617
  return(table->file->errkey);
1 by brian
clean slate
2618
}
2619
2620
2621
/**
2622
  Delete all files with extension from bas_ext().
2623
2624
  @param name		Base name of table
2625
2626
  @note
2627
    We assume that the handler may return more extensions than
2628
    was actually used for the file.
2629
2630
  @retval
2631
    0   If we successfully deleted at least one file from base_ext and
2632
    didn't get any other errors than ENOENT
2633
  @retval
2634
    !0  Error
2635
*/
2636
int handler::delete_table(const char *name)
2637
{
2638
  int error= 0;
2639
  int enoent_or_zero= ENOENT;                   // Error if no file was deleted
2640
  char buff[FN_REFLEN];
2641
2642
  for (const char **ext=bas_ext(); *ext ; ext++)
2643
  {
2644
    fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME|MY_APPEND_EXT);
2645
    if (my_delete_with_symlink(buff, MYF(0)))
2646
    {
2647
      if ((error= my_errno) != ENOENT)
2648
	break;
2649
    }
2650
    else
2651
      enoent_or_zero= 0;                        // No error for ENOENT
2652
    error= enoent_or_zero;
2653
  }
2654
  return error;
2655
}
2656
2657
2658
int handler::rename_table(const char * from, const char * to)
2659
{
2660
  int error= 0;
2661
  for (const char **ext= bas_ext(); *ext ; ext++)
2662
  {
2663
    if (rename_file_ext(from, to, *ext))
2664
    {
2665
      if ((error=my_errno) != ENOENT)
2666
	break;
2667
      error= 0;
2668
    }
2669
  }
2670
  return error;
2671
}
2672
2673
2674
void handler::drop_table(const char *name)
2675
{
2676
  close();
2677
  delete_table(name);
2678
}
2679
2680
2681
/**
2682
  Performs checks upon the table.
2683
2684
  @param thd                thread doing CHECK TABLE operation
2685
  @param check_opt          options from the parser
2686
2687
  @retval
2688
    HA_ADMIN_OK               Successful upgrade
2689
  @retval
2690
    HA_ADMIN_NEEDS_UPGRADE    Table has structures requiring upgrade
2691
  @retval
2692
    HA_ADMIN_NEEDS_ALTER      Table has structures requiring ALTER TABLE
2693
  @retval
2694
    HA_ADMIN_NOT_IMPLEMENTED
2695
*/
2696
int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
2697
{
2698
  int error;
2699
2700
  if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
2701
      (check_opt->sql_flags & TT_FOR_UPGRADE))
2702
    return 0;
2703
2704
  if (table->s->mysql_version < MYSQL_VERSION_ID)
2705
  {
2706
    if ((error= check_old_types()))
2707
      return error;
2708
    error= ha_check_for_upgrade(check_opt);
2709
    if (error && (error != HA_ADMIN_NEEDS_CHECK))
2710
      return error;
2711
    if (!error && (check_opt->sql_flags & TT_FOR_UPGRADE))
2712
      return 0;
2713
  }
2714
  if ((error= check(thd, check_opt)))
2715
    return error;
2716
  return update_frm_version(table);
2717
}
2718
2719
/**
2720
  A helper function to mark a transaction read-write,
2721
  if it is started.
2722
*/
2723
2724
inline
2725
void
2726
handler::mark_trx_read_write()
2727
{
2728
  Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
2729
  /*
2730
    When a storage engine method is called, the transaction must
2731
    have been started, unless it's a DDL call, for which the
2732
    storage engine starts the transaction internally, and commits
2733
    it internally, without registering in the ha_list.
2734
    Unfortunately here we can't know know for sure if the engine
2735
    has registered the transaction or not, so we must check.
2736
  */
2737
  if (ha_info->is_started())
2738
  {
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2739
    assert(has_transactions());
1 by brian
clean slate
2740
    /*
2741
      table_share can be NULL in ha_delete_table(). See implementation
2742
      of standalone function ha_delete_table() in sql_base.cc.
2743
    */
2744
    if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
2745
      ha_info->set_trx_read_write();
2746
  }
2747
}
2748
2749
2750
/**
2751
  Repair table: public interface.
2752
2753
  @sa handler::repair()
2754
*/
2755
2756
int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
2757
{
2758
  int result;
2759
2760
  mark_trx_read_write();
2761
2762
  if ((result= repair(thd, check_opt)))
2763
    return result;
2764
  return update_frm_version(table);
2765
}
2766
2767
2768
/**
2769
  Bulk update row: public interface.
2770
2771
  @sa handler::bulk_update_row()
2772
*/
2773
2774
int
2775
handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data,
2776
                            uint *dup_key_found)
2777
{
2778
  mark_trx_read_write();
2779
2780
  return bulk_update_row(old_data, new_data, dup_key_found);
2781
}
2782
2783
2784
/**
2785
  Delete all rows: public interface.
2786
2787
  @sa handler::delete_all_rows()
2788
*/
2789
2790
int
2791
handler::ha_delete_all_rows()
2792
{
2793
  mark_trx_read_write();
2794
2795
  return delete_all_rows();
2796
}
2797
2798
2799
/**
2800
  Reset auto increment: public interface.
2801
2802
  @sa handler::reset_auto_increment()
2803
*/
2804
2805
int
2806
handler::ha_reset_auto_increment(uint64_t value)
2807
{
2808
  mark_trx_read_write();
2809
2810
  return reset_auto_increment(value);
2811
}
2812
2813
2814
/**
2815
  Optimize table: public interface.
2816
2817
  @sa handler::optimize()
2818
*/
2819
2820
int
2821
handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
2822
{
2823
  mark_trx_read_write();
2824
2825
  return optimize(thd, check_opt);
2826
}
2827
2828
2829
/**
2830
  Analyze table: public interface.
2831
2832
  @sa handler::analyze()
2833
*/
2834
2835
int
2836
handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
2837
{
2838
  mark_trx_read_write();
2839
2840
  return analyze(thd, check_opt);
2841
}
2842
2843
2844
/**
2845
  Check and repair table: public interface.
2846
2847
  @sa handler::check_and_repair()
2848
*/
2849
2850
bool
2851
handler::ha_check_and_repair(THD *thd)
2852
{
2853
  mark_trx_read_write();
2854
2855
  return check_and_repair(thd);
2856
}
2857
2858
2859
/**
2860
  Disable indexes: public interface.
2861
2862
  @sa handler::disable_indexes()
2863
*/
2864
2865
int
2866
handler::ha_disable_indexes(uint mode)
2867
{
2868
  mark_trx_read_write();
2869
2870
  return disable_indexes(mode);
2871
}
2872
2873
2874
/**
2875
  Enable indexes: public interface.
2876
2877
  @sa handler::enable_indexes()
2878
*/
2879
2880
int
2881
handler::ha_enable_indexes(uint mode)
2882
{
2883
  mark_trx_read_write();
2884
2885
  return enable_indexes(mode);
2886
}
2887
2888
2889
/**
2890
  Discard or import tablespace: public interface.
2891
2892
  @sa handler::discard_or_import_tablespace()
2893
*/
2894
2895
int
2896
handler::ha_discard_or_import_tablespace(my_bool discard)
2897
{
2898
  mark_trx_read_write();
2899
2900
  return discard_or_import_tablespace(discard);
2901
}
2902
2903
2904
/**
2905
  Prepare for alter: public interface.
2906
2907
  Called to prepare an *online* ALTER.
2908
2909
  @sa handler::prepare_for_alter()
2910
*/
2911
2912
void
2913
handler::ha_prepare_for_alter()
2914
{
2915
  mark_trx_read_write();
2916
2917
  prepare_for_alter();
2918
}
2919
2920
2921
/**
2922
  Rename table: public interface.
2923
2924
  @sa handler::rename_table()
2925
*/
2926
2927
int
2928
handler::ha_rename_table(const char *from, const char *to)
2929
{
2930
  mark_trx_read_write();
2931
2932
  return rename_table(from, to);
2933
}
2934
2935
2936
/**
2937
  Delete table: public interface.
2938
2939
  @sa handler::delete_table()
2940
*/
2941
2942
int
2943
handler::ha_delete_table(const char *name)
2944
{
2945
  mark_trx_read_write();
2946
2947
  return delete_table(name);
2948
}
2949
2950
2951
/**
2952
  Drop table in the engine: public interface.
2953
2954
  @sa handler::drop_table()
2955
*/
2956
2957
void
2958
handler::ha_drop_table(const char *name)
2959
{
2960
  mark_trx_read_write();
2961
2962
  return drop_table(name);
2963
}
2964
2965
2966
/**
2967
  Create a table in the engine: public interface.
2968
2969
  @sa handler::create()
2970
*/
2971
2972
int
2973
handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info)
2974
{
2975
  mark_trx_read_write();
2976
2977
  return create(name, form, info);
2978
}
2979
2980
2981
/**
2982
  Create handler files for CREATE TABLE: public interface.
2983
2984
  @sa handler::create_handler_files()
2985
*/
2986
2987
int
2988
handler::ha_create_handler_files(const char *name, const char *old_name,
2989
                        int action_flag, HA_CREATE_INFO *info)
2990
{
2991
  mark_trx_read_write();
2992
2993
  return create_handler_files(name, old_name, action_flag, info);
2994
}
2995
2996
2997
/**
2998
  Tell the storage engine that it is allowed to "disable transaction" in the
2999
  handler. It is a hint that ACID is not required - it is used in NDB for
3000
  ALTER TABLE, for example, when data are copied to temporary table.
3001
  A storage engine may treat this hint any way it likes. NDB for example
3002
  starts to commit every now and then automatically.
3003
  This hint can be safely ignored.
3004
*/
3005
int ha_enable_transaction(THD *thd, bool on)
3006
{
3007
  int error=0;
3008
3009
  if ((thd->transaction.on= on))
3010
  {
3011
    /*
3012
      Now all storage engines should have transaction handling enabled.
3013
      But some may have it enabled all the time - "disabling" transactions
3014
      is an optimization hint that storage engine is free to ignore.
3015
      So, let's commit an open transaction (if any) now.
3016
    */
3017
    if (!(error= ha_commit_trans(thd, 0)))
3018
      error= end_trans(thd, COMMIT);
3019
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3020
  return(error);
1 by brian
clean slate
3021
}
3022
3023
/**
  Read the next index entry and verify that it still matches 'key'.

  @param buf     record buffer handed to index_next()
  @param key     key value the fetched row is compared against
  @param keylen  length of 'key'

  @return the error from index_next() if it fails; HA_ERR_END_OF_FILE
          (with table->status set to STATUS_NOT_FOUND) if the fetched row
          does not match the key; otherwise 0
*/
int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
{
  int error= index_next(buf);
  if (error)
    return error;

  const my_ptrdiff_t ptrdiff= buf - table->record[0];
  uchar *orig_record_0= NULL;
  KEY *cur_key= NULL;
  KEY_PART_INFO *part;
  KEY_PART_INFO *parts_end= NULL;

  /*
    key_cmp_if_same() compares table->record[0] against 'key'.
    In parts it uses table->record[0] directly, in parts it uses
    field objects with their local pointers into table->record[0].
    If 'buf' is distinct from table->record[0], we need to move
    all record references. This is table->record[0] itself and
    the field pointers of the fields used in this key.
  */
  if (ptrdiff)
  {
    orig_record_0= table->record[0];
    table->record[0]= buf;
    cur_key= table->key_info + active_index;
    parts_end= cur_key->key_part + cur_key->key_parts;
    for (part= cur_key->key_part; part < parts_end; part++)
    {
      assert(part->field);
      part->field->move_field_offset(ptrdiff);
    }
  }

  if (key_cmp_if_same(table, key, active_index, keylen))
  {
    table->status= STATUS_NOT_FOUND;
    error= HA_ERR_END_OF_FILE;
  }

  /* Undo the temporary redirection, if one was made. */
  if (ptrdiff)
  {
    table->record[0]= orig_record_0;
    for (part= cur_key->key_part; part < parts_end; part++)
      part->field->move_field_offset(-ptrdiff);
  }
  return error;
}
3072
3073
3074
/****************************************************************************
3075
** Some general functions that aren't in the handler class
3076
****************************************************************************/
3077
3078
/**
3079
  Initiates table-file and calls appropriate database-creator.
3080
3081
  @retval
3082
   0  ok
3083
  @retval
3084
   1  error
3085
*/
3086
int ha_create_table(THD *thd, const char *path,
3087
                    const char *db, const char *table_name,
3088
                    HA_CREATE_INFO *create_info,
3089
		    bool update_create_info)
3090
{
3091
  int error= 1;
3092
  TABLE table;
3093
  char name_buff[FN_REFLEN];
3094
  const char *name;
3095
  TABLE_SHARE share;
3096
  
3097
  init_tmp_table_share(thd, &share, db, 0, table_name, path);
3098
  if (open_table_def(thd, &share, 0) ||
3099
      open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table,
3100
                            OTM_CREATE))
3101
    goto err;
3102
3103
  if (update_create_info)
3104
    update_create_info_from_table(create_info, &table);
3105
3106
  name= check_lowercase_names(table.file, share.path.str, name_buff);
3107
3108
  error= table.file->ha_create(name, &table, create_info);
3109
  VOID(closefrm(&table, 0));
3110
  if (error)
3111
  {
3112
    strxmov(name_buff, db, ".", table_name, NullS);
3113
    my_error(ER_CANT_CREATE_TABLE, MYF(ME_BELL+ME_WAITTANG), name_buff, error);
3114
  }
3115
err:
3116
  free_table_share(&share);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3117
  return(error != 0);
1 by brian
clean slate
3118
}
3119
3120
/**
3121
  Try to discover table from engine.
3122
3123
  @note
3124
    If found, write the frm file to disk.
3125
3126
  @retval
3127
  -1    Table did not exists
3128
  @retval
3129
   0    Table created ok
3130
  @retval
3131
   > 0  Error, table existed but could not be created
3132
*/
3133
int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
3134
{
3135
  int error;
3136
  uchar *frmblob;
3137
  size_t frmlen;
3138
  char path[FN_REFLEN];
3139
  HA_CREATE_INFO create_info;
3140
  TABLE table;
3141
  TABLE_SHARE share;
3142
3143
  bzero((uchar*) &create_info,sizeof(create_info));
3144
  if ((error= ha_discover(thd, db, name, &frmblob, &frmlen)))
3145
  {
3146
    /* Table could not be discovered and thus not created */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3147
    return(error);
1 by brian
clean slate
3148
  }
3149
3150
  /*
3151
    Table exists in handler and could be discovered
3152
    frmblob and frmlen are set, write the frm to disk
3153
  */
3154
3155
  build_table_filename(path, FN_REFLEN-1, db, name, "", 0);
3156
  // Save the frm file
3157
  error= writefrm(path, frmblob, frmlen);
3158
  my_free(frmblob, MYF(0));
3159
  if (error)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3160
    return(2);
1 by brian
clean slate
3161
3162
  init_tmp_table_share(thd, &share, db, 0, name, path);
3163
  if (open_table_def(thd, &share, 0))
3164
  {
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3165
    return(3);
1 by brian
clean slate
3166
  }
3167
  if (open_table_from_share(thd, &share, "" ,0, 0, 0, &table, OTM_OPEN))
3168
  {
3169
    free_table_share(&share);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3170
    return(3);
1 by brian
clean slate
3171
  }
3172
3173
  update_create_info_from_table(&create_info, &table);
3174
  create_info.table_options|= HA_OPTION_CREATE_FROM_ENGINE;
3175
3176
  check_lowercase_names(table.file, path, path);
3177
  error=table.file->ha_create(path, &table, &create_info);
3178
  VOID(closefrm(&table, 1));
3179
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3180
  return(error != 0);
1 by brian
clean slate
3181
}
3182
3183
void st_ha_check_opt::init()
3184
{
3185
  flags= sql_flags= 0;
3186
  sort_buffer_size = current_thd->variables.myisam_sort_buff_size;
3187
}
3188
3189
3190
/*****************************************************************************
3191
  Key cache handling.
3192
3193
  This code is only relevant for ISAM/MyISAM tables
3194
3195
  key_cache->cache may be 0 only in the case where a key cache is not
3196
  initialized or when we were not able to init the key cache in a previous
3197
  call to ha_init_key_cache() (probably out of memory)
3198
*****************************************************************************/
3199
3200
/**
3201
  Init a key cache if it has not been initied before.
3202
*/
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3203
int ha_init_key_cache(const char *name __attribute__((__unused__)),
3204
                      KEY_CACHE *key_cache)
1 by brian
clean slate
3205
{
3206
  if (!key_cache->key_cache_inited)
3207
  {
3208
    pthread_mutex_lock(&LOCK_global_system_variables);
61 by Brian Aker
Conversion of handler type.
3209
    uint32_t tmp_buff_size= (uint32_t) key_cache->param_buff_size;
1 by brian
clean slate
3210
    uint tmp_block_size= (uint) key_cache->param_block_size;
3211
    uint division_limit= key_cache->param_division_limit;
3212
    uint age_threshold=  key_cache->param_age_threshold;
3213
    pthread_mutex_unlock(&LOCK_global_system_variables);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3214
    return(!init_key_cache(key_cache,
1 by brian
clean slate
3215
				tmp_block_size,
3216
				tmp_buff_size,
3217
				division_limit, age_threshold));
3218
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3219
  return(0);
1 by brian
clean slate
3220
}
3221
3222
3223
/**
3224
  Resize key cache.
3225
*/
3226
int ha_resize_key_cache(KEY_CACHE *key_cache)
3227
{
3228
  if (key_cache->key_cache_inited)
3229
  {
3230
    pthread_mutex_lock(&LOCK_global_system_variables);
3231
    long tmp_buff_size= (long) key_cache->param_buff_size;
3232
    long tmp_block_size= (long) key_cache->param_block_size;
3233
    uint division_limit= key_cache->param_division_limit;
3234
    uint age_threshold=  key_cache->param_age_threshold;
3235
    pthread_mutex_unlock(&LOCK_global_system_variables);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3236
    return(!resize_key_cache(key_cache, tmp_block_size,
1 by brian
clean slate
3237
				  tmp_buff_size,
3238
				  division_limit, age_threshold));
3239
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3240
  return(0);
1 by brian
clean slate
3241
}
3242
3243
3244
/**
3245
  Change parameters for key cache (like size)
3246
*/
3247
int ha_change_key_cache_param(KEY_CACHE *key_cache)
3248
{
3249
  if (key_cache->key_cache_inited)
3250
  {
3251
    pthread_mutex_lock(&LOCK_global_system_variables);
3252
    uint division_limit= key_cache->param_division_limit;
3253
    uint age_threshold=  key_cache->param_age_threshold;
3254
    pthread_mutex_unlock(&LOCK_global_system_variables);
3255
    change_key_cache_param(key_cache, division_limit, age_threshold);
3256
  }
3257
  return 0;
3258
}
3259
3260
/**
3261
  Free memory allocated by a key cache.
3262
*/
3263
int ha_end_key_cache(KEY_CACHE *key_cache)
3264
{
3265
  end_key_cache(key_cache, 1);		// Can never fail
3266
  return 0;
3267
}
3268
3269
/**
3270
  Move all tables from one key cache to another one.
3271
*/
3272
int ha_change_key_cache(KEY_CACHE *old_key_cache,
3273
			KEY_CACHE *new_key_cache)
3274
{
3275
  mi_change_key_cache(old_key_cache, new_key_cache);
3276
  return 0;
3277
}
3278
3279
3280
/**
3281
  Try to discover one table from handler(s).
3282
3283
  @retval
3284
    -1   Table does not exist
3285
  @retval
3286
    0   OK. In this case *frmblob and *frmlen are set
3287
  @retval
3288
    >0   error.  frmblob and frmlen may not be set
3289
*/
3290
struct st_discover_args
3291
{
3292
  const char *db;
3293
  const char *name;
3294
  uchar **frmblob; 
3295
  size_t *frmlen;
3296
};
3297
3298
/**
  plugin_foreach() callback: ask one storage engine to discover a table.

  @param thd     current thread
  @param plugin  storage engine plugin to query
  @param arg     pointer to a st_discover_args

  @return true if the engine is enabled (SHOW_OPTION_YES), provides a
          discover hook and that hook returned 0; false otherwise
*/
static my_bool discover_handlerton(THD *thd, plugin_ref plugin,
                                   void *arg)
{
  st_discover_args *vargs= (st_discover_args *)arg;
  handlerton *hton= plugin_data(plugin, handlerton *);

  /* Engines that are disabled or cannot discover are skipped. */
  if (hton->state != SHOW_OPTION_YES || !hton->discover)
    return false;

  return !hton->discover(hton, thd, vargs->db, vargs->name,
                         vargs->frmblob,
                         vargs->frmlen);
}
3311
3312
int ha_discover(THD *thd, const char *db, const char *name,
3313
		uchar **frmblob, size_t *frmlen)
3314
{
3315
  int error= -1; // Table does not exist in any handler
3316
  st_discover_args args= {db, name, frmblob, frmlen};
3317
3318
  if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3319
    return(error);
1 by brian
clean slate
3320
3321
  if (plugin_foreach(thd, discover_handlerton,
3322
                 MYSQL_STORAGE_ENGINE_PLUGIN, &args))
3323
    error= 0;
3324
3325
  if (!error)
3326
    status_var_increment(thd->status_var.ha_discover_count);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3327
  return(error);
1 by brian
clean slate
3328
}
3329
3330
3331
/**
3332
  Call this function in order to give the handler the possibility
3333
  to ask engine if there are any new tables that should be written to disk
3334
  or any dropped tables that need to be removed from disk
3335
*/
3336
struct st_find_files_args
3337
{
3338
  const char *db;
3339
  const char *path;
3340
  const char *wild;
3341
  bool dir;
3342
  List<LEX_STRING> *files;
3343
};
3344
3345
/**
3346
  Ask handler if the table exists in engine.
3347
  @retval
3348
    HA_ERR_NO_SUCH_TABLE     Table does not exist
3349
  @retval
3350
    HA_ERR_TABLE_EXIST       Table exists
3351
  @retval
3352
    \#                  Error code
3353
*/
3354
/*
  Argument bundle handed to table_exists_in_engine_handlerton() through the
  void* parameter of plugin_foreach(). ha_table_exists_in_engine() fills it
  with an aggregate initializer, so the member order must not change.
*/
struct st_table_exists_in_engine_args
{
  const char *db;     /* forwarded to hton->table_exists_in_engine() */
  const char *name;   /* forwarded to hton->table_exists_in_engine() */
  int err;            /* result of the most recently visited engine */
};
3360
3361
/*
  plugin_foreach() callback used by ha_table_exists_in_engine().

  Stores this engine's answer in the err member of the
  st_table_exists_in_engine_args passed in arg (overwriting the answer of
  any previously visited engine) and returns true only when that answer is
  HA_ERR_TABLE_EXIST.
*/
static my_bool table_exists_in_engine_handlerton(THD *thd, plugin_ref plugin,
                                   void *arg)
{
  st_table_exists_in_engine_args *query=
    (st_table_exists_in_engine_args *) arg;
  handlerton *engine= plugin_data(plugin, handlerton *);

  if (engine->state == SHOW_OPTION_YES && engine->table_exists_in_engine)
    query->err= engine->table_exists_in_engine(engine, thd,
                                               query->db, query->name);
  else
    /* Disabled engine or no hook: treated as "no such table". */
    query->err= HA_ERR_NO_SUCH_TABLE;

  if (query->err != HA_ERR_TABLE_EXIST)
    return false;

  return true;
}
3378
3379
int ha_table_exists_in_engine(THD* thd, const char* db, const char* name)
{
  /* Start from "no such table"; the callback overwrites err per engine. */
  st_table_exists_in_engine_args lookup= {db, name, HA_ERR_NO_SUCH_TABLE};

  /* The boolean result is not needed: the answer travels in lookup.err. */
  (void) plugin_foreach(thd, table_exists_in_engine_handlerton,
                        MYSQL_STORAGE_ENGINE_PLUGIN, &lookup);

  return(lookup.err);
}
3386
3387
/**
3388
  Calculate cost of 'index only' scan for given index and number of records
3389
3390
  @param keynr    Index number
3391
  @param records  Estimated number of records to be retrieved
3392
3393
  @note
3394
    It is assumed that we will read through the whole key range and that all
3395
    key blocks are half full (normally things are much better). It is also
3396
    assumed that each time we read the next key from the index, the handler
3397
    performs a random seek, thus the cost is proportional to the number of
3398
    blocks read.
3399
3400
  @todo
3401
    Consider joining this function and handler::read_time() into one
3402
    handler::read_time(keynr, records, ranges, bool index_only) function.
3403
3404
  @return
3405
    Estimated cost of 'index only' scan
3406
*/
3407
3408
double handler::index_only_read_time(uint keynr, double records)
{
  /*
    Number of index entries assumed per block: half of the block size
    divided by the size of one entry (key plus row reference), plus one.
  */
  uint keys_per_block= (stats.block_size/2/
                        (table->key_info[keynr].key_length + ref_length) + 1);

  /* Blocks needed for 'records' entries, rounded up. */
  return ((double) (records + keys_per_block-1) /
          (double) keys_per_block);
}
3417
3418
3419
/****************************************************************************
3420
 * Default MRR implementation (MRR to non-MRR converter)
3421
 ***************************************************************************/
3422
3423
/**
3424
  Get cost and other information about MRR scan over a known list of ranges
3425
3426
  Calculate estimated cost and other information about an MRR scan for given
3427
  sequence of ranges.
3428
3429
  @param keyno           Index number
3430
  @param seq             Range sequence to be traversed
3431
  @param seq_init_param  First parameter for seq->init()
3432
  @param n_ranges_arg    Number of ranges in the sequence, or 0 if the caller
3433
                         can't efficiently determine it
3434
  @param bufsz    INOUT  IN:  Size of the buffer available for use
3435
                         OUT: Size of the buffer that is expected to be actually
3436
                              used, or 0 if buffer is not needed.
3437
  @param flags    INOUT  A combination of HA_MRR_* flags
3438
  @param cost     OUT    Estimated cost of MRR access
3439
3440
  @note
3441
    This method (or an overriding one in a derived class) must check for
3442
    thd->killed and return HA_POS_ERROR if it is not zero. This is required
3443
    for a user to be able to interrupt the calculation by killing the
3444
    connection/query.
3445
3446
  @retval
3447
    HA_POS_ERROR  Error or the engine is unable to perform the requested
3448
                  scan. Values of OUT parameters are undefined.
3449
  @retval
3450
    other         OK, *cost contains cost of the scan, *bufsz and *flags
3451
                  contain scan parameters.
3452
*/
3453
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3454
ha_rows
handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
                                     void *seq_init_param,
                                     uint n_ranges_arg __attribute__((__unused__)),
                                     uint *bufsz, uint *flags, COST_VECT *cost)
{
  KEY_MULTI_RANGE range;
  range_seq_t seq_it;
  ha_rows rows, total_rows= 0;
  uint n_ranges=0;
  THD *thd= current_thd;

  /* Default MRR implementation doesn't need buffer */
  *bufsz= 0;

  /* Walk the whole range sequence, summing the per-range row estimates. */
  seq_it= seq->init(seq_init_param, n_ranges, *flags);
  while (!seq->next(seq_it, &range))
  {
    /* Let a killed connection/query interrupt the estimation. */
    if (unlikely(thd->killed != 0))
      return HA_POS_ERROR;

    n_ranges++;

    /* A zero-length key means that end of the range is open. */
    key_range *min_endp= range.start_key.length ? &range.start_key : NULL;
    key_range *max_endp= range.end_key.length ? &range.end_key : NULL;

    if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
      rows= 1; /* there can be at most one row */
    else
    {
      if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp,
                                                        max_endp)))
      {
        /* Can't scan one range => can't do MRR scan at all */
        total_rows= HA_POS_ERROR;
        break;
      }
    }
    total_rows += rows;
  }

  if (total_rows != HA_POS_ERROR)
  {
    /* The following calculation is the same as in multi_range_read_info(): */
    *flags |= HA_MRR_USE_DEFAULT_IMPL;
    cost->zero();
    cost->avg_io_cost= 1; /* assume random seeks */
    if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
    {
      /*
        index_only_read_time() takes a double; convert directly instead of
        going through uint, which would truncate large ha_rows estimates.
      */
      cost->io_count= index_only_read_time(keyno, (double) total_rows);
    }
    else
      cost->io_count= read_time(keyno, n_ranges, total_rows);
    cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
  }
  return total_rows;
}
3510
3511
3512
/**
3513
  Get cost and other information about MRR scan over some sequence of ranges
3514
3515
  Calculate estimated cost and other information about an MRR scan for some
3516
  sequence of ranges.
3517
3518
  The ranges themselves will be known only at execution phase. When this
3519
  function is called we only know number of ranges and a (rough) E(#records)
3520
  within those ranges.
3521
3522
  Currently this function is only called for "n-keypart singlepoint" ranges,
3523
  i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
3524
3525
  The flags parameter is a combination of those flags: HA_MRR_SORTED,
3526
  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
3527
3528
  @param keyno           Index number
3529
  @param n_ranges        Estimated number of ranges (i.e. intervals) in the
3530
                         range sequence.
3531
  @param n_rows          Estimated total number of records contained within all
3532
                         of the ranges
3533
  @param bufsz    INOUT  IN:  Size of the buffer available for use
3534
                         OUT: Size of the buffer that will be actually used, or
3535
                              0 if buffer is not needed.
3536
  @param flags    INOUT  A combination of HA_MRR_* flags
3537
  @param cost     OUT    Estimated cost of MRR access
3538
3539
  @retval
3540
    0     OK, *cost contains cost of the scan, *bufsz and *flags contain scan
3541
          parameters.
3542
  @retval
3543
    other Error or can't perform the requested scan
3544
*/
3545
3546
int handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
3547
                                   uint *bufsz, uint *flags, COST_VECT *cost)
3548
{
3549
  *bufsz= 0; /* Default implementation doesn't need a buffer */
3550
3551
  *flags |= HA_MRR_USE_DEFAULT_IMPL;
3552
3553
  cost->zero();
3554
  cost->avg_io_cost= 1; /* assume random seeks */
3555
3556
  /* Produce the same cost as non-MRR code does */
3557
  if (*flags & HA_MRR_INDEX_ONLY)
3558
    cost->io_count= index_only_read_time(keyno, n_rows);
3559
  else
3560
    cost->io_count= read_time(keyno, n_ranges, n_rows);
3561
  return 0;
3562
}
3563
3564
3565
/**
3566
  Initialize the MRR scan
3567
3568
  Initialize the MRR scan. This function may do heavyweight scan 
3569
  initialization like row prefetching/sorting/etc (NOTE: but better not do
3570
  it here as we may not need it, e.g. if we never satisfy WHERE clause on
3571
  previous tables. For many implementations it would be natural to do such
3572
  initializations in the first multi_read_range_next() call)
3573
3574
  mode is a combination of the following flags: HA_MRR_SORTED,
3575
  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION 
3576
3577
  @param seq             Range sequence to be traversed
3578
  @param seq_init_param  First parameter for seq->init()
3579
  @param n_ranges        Number of ranges in the sequence
3580
  @param mode            Flags, see the description section for the details
3581
  @param buf             INOUT: memory buffer to be used
3582
3583
  @note
3584
    One must have called index_init() before calling this function. Several
3585
    multi_range_read_init() calls may be made in course of one query.
3586
3587
    Until WL#2623 is done (see its text, section 3.2), the following will 
3588
    also hold:
3589
    The caller will guarantee that if "seq->init == mrr_ranges_array_init"
3590
    then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
3591
    This property will only be used by NDB handler until WL#2623 is done.
3592
     
3593
    Buffer memory management is done according to the following scenario:
3594
    The caller allocates the buffer and provides it to the callee by filling
3595
    the members of HANDLER_BUFFER structure.
3596
    The callee consumes all or some fraction of the provided buffer space, and
3597
    sets the HANDLER_BUFFER members accordingly.
3598
    The callee may use the buffer memory until the next multi_range_read_init()
3599
    call is made, all records have been read, or until index_end() call is
3600
    made, whichever comes first.
3601
3602
  @retval 0  OK
3603
  @retval 1  Error
3604
*/
3605
3606
int
3607
handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3608
                               uint n_ranges, uint mode,
3609
                               HANDLER_BUFFER *buf __attribute__((__unused__)))
1 by brian
clean slate
3610
{
3611
  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
3612
  mrr_funcs= *seq_funcs;
3613
  mrr_is_output_sorted= test(mode & HA_MRR_SORTED);
56 by brian
Next pass of true/false update.
3614
  mrr_have_range= false;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3615
  return(0);
1 by brian
clean slate
3616
}
3617
3618
3619
/**
3620
  Get next record in MRR scan
3621
3622
  Default MRR implementation: read the next record
3623
3624
  @param range_info  OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
3625
                          Otherwise, the opaque value associated with the range
3626
                          that contains the returned record.
3627
3628
  @retval 0      OK
3629
  @retval other  Error code
3630
*/
3631
3632
int handler::multi_range_read_next(char **range_info)
3633
{
3634
  int result= 0;
3635
  int range_res;
3636
3637
  if (!mrr_have_range)
3638
  {
56 by brian
Next pass of true/false update.
3639
    mrr_have_range= true;
1 by brian
clean slate
3640
    goto start;
3641
  }
3642
3643
  do
3644
  {
3645
    /* Save a call if there can be only one row in range. */
3646
    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
3647
    {
3648
      result= read_range_next();
3649
      /* On success or non-EOF errors jump to the end. */
3650
      if (result != HA_ERR_END_OF_FILE)
3651
        break;
3652
    }
3653
    else
3654
    {
3655
      if (was_semi_consistent_read())
3656
        goto scan_it_again;
3657
      /*
3658
        We need to set this for the last range only, but checking this
3659
        condition is more expensive than just setting the result code.
3660
      */
3661
      result= HA_ERR_END_OF_FILE;
3662
    }
3663
3664
start:
3665
    /* Try the next range(s) until one matches a record. */
3666
    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
3667
    {
3668
scan_it_again:
3669
      result= read_range_first(mrr_cur_range.start_key.keypart_map ?
3670
                                 &mrr_cur_range.start_key : 0,
3671
                               mrr_cur_range.end_key.keypart_map ?
3672
                                 &mrr_cur_range.end_key : 0,
3673
                               test(mrr_cur_range.range_flag & EQ_RANGE),
3674
                               mrr_is_output_sorted);
3675
      if (result != HA_ERR_END_OF_FILE)
3676
        break;
3677
    }
3678
  }
3679
  while ((result == HA_ERR_END_OF_FILE) && !range_res);
3680
3681
  *range_info= mrr_cur_range.ptr;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3682
  return(result);
1 by brian
clean slate
3683
}
3684
3685
3686
/* **************************************************************************
3687
 * DS-MRR implementation 
3688
 ***************************************************************************/
3689
3690
/**
3691
  DS-MRR: Initialize and start MRR scan
3692
3693
  Initialize and start the MRR scan. Depending on the mode parameter, this
3694
  may use default or DS-MRR implementation.
3695
3696
  @param h               Table handler to be used
3697
  @param key             Index to be used
3698
  @param seq_funcs       Interval sequence enumeration functions
3699
  @param seq_init_param  Interval sequence enumeration parameter
3700
  @param n_ranges        Number of ranges in the sequence.
3701
  @param mode            HA_MRR_* modes to use
3702
  @param buf             INOUT Buffer to use
3703
3704
  @retval 0     Ok, Scan started.
3705
  @retval other Error
3706
*/
3707
3708
int DsMrr_impl::dsmrr_init(handler *h, KEY *key,
3709
                           RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
3710
                           uint n_ranges, uint mode, HANDLER_BUFFER *buf)
3711
{
3712
  uint elem_size;
3713
  uint keyno;
3714
  Item *pushed_cond= NULL;
3715
  handler *new_h2;
3716
  keyno= h->active_index;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3717
  assert(h2 == NULL);
1 by brian
clean slate
3718
  if (mode & HA_MRR_USE_DEFAULT_IMPL || mode & HA_MRR_SORTED)
3719
  {
56 by brian
Next pass of true/false update.
3720
    use_default_impl= true;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3721
    return(h->handler::multi_range_read_init(seq_funcs, seq_init_param,
1 by brian
clean slate
3722
                                                  n_ranges, mode, buf));
3723
  }
3724
  rowids_buf= buf->buffer;
3725
  //psergey-todo: don't add key_length as it is not needed anymore
3726
  rowids_buf += key->key_length + h->ref_length;
3727
3728
  is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
3729
  rowids_buf_end= buf->buffer_end;
3730
  
3731
  elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
3732
  rowids_buf_last= rowids_buf + 
3733
                      ((rowids_buf_end - rowids_buf)/ elem_size)*
3734
                      elem_size;
3735
  rowids_buf_end= rowids_buf_last;
3736
3737
  /* Create a separate handler object to do rndpos() calls. */
3738
  THD *thd= current_thd;
3739
  if (!(new_h2= h->clone(thd->mem_root)) || 
3740
      new_h2->ha_external_lock(thd, F_RDLCK))
3741
  {
3742
    delete new_h2;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3743
    return(1);
1 by brian
clean slate
3744
  }
3745
3746
  if (keyno == h->pushed_idx_cond_keyno)
3747
    pushed_cond= h->pushed_idx_cond;
3748
  if (h->ha_index_end())
3749
  {
3750
    new_h2= h2;
3751
    goto error;
3752
  }
3753
3754
  h2= new_h2;
3755
  table->prepare_for_position();
3756
  new_h2->extra(HA_EXTRA_KEYREAD);
3757
56 by brian
Next pass of true/false update.
3758
  if (h2->ha_index_init(keyno, false) || 
1 by brian
clean slate
3759
      h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
3760
                                         mode, buf))
3761
    goto error;
56 by brian
Next pass of true/false update.
3762
  use_default_impl= false;
1 by brian
clean slate
3763
  
3764
  if (pushed_cond)
3765
    h2->idx_cond_push(keyno, pushed_cond);
3766
  if (dsmrr_fill_buffer(new_h2))
3767
    goto error;
3768
3769
  /*
3770
    If the above call has scanned through all intervals in *seq, then
3771
    adjust *buf to indicate that the remaining buffer space will not be used.
3772
  */
3773
  if (dsmrr_eof) 
3774
    buf->end_of_used_area= rowids_buf_last;
3775
56 by brian
Next pass of true/false update.
3776
  if (h->ha_rnd_init(false))
1 by brian
clean slate
3777
    goto error;
3778
  
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3779
  return(0);
1 by brian
clean slate
3780
error:
3781
  h2->ha_index_or_rnd_end();
3782
  h2->ha_external_lock(thd, F_UNLCK);
3783
  h2->close();
3784
  delete h2;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3785
  return(1);
1 by brian
clean slate
3786
}
3787
3788
3789
void DsMrr_impl::dsmrr_close()
3790
{
3791
  if (h2)
3792
  {
3793
    h2->ha_external_lock(current_thd, F_UNLCK);
3794
    h2->close();
3795
    delete h2;
3796
    h2= NULL;
3797
  }
56 by brian
Next pass of true/false update.
3798
  use_default_impl= true;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3799
  return;
1 by brian
clean slate
3800
}
3801
3802
3803
static int rowid_cmp(void *h, uchar *a, uchar *b)
3804
{
3805
  return ((handler*)h)->cmp_ref(a, b);
3806
}
3807
3808
3809
/**
3810
  DS-MRR: Fill the buffer with rowids and sort it by rowid
3811
3812
  {This is an internal function of DiskSweep MRR implementation}
3813
  Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into 
3814
  buffer. When the buffer is full or scan is completed, sort the buffer by 
3815
  rowid and return.
3816
  
3817
  The function assumes that rowids buffer is empty when it is invoked. 
3818
  
3819
  @param h  Table handler
3820
3821
  @retval 0      OK, the next portion of rowids is in the buffer,
3822
                 properly ordered
3823
  @retval other  Error
3824
*/
3825
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3826
int DsMrr_impl::dsmrr_fill_buffer(handler *unused __attribute__((__unused__)))
{
  char *range_info;
  /*
    Must start at 0: when the buffer has no room for even one element the
    loop condition short-circuits before res is assigned, and res is read
    right after the loop.
  */
  int res= 0;

  rowids_buf_cur= rowids_buf;
  while ((rowids_buf_cur < rowids_buf_end) &&
         !(res= h2->handler::multi_range_read_next(&range_info)))
  {
    /* Put rowid, or {rowid, range_id} pair into the buffer */
    h2->position(table->record[0]);
    memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
    rowids_buf_cur += h->ref_length;

    if (is_mrr_assoc)
    {
      memcpy(rowids_buf_cur, &range_info, sizeof(void*));
      rowids_buf_cur += sizeof(void*);
    }
  }

  /* Anything other than success or end-of-data is a real error. */
  if (res && res != HA_ERR_END_OF_FILE)
    return(res);
  dsmrr_eof= test(res == HA_ERR_END_OF_FILE);

  /* Sort the buffer contents by rowid */
  uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
  uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;

  my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
            (void*)h);

  /* Mark the end of the filled part and rewind the read cursor. */
  rowids_buf_last= rowids_buf_cur;
  rowids_buf_cur=  rowids_buf;
  return(0);
}
3861
3862
3863
/**
3864
  DS-MRR implementation: multi_range_read_next() function
3865
*/
3866
3867
int DsMrr_impl::dsmrr_next(handler *h, char **range_info)
{
  if (use_default_impl)
    return h->handler::multi_range_read_next(range_info);

  int rc= 0;

  if (rowids_buf_cur == rowids_buf_last)
  {
    /*
      The buffer is drained. Either the range scan is already finished,
      or another batch of rowids has to be collected.
    */
    if (dsmrr_eof)
      rc= HA_ERR_END_OF_FILE;
    else
      rc= dsmrr_fill_buffer(h);

    /* Return EOF if there are no rowids in the buffer after re-fill attempt */
    if (!rc && rowids_buf_cur == rowids_buf_last)
      rc= HA_ERR_END_OF_FILE;
  }

  if (!rc)
  {
    /* Fetch the row for the next rowid and step over the buffer element. */
    rc= h->rnd_pos(table->record[0], rowids_buf_cur);
    rowids_buf_cur += h->ref_length;
    if (is_mrr_assoc)
    {
      memcpy(range_info, rowids_buf_cur, sizeof(void*));
      rowids_buf_cur += sizeof(void*);
    }
  }

  /* Any non-zero result, EOF included, ends the DS-MRR scan. */
  if (rc)
    dsmrr_close();
  return rc;
}
3906
3907
3908
/**
3909
  DS-MRR implementation: multi_range_read_info() function
3910
*/
3911
int DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, uint *bufsz,
3912
                           uint *flags, COST_VECT *cost)
3913
{  
3914
  int res;
3915
  uint def_flags= *flags;
3916
  uint def_bufsz= *bufsz;
3917
3918
  /* Get cost/flags/mem_usage of default MRR implementation */
3919
  res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
3920
                                         &def_flags, cost);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3921
  assert(!res);
1 by brian
clean slate
3922
3923
  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || 
3924
      choose_mrr_impl(keyno, rows, &def_flags, &def_bufsz, cost))
3925
  {
3926
    /* Default implementation is choosen */
3927
    *flags= def_flags;
3928
    *bufsz= def_bufsz;
3929
  }
3930
  return 0;
3931
}
3932
3933
3934
/**
3935
  DS-MRR Implementation: multi_range_read_info_const() function
3936
*/
3937
3938
ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
                                 void *seq_init_param, uint n_ranges,
                                 uint *bufsz, uint *flags, COST_VECT *cost)
{
  /* Scratch copies that receive the default implementation's answer. */
  uint default_flags= *flags;
  uint default_bufsz= *bufsz;

  /* Get cost/flags/mem_usage of default MRR implementation */
  ha_rows rows= h->handler::multi_range_read_info_const(keyno, seq,
                                                        seq_init_param,
                                                        n_ranges,
                                                        &default_bufsz,
                                                        &default_flags, cost);

  /* Default implementation can't perform MRR scan => we can't either */
  if (rows == HA_POS_ERROR)
    return rows;

  /*
    If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
    use the default MRR implementation (we need it for UPDATE/DELETE).
    Otherwise, make a choice based on cost and @@optimizer_use_mrr.
  */
  const bool use_default= (*flags & HA_MRR_USE_DEFAULT_IMPL) ||
                          choose_mrr_impl(keyno, rows, flags, bufsz, cost);
  if (use_default)
  {
    *flags= default_flags;
    *bufsz= default_bufsz;
  }
  else
    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;

  return rows;
}
3972
3973
3974
/**
3975
  Check if key has partially-covered columns
3976
3977
  We can't use DS-MRR to perform range scans when the ranges are over
3978
  partially-covered keys, because we'll not have full key part values
3979
  (we'll have their prefixes from the index) and will not be able to check
3980
  if we've reached the end of the range.
3981
3982
  @param keyno  Key to check
3983
3984
  @todo
3985
    Allow use of DS-MRR in cases where the index has partially-covered
3986
    components but they are not used for scanning.
3987
56 by brian
Next pass of true/false update.
3988
  @retval true   Yes
3989
  @retval false  No
1 by brian
clean slate
3990
*/
3991
3992
bool DsMrr_impl::key_uses_partial_cols(uint keyno)
{
  KEY_PART_INFO *parts= table->key_info[keyno].key_part;
  const uint n_parts= table->key_info[keyno].key_parts;

  /*
    The key is reported as "partial" as soon as one of its parts refers to
    a field whose part_of_key bitmap does not include this key.
  */
  for (uint i= 0; i < n_parts; i++)
  {
    if (!parts[i].field->part_of_key.is_set(keyno))
      return true;
  }
  return false;
}
4003
4004
4005
/**
4006
  DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
4007
4008
  Make the choice between using Default MRR implementation and DS-MRR.
4009
  This function contains common functionality factored out of dsmrr_info()
4010
  and dsmrr_info_const(). The function assumes that the default MRR
4011
  implementation's applicability requirements are satisfied.
4012
4013
  @param keyno       Index number
4014
  @param rows        E(full rows to be retrieved)
4015
  @param flags  IN   MRR flags provided by the MRR user
4016
                OUT  If DS-MRR is chosen, flags of DS-MRR implementation
4017
                     else the value is not modified
4018
  @param bufsz  IN   If DS-MRR is chosen, buffer use of DS-MRR implementation
4019
                     else the value is not modified
4020
  @param cost   IN   Cost of default MRR implementation
4021
                OUT  If DS-MRR is chosen, cost of DS-MRR scan
4022
                     else the value is not modified
4023
56 by brian
Next pass of true/false update.
4024
  @retval true   Default MRR implementation should be used
4025
  @retval false  DS-MRR implementation should be used
1 by brian
clean slate
4026
*/
4027
4028
bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
4029
                                 uint *bufsz, COST_VECT *cost)
4030
{
4031
  COST_VECT dsmrr_cost;
4032
  bool res;
4033
  THD *thd= current_thd;
4034
  if ((thd->variables.optimizer_use_mrr == 2) || 
4035
      (*flags & HA_MRR_INDEX_ONLY) || (*flags & HA_MRR_SORTED) ||
4036
      (keyno == table->s->primary_key && 
4037
       h->primary_key_is_clustered()) || 
4038
       key_uses_partial_cols(keyno))
4039
  {
4040
    /* Use the default implementation */
4041
    *flags |= HA_MRR_USE_DEFAULT_IMPL;
56 by brian
Next pass of true/false update.
4042
    return true;
1 by brian
clean slate
4043
  }
4044
  
4045
  uint add_len= table->key_info[keyno].key_length + h->ref_length; 
4046
  *bufsz -= add_len;
4047
  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
56 by brian
Next pass of true/false update.
4048
    return true;
1 by brian
clean slate
4049
  *bufsz += add_len;
4050
  
4051
  bool force_dsmrr;
4052
  /* 
4053
    If @@optimizer_use_mrr==force, then set cost of DS-MRR to be minimum of
4054
    DS-MRR and Default implementations cost. This allows one to force use of
4055
    DS-MRR whenever it is applicable without affecting other cost-based
4056
    choices.
4057
  */
4058
  if ((force_dsmrr= (thd->variables.optimizer_use_mrr == 1)) &&
4059
      dsmrr_cost.total_cost() > cost->total_cost())
4060
    dsmrr_cost= *cost;
4061
4062
  if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost())
4063
  {
4064
    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
4065
    *flags &= ~HA_MRR_SORTED;          /* We will return unordered output */
4066
    *cost= dsmrr_cost;
56 by brian
Next pass of true/false update.
4067
    res= false;
1 by brian
clean slate
4068
  }
4069
  else
4070
  {
4071
    /* Use the default MRR implementation */
56 by brian
Next pass of true/false update.
4072
    res= true;
1 by brian
clean slate
4073
  }
4074
  return res;
4075
}
4076
4077
4078
static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost);
4079
4080
4081
/**
4082
  Get cost of DS-MRR scan
4083
4084
  @param keynr              Index to be used
4085
  @param rows               E(Number of rows to be scanned)
4086
  @param flags              Scan parameters (HA_MRR_* flags)
4087
  @param buffer_size INOUT  Buffer size
4088
  @param cost        OUT    The cost
4089
56 by brian
Next pass of true/false update.
4090
  @retval false  OK
4091
  @retval true   Error, DS-MRR cannot be used (the buffer is too small
1 by brian
clean slate
4092
                 for even 1 rowid)
4093
*/
4094
4095
bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
4096
                                         uint *buffer_size, COST_VECT *cost)
4097
{
61 by Brian Aker
Conversion of handler type.
4098
  uint32_t max_buff_entries, elem_size;
1 by brian
clean slate
4099
  ha_rows rows_in_full_step, rows_in_last_step;
4100
  uint n_full_steps;
4101
  double index_read_cost;
4102
4103
  elem_size= h->ref_length + sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION));
4104
  max_buff_entries = *buffer_size / elem_size;
4105
4106
  if (!max_buff_entries)
56 by brian
Next pass of true/false update.
4107
    return true; /* Buffer has not enough space for even 1 rowid */
1 by brian
clean slate
4108
4109
  /* Number of iterations we'll make with full buffer */
4110
  n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
4111
  
4112
  /* 
4113
    Get numbers of rows we'll be processing in 
4114
     - non-last sweep, with full buffer 
4115
     - last iteration, with non-full buffer
4116
  */
4117
  rows_in_full_step= max_buff_entries;
4118
  rows_in_last_step= rows % max_buff_entries;
4119
  
4120
  /* Adjust buffer size if we expect to use only part of the buffer */
4121
  if (n_full_steps)
4122
  {
4123
    get_sort_and_sweep_cost(table, rows, cost);
4124
    cost->multiply(n_full_steps);
4125
  }
4126
  else
4127
  {
4128
    cost->zero();
4129
    *buffer_size= max(*buffer_size, 
4130
                      (size_t)(1.2*rows_in_last_step) * elem_size + 
4131
                      h->ref_length + table->key_info[keynr].key_length);
4132
  }
4133
  
4134
  COST_VECT last_step_cost;
4135
  get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
4136
  cost->add(&last_step_cost);
4137
 
4138
  if (n_full_steps != 0)
4139
    cost->mem_cost= *buffer_size;
4140
  else
4141
    cost->mem_cost= (double)rows_in_last_step * elem_size;
4142
  
4143
  /* Total cost of all index accesses */
4144
  index_read_cost= h->index_only_read_time(keynr, (double)rows);
4145
  cost->add_io(index_read_cost, 1 /* Random seeks */);
56 by brian
Next pass of true/false update.
4146
  return false;
1 by brian
clean slate
4147
}
4148
4149
4150
/* 
4151
  Get cost of one sort-and-sweep step
4152
4153
  SYNOPSIS
4154
    get_sort_and_sweep_cost()
4155
      table       Table being accessed
4156
      nrows       Number of rows to be sorted and retrieved
4157
      cost   OUT  The cost
4158
4159
  DESCRIPTION
4160
    Get cost of these operations:
4161
     - sort an array of #nrows ROWIDs using qsort
4162
     - read #nrows records from table in a sweep.
4163
*/
4164
4165
static 
4166
void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost)
4167
{
4168
  if (nrows)
4169
  {
56 by brian
Next pass of true/false update.
4170
    get_sweep_read_cost(table, nrows, false, cost);
1 by brian
clean slate
4171
    /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
4172
    double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
4173
    if (cmp_op < 3)
4174
      cmp_op= 3;
4175
    cost->cpu_cost += cmp_op * log2(cmp_op);
4176
  }
4177
  else
4178
    cost->zero();
4179
}
4180
4181
4182
/**
4183
  Get cost of reading nrows table records in a "disk sweep"
4184
4185
  A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
4186
  for an ordered sequence of rowids.
4187
4188
  We assume hard disk IO. The read is performed as follows:
4189
4190
   1. The disk head is moved to the needed cylinder
4191
   2. The controller waits for the plate to rotate
4192
   3. The data is transferred
4193
4194
  Time to do #3 is insignificant compared to #2+#1.
4195
4196
  Time to move the disk head is proportional to head travel distance.
4197
4198
  Time to wait for the plate to rotate depends on whether the disk head
4199
  was moved or not. 
4200
4201
  If disk head wasn't moved, the wait time is proportional to distance
4202
  between the previous block and the block we're reading.
4203
4204
  If the head was moved, we don't know how much we'll need to wait for the
4205
  plate to rotate. We assume the wait time to be a variate with a mean of
4206
  0.5 of full rotation time.
4207
4208
  Our cost units are "random disk seeks". The cost of random disk seek is
4209
  actually not a constant, it depends one range of cylinders we're going
4210
  to access. We make it constant by introducing a fuzzy concept of "typical 
4211
  datafile length" (it's fuzzy as it's hard to tell whether it should
4212
  include index file, temp.tables etc). Then random seek cost is:
4213
4214
    1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
4215
4216
  We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
4217
4218
  @param table             Table to be accessed
4219
  @param nrows             Number of rows to retrieve
56 by brian
Next pass of true/false update.
4220
  @param interrupted       true <=> Assume that the disk sweep will be
4221
                           interrupted by other disk IO. false - otherwise.
1 by brian
clean slate
4222
  @param cost         OUT  The cost.
4223
*/
4224
4225
void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, 
4226
                         COST_VECT *cost)
4227
{
4228
  cost->zero();
4229
  if (table->file->primary_key_is_clustered())
4230
  {
4231
    cost->io_count= table->file->read_time(table->s->primary_key,
4232
                                           (uint) nrows, nrows);
4233
  }
4234
  else
4235
  {
4236
    double n_blocks=
4237
      ceil(ulonglong2double(table->file->stats.data_file_length) / IO_SIZE);
4238
    double busy_blocks=
4239
      n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
4240
    if (busy_blocks < 1.0)
4241
      busy_blocks= 1.0;
4242
4243
    cost->io_count= busy_blocks;
4244
4245
    if (!interrupted)
4246
    {
4247
      /* Assume reading is done in one 'sweep' */
4248
      cost->avg_io_cost= (DISK_SEEK_BASE_COST +
4249
                          DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
4250
    }
4251
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4252
  return;
1 by brian
clean slate
4253
}
4254
4255
4256
/* **************************************************************************
4257
 * DS-MRR implementation ends
4258
 ***************************************************************************/
4259
4260
/**
4261
  Read first row between two ranges.
4262
4263
  @param start_key		Start key. Is 0 if no min range
4264
  @param end_key		End key.  Is 0 if no max range
4265
  @param eq_range_arg	        Set to 1 if start_key == end_key
4266
  @param sorted		Set to 1 if result should be sorted per key
4267
4268
  @note
4269
    Record is read into table->record[0]
4270
4271
  @retval
4272
    0			Found row
4273
  @retval
4274
    HA_ERR_END_OF_FILE	No rows in range
4275
  @retval
4276
    \#			Error code
4277
*/
4278
int handler::read_range_first(const key_range *start_key,
4279
			      const key_range *end_key,
4280
			      bool eq_range_arg,
77.1.15 by Monty Taylor
Bunch of warning cleanups.
4281
                              bool sorted  __attribute__((__unused__)))
1 by brian
clean slate
4282
{
4283
  int result;
4284
4285
  eq_range= eq_range_arg;
4286
  end_range= 0;
4287
  if (end_key)
4288
  {
4289
    end_range= &save_end_range;
4290
    save_end_range= *end_key;
4291
    key_compare_result_on_equal= ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
4292
				  (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
4293
  }
4294
  range_key_part= table->key_info[active_index].key_part;
4295
4296
  if (!start_key)			// Read first record
4297
    result= index_first(table->record[0]);
4298
  else
4299
    result= index_read_map(table->record[0],
4300
                           start_key->key,
4301
                           start_key->keypart_map,
4302
                           start_key->flag);
4303
  if (result)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4304
    return((result == HA_ERR_KEY_NOT_FOUND) 
1 by brian
clean slate
4305
		? HA_ERR_END_OF_FILE
4306
		: result);
4307
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4308
  return (compare_key(end_range) <= 0 ? 0 : HA_ERR_END_OF_FILE);
1 by brian
clean slate
4309
}
4310
4311
4312
/**
4313
  Read next row between two endpoints.
4314
4315
  @note
4316
    Record is read into table->record[0]
4317
4318
  @retval
4319
    0			Found row
4320
  @retval
4321
    HA_ERR_END_OF_FILE	No rows in range
4322
  @retval
4323
    \#			Error code
4324
*/
4325
int handler::read_range_next()
4326
{
4327
  int result;
4328
4329
  if (eq_range)
4330
  {
4331
    /* We trust that index_next_same always gives a row in range */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4332
    return(index_next_same(table->record[0],
1 by brian
clean slate
4333
                                end_range->key,
4334
                                end_range->length));
4335
  }
4336
  result= index_next(table->record[0]);
4337
  if (result)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4338
    return(result);
4339
  return(compare_key(end_range) <= 0 ? 0 : HA_ERR_END_OF_FILE);
1 by brian
clean slate
4340
}
4341
4342
4343
/**
4344
  Compare if found key (in row) is over max-value.
4345
4346
  @param range		range to compare to row. May be 0 for no range
4347
4348
  @seealso
4349
    key.cc::key_cmp()
4350
4351
  @return
4352
    The return value is SIGN(key_in_row - range_key):
4353
4354
    - 0   : Key is equal to range or 'range' == 0 (no range)
4355
    - -1  : Key is less than range
4356
    - 1   : Key is larger than range
4357
*/
4358
int handler::compare_key(key_range *range)
4359
{
4360
  int cmp;
4361
  if (!range || in_range_check_pushed_down)
4362
    return 0;					// No max range
4363
  cmp= key_cmp(range_key_part, range->key, range->length);
4364
  if (!cmp)
4365
    cmp= key_compare_result_on_equal;
4366
  return cmp;
4367
}
4368
4369
4370
/*
4371
  Same as compare_key() but doesn't check have in_range_check_pushed_down.
4372
  This is used by index condition pushdown implementation.
4373
*/
4374
4375
int handler::compare_key2(key_range *range)
4376
{
4377
  int cmp;
4378
  if (!range)
4379
    return 0;					// no max range
4380
  cmp= key_cmp(range_key_part, range->key, range->length);
4381
  if (!cmp)
4382
    cmp= key_compare_result_on_equal;
4383
  return cmp;
4384
}
4385
4386
int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
4387
                                key_part_map keypart_map,
4388
                                enum ha_rkey_function find_flag)
4389
{
4390
  int error, error1;
4391
  error= index_init(index, 0);
4392
  if (!error)
4393
  {
4394
    error= index_read_map(buf, key, keypart_map, find_flag);
4395
    error1= index_end();
4396
  }
4397
  return error ?  error : error1;
4398
}
4399
4400
4401
/**
4402
  Returns a list of all known extensions.
4403
4404
    No mutexes, worst case race is a minor surplus memory allocation
4405
    We have to recreate the extension map if mysqld is restarted (for example
4406
    within libmysqld)
4407
4408
  @retval
4409
    pointer		pointer to TYPELIB structure
4410
*/
77.1.15 by Monty Taylor
Bunch of warning cleanups.
4411
/*
  plugin_foreach() callback used by ha_known_exts().

  'arg' is a List<char>; every file extension reported by the engine's
  bas_ext() that is not already in the list is appended to it. Engines
  that are not enabled, have no create() function, or fail to produce a
  handler instance contribute nothing.

  Always returns false.
*/
static my_bool exts_handlerton(THD *unused __attribute__((__unused__)),
                               plugin_ref plugin,
                               void *arg)
{
  List<char> *found_exts= (List<char> *) arg;
  handlerton *hton= plugin_data(plugin, handlerton *);
  handler *file;

  if (hton->state != SHOW_OPTION_YES || !hton->create)
    return false;
  if (!(file= hton->create(hton, (TABLE_SHARE*) 0, current_thd->mem_root)))
    return false;

  List_iterator_fast<char> it(*found_exts);

  for (const char **ext= file->bas_ext(); *ext; ext++)
  {
    /* Linear search of the extensions collected so far */
    bool already_known= false;
    while (const char *known= it++)
    {
      if (!strcmp(known, *ext))
      {
        already_known= true;
        break;
      }
    }
    if (!already_known)
      found_exts->push_back((char *) *ext);

    it.rewind();
  }

  delete file;
  return false;
}
4440
4441
/*
  Return the TYPELIB holding the file extensions of all storage engines.

  The list is built on first use and rebuilt whenever mysys_usage_id
  differs from the id recorded at the previous build. No mutex is taken
  here.
*/
TYPELIB *ha_known_exts(void)
{
  /* Fast path: the cached list is still valid */
  if (known_extensions.type_names && mysys_usage_id == known_extensions_id)
    return &known_extensions;

  List<char> found_exts;

  known_extensions_id= mysys_usage_id;

  /* Let every storage engine plugin add its extensions to found_exts */
  plugin_foreach(NULL, exts_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);

  /* One slot per extension plus one for the terminating 0 */
  const char **names= (const char **) my_once_alloc(sizeof(char *)*
                                                    (found_exts.elements+1),
                                                    MYF(MY_WME | MY_FAE));

  assert(names != 0);
  known_extensions.count= found_exts.elements;
  known_extensions.type_names= names;

  List_iterator_fast<char> it(found_exts);
  const char **slot= names;
  while (const char *found= it++)
    *slot++= found;
  *slot= 0;

  return &known_extensions;
}
4468
4469
4470
/*
  Send one (type, file, status) row to the client through thd->protocol.
  All three strings are stored using system_charset_info.

  Returns true if writing the row failed, false otherwise.
*/
static bool stat_print(THD *thd, const char *type, uint type_len,
                       const char *file, uint file_len,
                       const char *status, uint status_len)
{
  Protocol *const out= thd->protocol;

  out->prepare_for_resend();
  out->store(type, type_len, system_charset_info);
  out->store(file, file_len, system_charset_info);
  out->store(status, status_len, system_charset_info);

  return out->write() ? true : false;
}
4483
4484
/*
  Send a storage engine's status to the client.

  Sends the result set header ("Type", "Name", "Status"), then lets the
  engine emit its rows through stat_print() if it provides a show_status
  function, and finally sends EOF when nothing failed.

  Returns true on failure, false on success.
*/
bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
{
  Protocol *protocol= thd->protocol;
  List<Item> field_list;

  field_list.push_back(new Item_empty_string("Type",10));
  field_list.push_back(new Item_empty_string("Name",FN_REFLEN));
  field_list.push_back(new Item_empty_string("Status",10));

  if (protocol->send_fields(&field_list,
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
    return true;

  /* An engine without show_status simply produces an empty result */
  bool failed= false;
  if (db_type->show_status)
    failed= db_type->show_status(db_type, thd, stat_print, stat) ? 1 : 0;

  if (!failed)
    my_eof(thd);
  return failed;
}
4505
4506
4507
/**
4508
  Check if the conditions for row-based binlogging is correct for the table.
4509
4510
  A row in the given table should be replicated if:
4511
  - Row-based replication is enabled in the current thread
4512
  - The binlog is enabled
4513
  - It is not a temporary table
4514
  - The binary log is open
4515
  - The database the table resides in shall be binlogged (binlog_*_db rules)
4516
  - table is not mysql.event
4517
*/
4518
4519
static bool check_table_binlog_row_based(THD *thd, TABLE *table)
4520
{
4521
  if (table->s->cached_row_logging_check == -1)
4522
  {
4523
    int const check(table->s->tmp_table == NO_TMP_TABLE &&
4524
                    binlog_filter->db_ok(table->s->db.str));
4525
    table->s->cached_row_logging_check= check;
4526
  }
4527
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4528
  assert(table->s->cached_row_logging_check == 0 ||
1 by brian
clean slate
4529
              table->s->cached_row_logging_check == 1);
4530
4531
  return (thd->current_stmt_binlog_row_based &&
4532
          table->s->cached_row_logging_check &&
4533
          (thd->options & OPTION_BIN_LOG) &&
4534
          mysql_bin_log.is_open());
4535
}
4536
4537
4538
/**
4539
   Write table maps for all (manually or automatically) locked tables
4540
   to the binary log.
4541
4542
   This function will generate and write table maps for all tables
4543
   that are locked by the thread 'thd'.  Either manually locked
4544
   (stored in THD::locked_tables) and automatically locked (stored
4545
   in THD::lock) are considered.
4546
4547
   @param thd     Pointer to THD structure
4548
4549
   @retval 0   All OK
4550
   @retval 1   Failed to write all table maps
4551
4552
   @sa
4553
       THD::lock
4554
       THD::locked_tables
4555
*/
4556
4557
static int write_locked_table_maps(THD *thd)
4558
{
4559
  if (thd->get_binlog_table_maps() == 0)
4560
  {
4561
    MYSQL_LOCK *locks[3];
4562
    locks[0]= thd->extra_lock;
4563
    locks[1]= thd->lock;
4564
    locks[2]= thd->locked_tables;
4565
    for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
4566
    {
4567
      MYSQL_LOCK const *const lock= locks[i];
4568
      if (lock == NULL)
4569
        continue;
4570
4571
      TABLE **const end_ptr= lock->table + lock->table_count;
4572
      for (TABLE **table_ptr= lock->table ; 
4573
           table_ptr != end_ptr ;
4574
           ++table_ptr)
4575
      {
4576
        TABLE *const table= *table_ptr;
4577
        if (table->current_lock == F_WRLCK &&
4578
            check_table_binlog_row_based(thd, table))
4579
        {
4580
          int const has_trans= table->file->has_transactions();
4581
          int const error= thd->binlog_write_table_map(table, has_trans);
4582
          /*
4583
            If an error occurs, it is the responsibility of the caller to
4584
            roll back the transaction.
4585
          */
4586
          if (unlikely(error))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4587
            return(1);
1 by brian
clean slate
4588
        }
4589
      }
4590
    }
4591
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4592
  return(0);
1 by brian
clean slate
4593
}
4594
4595
4596
typedef bool Log_func(THD*, TABLE*, bool, const uchar*, const uchar*);
4597
4598
static int binlog_log_row(TABLE* table,
4599
                          const uchar *before_record,
4600
                          const uchar *after_record,
4601
                          Log_func *log_func)
4602
{
4603
  if (table->no_replicate)
4604
    return 0;
4605
  bool error= 0;
4606
  THD *const thd= table->in_use;
4607
4608
  if (check_table_binlog_row_based(thd, table))
4609
  {
4610
    /*
4611
      If there are no table maps written to the binary log, this is
4612
      the first row handled in this statement. In that case, we need
4613
      to write table maps for all locked tables to the binary log.
4614
    */
4615
    if (likely(!(error= write_locked_table_maps(thd))))
4616
    {
4617
      bool const has_trans= table->file->has_transactions();
4618
      error= (*log_func)(thd, table, has_trans, before_record, after_record);
4619
    }
4620
  }
4621
  return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
4622
}
4623
4624
/*
  Wrapper around external_lock() that refreshes the cached table flags
  when the call succeeds. Returns whatever external_lock() returned.
*/
int handler::ha_external_lock(THD *thd, int lock_type)
{
  /*
    This must hold for both lock and unlock: if get_auto_increment() was
    called (and so may have reserved intervals or taken a table lock),
    ha_release_auto_increment() must have been called as well.
  */
  assert(next_insert_id == 0);

  MYSQL_EXTERNAL_LOCK(lock_type);

  int const error= external_lock(thd, lock_type);

  /*
    Cache the table flags only if the locking succeeded; otherwise keep
    them as they were when they were fetched in ha_open().
  */
  if (!error)
    cached_table_flags= table_flags();

  return error;
}
4644
4645
4646
/**
4647
  Check handler usage and reset state of file to after 'open'
4648
*/
4649
int handler::ha_reset()
4650
{
4651
  /* Check that we have called all proper deallocation functions */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4652
  assert((uchar*) table->def_read_set.bitmap +
1 by brian
clean slate
4653
              table->s->column_bitmap_size ==
4654
              (uchar*) table->def_write_set.bitmap);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4655
  assert(bitmap_is_set_all(&table->s->all_set));
4656
  assert(table->key_read == 0);
1 by brian
clean slate
4657
  /* ensure that ha_index_end / ha_rnd_end has been called */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4658
  assert(inited == NONE);
1 by brian
clean slate
4659
  /* Free cache used by filesort */
4660
  free_io_cache(table);
4661
  /* reset the bitmaps to point to defaults */
4662
  table->default_column_bitmaps();
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4663
  return(reset());
1 by brian
clean slate
4664
}
4665
4666
4667
/*
  Insert a row through the storage engine and, if that succeeds, log the
  change to the binary log via binlog_log_row().

  buf is the row image passed to write_row() and logged as the after-image.

  Returns 0 on success, otherwise the error returned by write_row() or by
  binlog_log_row().
*/
int handler::ha_write_row(uchar *buf)
{
  int error;
  Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
  MYSQL_INSERT_ROW_START();

  mark_trx_read_write();

  /* Only log the row if the engine accepted it */
  if (likely(!(error= write_row(buf))))
    error= binlog_log_row(table, 0, buf, log_func);

  /*
    Single exit point so that the END probe is fired on the error paths
    too; previously a failed insert left MYSQL_INSERT_ROW_START() without
    a matching MYSQL_INSERT_ROW_END().
  */
  MYSQL_INSERT_ROW_END();
  return error;
}
4682
4683
4684
int handler::ha_update_row(const uchar *old_data, uchar *new_data)
4685
{
4686
  int error;
4687
  Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
4688
4689
  /*
4690
    Some storage engines require that the new record is in record[0]
4691
    (and the old record is in record[1]).
4692
   */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4693
  assert(new_data == table->record[0]);
1 by brian
clean slate
4694
4695
  mark_trx_read_write();
4696
4697
  if (unlikely(error= update_row(old_data, new_data)))
4698
    return error;
4699
  if (unlikely(error= binlog_log_row(table, old_data, new_data, log_func)))
4700
    return error;
4701
  return 0;
4702
}
4703
4704
int handler::ha_delete_row(const uchar *buf)
4705
{
4706
  int error;
4707
  Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
4708
4709
  mark_trx_read_write();
4710
4711
  if (unlikely(error= delete_row(buf)))
4712
    return error;
4713
  if (unlikely(error= binlog_log_row(table, buf, 0, log_func)))
4714
    return error;
4715
  return 0;
4716
}
4717
4718
4719
4720
/**
4721
  @details
4722
  use_hidden_primary_key() is called in case of an update/delete when
4723
  (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
4724
  but we don't have a primary key
4725
*/
4726
void handler::use_hidden_primary_key()
4727
{
4728
  /* fallback to use all columns in the table to identify row */
4729
  table->use_all_columns();
4730
}