~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000-2006 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
/**
17
  @file handler.cc
18
19
  Handler-calling-functions
20
*/
21
22
#ifdef USE_PRAGMA_IMPLEMENTATION
23
#pragma implementation				// gcc: Class implementation
24
#endif
25
26
#include "mysql_priv.h"
27
#include "rpl_filter.h"
28
#include <myisampack.h>
29
#include <errno.h>
30
31
/*
32
  While we have legacy_db_type, we have this array to
33
  check for dups and to find handlerton from legacy_db_type.
34
  Remove when legacy_db_type is finally gone
35
*/
36
st_plugin_int *hton2plugin[MAX_HA];
37
38
static handlerton *installed_htons[128];
39
40
#define BITMAP_STACKBUF_SIZE (128/8)
41
42
KEY_CREATE_INFO default_key_create_info= { HA_KEY_ALG_UNDEF, 0, {NullS,0}, {NullS,0} };
43
44
/* number of entries in handlertons[] */
61 by Brian Aker
Conversion of handler type.
45
uint32_t total_ha= 0;
1 by brian
clean slate
46
/* number of storage engines (from handlertons[]) that support 2pc */
61 by Brian Aker
Conversion of handler type.
47
uint32_t total_ha_2pc= 0;
1 by brian
clean slate
48
/* size of savepoint storage area (see ha_init) */
61 by Brian Aker
Conversion of handler type.
49
uint32_t savepoint_alloc_size= 0;
1 by brian
clean slate
50
51
static const LEX_STRING sys_table_aliases[]=
52
{
53
  { C_STRING_WITH_LEN("INNOBASE") },  { C_STRING_WITH_LEN("INNODB") },
54
  { C_STRING_WITH_LEN("HEAP") },      { C_STRING_WITH_LEN("MEMORY") },
55
  {NullS, 0}
56
};
57
58
const char *ha_row_type[] = {
59
  "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "PAGE", "?","?","?"
60
};
61
62
const char *tx_isolation_names[] =
63
{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
64
  NullS};
65
TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
66
			       tx_isolation_names, NULL};
67
68
static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
69
uint known_extensions_id= 0;
70
71
72
73
/**
  Pick the default storage engine plugin for a thread.

  The thread's own table_plugin is returned as-is when it is set;
  otherwise the result of my_plugin_lock() on the global table_plugin
  is returned.

  @param thd         current thread

  @return
    plugin reference of the default storage engine
*/
static plugin_ref ha_default_plugin(THD *thd)
{
  plugin_ref session_plugin= thd->variables.table_plugin;

  if (!session_plugin)
    return my_plugin_lock(thd, &global_system_variables.table_plugin);

  return session_plugin;
}
79
80
81
/**
82
  Return the default storage engine handlerton for thread
83
84
  @param ha_default_handlerton(thd)
85
  @param thd         current thread
86
87
  @return
88
    pointer to handlerton
89
*/
90
handlerton *ha_default_handlerton(THD *thd)
91
{
92
  plugin_ref plugin= ha_default_plugin(thd);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
93
  assert(plugin);
1 by brian
clean slate
94
  handlerton *hton= plugin_data(plugin, handlerton*);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
95
  assert(hton);
1 by brian
clean slate
96
  return hton;
97
}
98
99
100
/**
101
  Return the storage engine handlerton for the supplied name
102
  
103
  @param thd         current thread
104
  @param name        name of storage engine
105
  
106
  @return
107
    pointer to storage engine plugin handle
108
*/
109
plugin_ref ha_resolve_by_name(THD *thd, const LEX_STRING *name)
110
{
111
  const LEX_STRING *table_alias;
112
  plugin_ref plugin;
113
114
redo:
115
  /* my_strnncoll is a macro and gcc doesn't do early expansion of macro */
116
  if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
117
                           (const uchar *)name->str, name->length,
118
                           (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
119
    return ha_default_plugin(thd);
120
121
  if ((plugin= my_plugin_lock_by_name(thd, name, MYSQL_STORAGE_ENGINE_PLUGIN)))
122
  {
123
    handlerton *hton= plugin_data(plugin, handlerton *);
124
    if (!(hton->flags & HTON_NOT_USER_SELECTABLE))
125
      return plugin;
126
      
127
    /*
128
      unlocking plugin immediately after locking is relatively low cost.
129
    */
130
    plugin_unlock(thd, plugin);
131
  }
132
133
  /*
134
    We check for the historical aliases.
135
  */
136
  for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
137
  {
138
    if (!my_strnncoll(&my_charset_latin1,
139
                      (const uchar *)name->str, name->length,
140
                      (const uchar *)table_alias->str, table_alias->length))
141
    {
142
      name= table_alias + 1;
143
      goto redo;
144
    }
145
  }
146
147
  return NULL;
148
}
149
150
151
/**
  Lock the plugin that owns a handlerton.

  @param thd         current thread
  @param hton        storage engine handlerton, may be NULL

  @return
    result of my_plugin_lock() for the plugin registered in the
    handlerton's slot of hton2plugin[], or NULL when hton is NULL
*/
plugin_ref ha_lock_engine(THD *thd, handlerton *hton)
{
  if (!hton)
    return NULL;

  st_plugin_int **slot_entry= hton2plugin + hton->slot;

  return my_plugin_lock(thd, &slot_entry);
}
161
162
163
/**
  Find the handlerton registered for a legacy storage engine type code.

  @param thd         current thread
  @param db_type     legacy type code

  @return
    the thread's default handlerton for DB_TYPE_DEFAULT; the handlerton
    installed for db_type when the code lies strictly between
    DB_TYPE_UNKNOWN and DB_TYPE_DEFAULT and its plugin could be locked;
    NULL otherwise
*/
handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
{
  if (db_type == DB_TYPE_DEFAULT)
    return ha_default_handlerton(thd);

  if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT)
  {
    plugin_ref locked= ha_lock_engine(thd, installed_htons[db_type]);
    if (locked)
      return plugin_data(locked, handlerton*);
  }

  return NULL;
}
178
179
180
/**
181
  Use other database handler if databasehandler is not compiled in.
182
*/
183
handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
184
                          bool no_substitute, bool report_error)
185
{
186
  handlerton *hton= ha_resolve_by_legacy_type(thd, database_type);
187
  if (ha_storage_engine_is_enabled(hton))
188
    return hton;
189
190
  if (no_substitute)
191
  {
192
    if (report_error)
193
    {
194
      const char *engine_name= ha_resolve_storage_engine_name(hton);
195
      my_error(ER_FEATURE_DISABLED,MYF(0),engine_name,engine_name);
196
    }
197
    return NULL;
198
  }
199
200
  switch (database_type) {
201
  case DB_TYPE_HASH:
202
    return ha_resolve_by_legacy_type(thd, DB_TYPE_HASH);
203
  default:
204
    break;
205
  }
206
207
  return ha_default_handlerton(thd);
208
} /* ha_checktype */
209
210
211
/**
  Create a handler object for a table using the given storage engine.

  @param share       table share passed on to the engine's create call
  @param alloc       memory root passed on to the engine's create call
  @param db_type     storage engine to use, may be NULL

  @return
    handler produced by the engine's create call (init() has been invoked
    on it when it is non-NULL). If db_type is NULL, is not in state
    SHOW_OPTION_YES or has no create call, the thread's default engine is
    tried instead.
*/
handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
                         handlerton *db_type)
{
  const bool engine_usable= db_type &&
                            db_type->state == SHOW_OPTION_YES &&
                            db_type->create;

  if (!engine_usable)
  {
    /*
      Try the default table type
      Here the call to current_thd() is ok as we call this function a lot of
      times but we enter this branch very seldom.
    */
    return(get_new_handler(share, alloc, ha_default_handlerton(current_thd)));
  }

  handler *new_handler= db_type->create(db_type, share, alloc);
  if (new_handler)
    new_handler->init();

  return(new_handler);
}
229
230
231
/**
232
  Register handler error messages for use with my_error().
233
234
  @retval
235
    0           OK
236
  @retval
237
    !=0         Error
238
*/
239
240
int ha_init_errors(void)
241
{
242
#define SETMSG(nr, msg) errmsgs[(nr) - HA_ERR_FIRST]= (msg)
243
  const char    **errmsgs;
244
245
  /* Allocate a pointer array for the error message strings. */
246
  /* Zerofill it to avoid uninitialized gaps. */
247
  if (! (errmsgs= (const char**) my_malloc(HA_ERR_ERRORS * sizeof(char*),
248
                                           MYF(MY_WME | MY_ZEROFILL))))
249
    return 1;
250
251
  /* Set the dedicated error messages. */
252
  SETMSG(HA_ERR_KEY_NOT_FOUND,          ER(ER_KEY_NOT_FOUND));
253
  SETMSG(HA_ERR_FOUND_DUPP_KEY,         ER(ER_DUP_KEY));
254
  SETMSG(HA_ERR_RECORD_CHANGED,         "Update wich is recoverable");
255
  SETMSG(HA_ERR_WRONG_INDEX,            "Wrong index given to function");
256
  SETMSG(HA_ERR_CRASHED,                ER(ER_NOT_KEYFILE));
257
  SETMSG(HA_ERR_WRONG_IN_RECORD,        ER(ER_CRASHED_ON_USAGE));
258
  SETMSG(HA_ERR_OUT_OF_MEM,             "Table handler out of memory");
259
  SETMSG(HA_ERR_NOT_A_TABLE,            "Incorrect file format '%.64s'");
260
  SETMSG(HA_ERR_WRONG_COMMAND,          "Command not supported");
261
  SETMSG(HA_ERR_OLD_FILE,               ER(ER_OLD_KEYFILE));
262
  SETMSG(HA_ERR_NO_ACTIVE_RECORD,       "No record read in update");
263
  SETMSG(HA_ERR_RECORD_DELETED,         "Intern record deleted");
264
  SETMSG(HA_ERR_RECORD_FILE_FULL,       ER(ER_RECORD_FILE_FULL));
265
  SETMSG(HA_ERR_INDEX_FILE_FULL,        "No more room in index file '%.64s'");
266
  SETMSG(HA_ERR_END_OF_FILE,            "End in next/prev/first/last");
267
  SETMSG(HA_ERR_UNSUPPORTED,            ER(ER_ILLEGAL_HA));
268
  SETMSG(HA_ERR_TO_BIG_ROW,             "Too big row");
269
  SETMSG(HA_WRONG_CREATE_OPTION,        "Wrong create option");
270
  SETMSG(HA_ERR_FOUND_DUPP_UNIQUE,      ER(ER_DUP_UNIQUE));
271
  SETMSG(HA_ERR_UNKNOWN_CHARSET,        "Can't open charset");
272
  SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF,    ER(ER_WRONG_MRG_TABLE));
273
  SETMSG(HA_ERR_CRASHED_ON_REPAIR,      ER(ER_CRASHED_ON_REPAIR));
274
  SETMSG(HA_ERR_CRASHED_ON_USAGE,       ER(ER_CRASHED_ON_USAGE));
275
  SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT,      ER(ER_LOCK_WAIT_TIMEOUT));
276
  SETMSG(HA_ERR_LOCK_TABLE_FULL,        ER(ER_LOCK_TABLE_FULL));
277
  SETMSG(HA_ERR_READ_ONLY_TRANSACTION,  ER(ER_READ_ONLY_TRANSACTION));
278
  SETMSG(HA_ERR_LOCK_DEADLOCK,          ER(ER_LOCK_DEADLOCK));
279
  SETMSG(HA_ERR_CANNOT_ADD_FOREIGN,     ER(ER_CANNOT_ADD_FOREIGN));
280
  SETMSG(HA_ERR_NO_REFERENCED_ROW,      ER(ER_NO_REFERENCED_ROW_2));
281
  SETMSG(HA_ERR_ROW_IS_REFERENCED,      ER(ER_ROW_IS_REFERENCED_2));
282
  SETMSG(HA_ERR_NO_SAVEPOINT,           "No savepoint with that name");
283
  SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE,  "Non unique key block size");
284
  SETMSG(HA_ERR_NO_SUCH_TABLE,          "No such table: '%.64s'");
285
  SETMSG(HA_ERR_TABLE_EXIST,            ER(ER_TABLE_EXISTS_ERROR));
286
  SETMSG(HA_ERR_NO_CONNECTION,          "Could not connect to storage engine");
287
  SETMSG(HA_ERR_TABLE_DEF_CHANGED,      ER(ER_TABLE_DEF_CHANGED));
288
  SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY,  "FK constraint would lead to duplicate key");
289
  SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE,    ER(ER_TABLE_NEEDS_UPGRADE));
290
  SETMSG(HA_ERR_TABLE_READONLY,         ER(ER_OPEN_AS_READONLY));
291
  SETMSG(HA_ERR_AUTOINC_READ_FAILED,    ER(ER_AUTOINC_READ_FAILED));
292
  SETMSG(HA_ERR_AUTOINC_ERANGE,         ER(ER_WARN_DATA_OUT_OF_RANGE));
293
294
  /* Register the error messages for use with my_error(). */
295
  return my_error_register(errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
296
}
297
298
299
/**
300
  Unregister handler error messages.
301
302
  @retval
303
    0           OK
304
  @retval
305
    !=0         Error
306
*/
307
static int ha_finish_errors(void)
308
{
309
  const char    **errmsgs;
310
311
  /* Allocate a pointer array for the error message strings. */
312
  if (! (errmsgs= my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST)))
313
    return 1;
314
  my_free((uchar*) errmsgs, MYF(0));
315
  return 0;
316
}
317
318
319
/**
  Release the handlerton that belongs to a storage engine plugin.

  An enabled engine is removed from installed_htons[], the engine's panic
  call (if any) is invoked with HA_PANIC_CLOSE and the handlerton is freed.

  @param plugin      plugin descriptor whose data member holds the handlerton

  @retval
    0           always
*/
int ha_finalize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton= (handlerton *)plugin->data;

  /* No handlerton attached to this plugin: nothing to tear down. */
  if (!hton)
    return(0);

  switch (hton->state)
  {
  case SHOW_OPTION_NO:
  case SHOW_OPTION_DISABLED:
    break;
  case SHOW_OPTION_YES:
    /* Only clear the legacy-type entry if it still refers to this engine. */
    if (installed_htons[hton->db_type] == hton)
      installed_htons[hton->db_type]= NULL;
    break;
  };

  if (hton->panic)
    hton->panic(hton, HA_PANIC_CLOSE);

  my_free((uchar*)hton, MYF(0));
  /* Do not leave a pointer to freed memory behind in the plugin. */
  plugin->data= NULL;

  return(0);
}
341
342
343
/**
  Allocate and set up the handlerton for a storage engine plugin.

  A handlerton is allocated, cleared and stored in plugin->data, then the
  plugin's init function (if any) is called on it. For an engine whose
  state is SHOW_OPTION_YES a legacy db_type code is validated or assigned,
  its savepoint offset is placed in the shared savepoint area and a slot
  in hton2plugin[] is reserved. Any state other than SHOW_OPTION_NO and
  SHOW_OPTION_YES is turned into SHOW_OPTION_DISABLED.

  @param plugin      plugin descriptor being initialized

  @retval
    0           OK
  @retval
    1           Error
*/
int ha_initialize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton;

  hton= (handlerton *)my_malloc(sizeof(handlerton),
                                MYF(MY_WME | MY_ZEROFILL));
  plugin->data= hton; // shortcut for the future
  if (!hton)
    return(1);

  /*
    FIXME: the MY_ZEROFILL flag above doesn't zero all the bytes.

    This was detected after adding get_backup_engine member to handlerton
    structure. Apparently get_backup_engine was not NULL even though it was
    not initialized.

    Historical Requirement: clear the structure explicitly. The size must
    be that of the object (sizeof(*hton)); sizeof(hton) is only the size
    of the pointer and would leave most members untouched.
   */
  bzero(hton, sizeof(*hton));

  if (plugin->plugin->init)
  {
    if (plugin->plugin->init(hton))
    {
      sql_print_error("Plugin '%s' init function returned error.",
                      plugin->name.str);
      /*
        NOTE(review): hton stays allocated and reachable via plugin->data
        on the error returns of this function; presumably the plugin layer
        releases it through ha_finalize_handlerton() -- confirm.
      */
      return(1);
    }
  }

  /*
    the switch below and hton->state should be removed when
    command-line options for plugins will be implemented
  */
  switch (hton->state) {
  case SHOW_OPTION_NO:
    break;
  case SHOW_OPTION_YES:
    {
      uint tmp;

      /* hton2plugin[] has MAX_HA entries; refuse to write past its end. */
      if (total_ha >= MAX_HA)
      {
        sql_print_warning("Too many storage engines!");
        return(1);
      }

      /* now check the db_type for conflict */
      if (hton->db_type <= DB_TYPE_UNKNOWN ||
          hton->db_type >= DB_TYPE_DEFAULT ||
          installed_htons[hton->db_type])
      {
        /* Pick the first free dynamic type code instead. */
        int idx= (int) DB_TYPE_FIRST_DYNAMIC;

        while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
          idx++;

        if (idx == (int) DB_TYPE_DEFAULT)
        {
          sql_print_warning("Too many storage engines!");
          return(1);
        }
        if (hton->db_type != DB_TYPE_UNKNOWN)
          sql_print_warning("Storage engine '%s' has conflicting typecode. "
                            "Assigning value %d.", plugin->plugin->name, idx);
        hton->db_type= (enum legacy_db_type) idx;
      }
      installed_htons[hton->db_type]= hton;

      /*
        The engine reported how much savepoint space it needs in
        savepoint_offset; replace that with its offset in the shared area
        and grow the area by the requested amount.
      */
      tmp= hton->savepoint_offset;
      hton->savepoint_offset= savepoint_alloc_size;
      savepoint_alloc_size+= tmp;

      hton->slot= total_ha++;
      hton2plugin[hton->slot]=plugin;
      if (hton->prepare)
        total_ha_2pc++;
      break;
    }
  default:
    hton->state= SHOW_OPTION_DISABLED;
    break;
  }

  /*
    This is entirely for legacy. We will create a new "disk based" hton and a
    "memory" hton which will be configurable longterm. We should be able to
    remove partition and myisammrg.
  */
  switch (hton->db_type) {
  case DB_TYPE_HEAP:
    heap_hton= hton;
    break;
  case DB_TYPE_MYISAM:
    myisam_hton= hton;
    break;
  default:
    break;
  };

  return(0);
}
435
436
int ha_init()
437
{
438
  int error= 0;
439
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
440
  assert(total_ha < MAX_HA);
1 by brian
clean slate
441
  /*
442
    Check if there is a transaction-capable storage engine besides the
443
    binary log (which is considered a transaction-capable storage engine in
444
    counting total_ha)
445
  */
61 by Brian Aker
Conversion of handler type.
446
  opt_using_transactions= total_ha>(uint32_t)opt_bin_log;
1 by brian
clean slate
447
  savepoint_alloc_size+= sizeof(SAVEPOINT);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
448
  return(error);
1 by brian
clean slate
449
}
450
451
int ha_end()
452
{
453
  int error= 0;
454
455
  /* 
456
    This should be eventualy based  on the graceful shutdown flag.
457
    So if flag is equal to HA_PANIC_CLOSE, the deallocate
458
    the errors.
459
  */
460
  if (ha_finish_errors())
461
    error= 1;
462
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
463
  return(error);
1 by brian
clean slate
464
}
465
149 by Brian Aker
More bool conversion.
466
static bool dropdb_handlerton(THD *unused1 __attribute__((__unused__)),
467
                              plugin_ref plugin,
468
                              void *path)
1 by brian
clean slate
469
{
470
  handlerton *hton= plugin_data(plugin, handlerton *);
471
  if (hton->state == SHOW_OPTION_YES && hton->drop_database)
472
    hton->drop_database(hton, (char *)path);
56 by brian
Next pass of true/false update.
473
  return false;
1 by brian
clean slate
474
}
475
476
477
void ha_drop_database(char* path)
478
{
479
  plugin_foreach(NULL, dropdb_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, path);
480
}
481
482
149 by Brian Aker
More bool conversion.
483
static bool closecon_handlerton(THD *thd, plugin_ref plugin,
484
                                void *unused __attribute__((__unused__)))
1 by brian
clean slate
485
{
486
  handlerton *hton= plugin_data(plugin, handlerton *);
487
  /*
488
    there's no need to rollback here as all transactions must
489
    be rolled back already
490
  */
491
  if (hton->state == SHOW_OPTION_YES && hton->close_connection &&
492
      thd_get_ha_data(thd, hton))
493
    hton->close_connection(hton, thd);
56 by brian
Next pass of true/false update.
494
  return false;
1 by brian
clean slate
495
}
496
497
498
/**
499
  @note
500
    don't bother to rollback here, it's done already
501
*/
502
void ha_close_connection(THD* thd)
503
{
504
  plugin_foreach(thd, closecon_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, 0);
505
}
506
507
/* ========================================================================
508
 ======================= TRANSACTIONS ===================================*/
509
510
/**
511
  Transaction handling in the server
512
  ==================================
513
514
  In each client connection, MySQL maintains two transactional
515
  states:
516
  - a statement transaction,
517
  - a standard, also called normal transaction.
518
519
  Historical note
520
  ---------------
521
  "Statement transaction" is a non-standard term that comes
522
  from the times when MySQL supported BerkeleyDB storage engine.
523
524
  First of all, it should be said that in BerkeleyDB auto-commit
525
  mode auto-commits operations that are atomic to the storage
526
  engine itself, such as a write of a record, and are too
527
  high-granular to be atomic from the application perspective
528
  (MySQL). One SQL statement could involve many BerkeleyDB
529
  auto-committed operations and thus BerkeleyDB auto-commit was of
530
  little use to MySQL.
531
532
  Secondly, instead of SQL standard savepoints, BerkeleyDB
533
  provided the concept of "nested transactions". In a nutshell,
534
  transactions could be arbitrarily nested, but when the parent
535
  transaction was committed or aborted, all its child (nested)
536
  transactions were committed or aborted as well.
537
  Commit of a nested transaction, in turn, made its changes
538
  visible, but not durable: it destroyed the nested transaction,
539
  all its changes would become available to the parent and
540
  currently active nested transactions of this parent.
541
542
  So the mechanism of nested transactions was employed to
543
  provide "all or nothing" guarantee of SQL statements
544
  required by the standard.
545
  A nested transaction would be created at start of each SQL
546
  statement, and destroyed (committed or aborted) at statement
547
  end. Such nested transaction was internally referred to as
548
  a "statement transaction" and gave birth to the term.
549
550
  <Historical note ends>
551
552
  Since then a statement transaction is started for each statement
553
  that accesses transactional tables or uses the binary log.  If
554
  the statement succeeds, the statement transaction is committed.
555
  If the statement fails, the transaction is rolled back. Commits
556
  of statement transactions are not durable -- each such
557
  transaction is nested in the normal transaction, and if the
558
  normal transaction is rolled back, the effects of all enclosed
559
  statement transactions are undone as well.  Technically,
560
  a statement transaction can be viewed as a savepoint which is
561
  maintained automatically in order to make effects of one
562
  statement atomic.
563
564
  The normal transaction is started by the user and is ended
565
  usually upon a user request as well. The normal transaction
566
  encloses transactions of all statements issued between
567
  its beginning and its end.
568
  In autocommit mode, the normal transaction is equivalent
569
  to the statement transaction.
570
571
  Since MySQL supports PSEA (pluggable storage engine
572
  architecture), more than one transactional engine can be
573
  active at a time. Hence transactions, from the server
574
  point of view, are always distributed. In particular,
575
  transactional state is maintained independently for each
576
  engine. In order to commit a transaction the two phase
577
  commit protocol is employed.
578
579
  Not all statements are executed in context of a transaction.
580
  Administrative and status information statements do not modify
581
  engine data, and thus do not start a statement transaction and
582
  also have no effect on the normal transaction. Examples of such
583
  statements are SHOW STATUS and RESET SLAVE.
584
585
  Similarly DDL statements are not transactional,
586
  and therefore a transaction is [almost] never started for a DDL
587
  statement. The difference between a DDL statement and a purely
588
  administrative statement though is that a DDL statement always
589
  commits the current transaction before proceeding, if there is
590
  any.
591
592
  At last, SQL statements that work with non-transactional
593
  engines also have no effect on the transaction state of the
594
  connection. Even though they are written to the binary log,
595
  and the binary log is, overall, transactional, the writes
596
  are done in "write-through" mode, directly to the binlog
597
  file, followed with a OS cache sync, in other words,
598
  bypassing the binlog undo log (translog).
599
  They do not commit the current normal transaction.
600
  A failure of a statement that uses non-transactional tables
601
  would cause a rollback of the statement transaction, but
602
  in case no transactional tables are used,
603
  no statement transaction is started.
604
605
  Data layout
606
  -----------
607
608
  The server stores its transaction-related data in
609
  thd->transaction. This structure has two members of type
610
  THD_TRANS. These members correspond to the statement and
611
  normal transactions respectively:
612
613
  - thd->transaction.stmt contains a list of engines
614
  that are participating in the given statement
615
  - thd->transaction.all contains a list of engines that
616
  have participated in any of the statement transactions started
617
  within the context of the normal transaction.
618
  Each element of the list contains a pointer to the storage
619
  engine, engine-specific transactional data, and engine-specific
620
  transaction flags.
621
622
  In autocommit mode thd->transaction.all is empty.
623
  Instead, data of thd->transaction.stmt is
624
  used to commit/rollback the normal transaction.
625
626
  The list of registered engines has a few important properties:
627
  - no engine is registered in the list twice
628
  - engines are present in the list in reverse temporal order --
629
  new participants are always added to the beginning of the list.
630
631
  Transaction life cycle
632
  ----------------------
633
634
  When a new connection is established, thd->transaction
635
  members are initialized to an empty state.
636
  If a statement uses any tables, all affected engines
637
  are registered in the statement engine list. In
638
  non-autocommit mode, the same engines are registered in
639
  the normal transaction list.
640
  At the end of the statement, the server issues a commit
641
  or a roll back for all engines in the statement list.
642
  At this point transaction flags of an engine, if any, are
643
  propagated from the statement list to the list of the normal
644
  transaction.
645
  When commit/rollback is finished, the statement list is
646
  cleared. It will be filled in again by the next statement,
647
  and emptied again at the next statement's end.
648
649
  The normal transaction is committed in a similar way
650
  (by going over all engines in thd->transaction.all list)
651
  but at different times:
652
  - when a COMMIT SQL statement is issued by the user
653
  - implicitly, by the server, at the beginning of a DDL statement
654
  or SET AUTOCOMMIT={0|1} statement.
655
656
  The normal transaction can be rolled back as well:
657
  - if the user has requested so, by issuing ROLLBACK SQL
658
  statement
659
  - if one of the storage engines requested a rollback
660
  by setting thd->transaction_rollback_request. This may
661
  happen in case, e.g., when the transaction in the engine was
662
  chosen a victim of the internal deadlock resolution algorithm
663
  and rolled back internally. When such a situation happens, there
664
  is little the server can do and the only option is to rollback
665
  transactions in all other participating engines.  In this case
666
  the rollback is accompanied by an error sent to the user.
667
668
  As follows from the use cases above, the normal transaction
669
  is never committed when there is an outstanding statement
670
  transaction. In most cases there is no conflict, since
671
  commits of the normal transaction are issued by a stand-alone
672
  administrative or DDL statement, thus no outstanding statement
673
  transaction of the previous statement exists. Besides,
674
  all statements that manipulate with the normal transaction
675
  are prohibited in stored functions and triggers, therefore
676
  no conflicting situation can occur in a sub-statement either.
677
  The remaining rare cases when the server explicitly has
678
  to commit the statement transaction prior to committing the normal
679
  one cover error-handling scenarios (see for example
680
  SQLCOM_LOCK_TABLES).
681
682
  When committing a statement or a normal transaction, the server
683
  either uses the two-phase commit protocol, or issues a commit
684
  in each engine independently. The two-phase commit protocol
685
  is used only if:
686
  - all participating engines support two-phase commit (provide
687
    handlerton::prepare PSEA API call) and
688
  - transactions in at least two engines modify data (i.e. are
689
  not read-only).
690
691
  Note that the two phase commit is used for
692
  statement transactions, even though they are not durable anyway.
693
  This is done to ensure logical consistency of data in a multiple-
694
  engine transaction.
695
  For example, imagine that some day MySQL supports unique
696
  constraint checks deferred till the end of statement. In such
697
  case a commit in one of the engines may yield ER_DUP_KEY,
698
  and MySQL should be able to gracefully abort statement
699
  transactions of other participants.
700
701
  After the normal transaction has been committed,
702
  thd->transaction.all list is cleared.
703
704
  When a connection is closed, the current normal transaction, if
705
  any, is rolled back.
706
707
  Roles and responsibilities
708
  --------------------------
709
710
  The server has no way to know that an engine participates in
711
  the statement and a transaction has been started
712
  in it unless the engine says so. Thus, in order to be
713
  a part of a transaction, the engine must "register" itself.
714
  This is done by invoking trans_register_ha() server call.
715
  Normally the engine registers itself whenever handler::external_lock()
716
  is called. trans_register_ha() can be invoked many times: if
717
  an engine is already registered, the call does nothing.
718
  In case autocommit is not set, the engine must register itself
719
  twice -- both in the statement list and in the normal transaction
720
  list.
721
  In which list to register is a parameter of trans_register_ha().
722
723
  Note, that although the registration interface in itself is
724
  fairly clear, the current usage practice often leads to undesired
725
  effects. E.g. since a call to trans_register_ha() in most engines
726
  is embedded into implementation of handler::external_lock(), some
727
  DDL statements start a transaction (at least from the server
728
  point of view) even though they are not expected to. E.g.
729
  CREATE TABLE does not start a transaction, since
730
  handler::external_lock() is never called during CREATE TABLE. But
731
  CREATE TABLE ... SELECT does, since handler::external_lock() is
732
  called for the table that is being selected from. This has no
733
  practical effects currently, but must be kept in mind
734
  nevertheless.
735
736
  Once an engine is registered, the server will do the rest
737
  of the work.
738
739
  During statement execution, whenever any of data-modifying
740
  PSEA API methods is used, e.g. handler::write_row() or
741
  handler::update_row(), the read-write flag is raised in the
742
  statement transaction for the involved engine.
743
  Currently all PSEA calls are "traced", and the data can not be
744
  changed in a way other than issuing a PSEA call. Important:
745
  unless this invariant is preserved the server will not know that
746
  a transaction in a given engine is read-write and will not
747
  involve the two-phase commit protocol!
748
749
  At the end of a statement, server call
750
  ha_autocommit_or_rollback() is invoked. This call in turn
751
  invokes handlerton::prepare() for every involved engine.
752
  Prepare is followed by a call to handlerton::commit_one_phase()
753
  If a one-phase commit will suffice, handlerton::prepare() is not
754
  invoked and the server only calls handlerton::commit_one_phase().
755
  At statement commit, the statement-related read-write engine
756
  flag is propagated to the corresponding flag in the normal
757
  transaction.  When the commit is complete, the list of registered
758
  engines is cleared.
759
760
  Rollback is handled in a similar fashion.
761
762
  Additional notes on DDL and the normal transaction.
763
  ---------------------------------------------------
764
765
  DDLs and operations with non-transactional engines
766
  do not "register" in thd->transaction lists, and thus do not
767
  modify the transaction state. Besides, each DDL in
768
  MySQL is prefixed with an implicit normal transaction commit
769
  (a call to end_active_trans()), and thus leaves nothing
770
  to modify.
771
  However, as it has been pointed out with CREATE TABLE .. SELECT,
772
  some DDL statements can start a *new* transaction.
773
774
  Behaviour of the server in this case is currently badly
775
  defined.
776
  DDL statements use a form of "semantic" logging
777
  to maintain atomicity: if CREATE TABLE .. SELECT failed,
778
  the newly created table is deleted.
779
  In addition, some DDL statements issue interim transaction
780
  commits: e.g. ALTER TABLE issues a commit after data is copied
781
  from the original table to the internal temporary table. Other
782
  statements, e.g. CREATE TABLE ... SELECT do not always commit
783
  after itself.
784
  And finally there is a group of DDL statements such as
785
  RENAME/DROP TABLE that doesn't start a new transaction
786
  and doesn't commit.
787
788
  This diversity makes it hard to say what will happen if
789
  by chance a stored function is invoked during a DDL --
790
  whether any modifications it makes will be committed or not
791
  is not clear. Fortunately, SQL grammar of few DDLs allows
792
  invocation of a stored function.
793
794
  A consistent behaviour is perhaps to always commit the normal
795
  transaction after all DDLs, just like the statement transaction
796
  is always committed at the end of all statements.
797
*/
798
799
/**
800
  Register a storage engine for a transaction.
801
802
  Every storage engine MUST call this function when it starts
803
  a transaction or a statement (that is it must be called both for the
804
  "beginning of transaction" and "beginning of statement").
805
  Only storage engines registered for the transaction/statement
806
  will know when to commit/rollback it.
807
808
  @note
809
    trans_register_ha is idempotent - storage engine may register many
810
    times per transaction.
811
812
*/
813
void trans_register_ha(THD *thd, bool all, handlerton *ht_arg)
814
{
815
  THD_TRANS *trans;
816
  Ha_trx_info *ha_info;
817
818
  if (all)
819
  {
820
    trans= &thd->transaction.all;
821
    thd->server_status|= SERVER_STATUS_IN_TRANS;
822
  }
823
  else
824
    trans= &thd->transaction.stmt;
825
826
  ha_info= thd->ha_data[ht_arg->slot].ha_info + static_cast<unsigned>(all);
827
828
  if (ha_info->is_started())
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
829
    return; /* already registered, return */
1 by brian
clean slate
830
831
  ha_info->register_ha(trans, ht_arg);
832
833
  trans->no_2pc|=(ht_arg->prepare==0);
834
  if (thd->transaction.xid_state.xid.is_null())
835
    thd->transaction.xid_state.xid.set(thd->query_id);
836
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
837
  return;
1 by brian
clean slate
838
}
839
840
/**
841
  @retval
842
    0   ok
843
  @retval
844
    1   error, transaction was rolled back
845
*/
846
int ha_prepare(THD *thd)
847
{
848
  int error=0, all=1;
849
  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
850
  Ha_trx_info *ha_info= trans->ha_list;
851
  if (ha_info)
852
  {
853
    for (; ha_info; ha_info= ha_info->next())
854
    {
855
      int err;
856
      handlerton *ht= ha_info->ht();
857
      status_var_increment(thd->status_var.ha_prepare_count);
858
      if (ht->prepare)
859
      {
860
        if ((err= ht->prepare(ht, thd, all)))
861
        {
862
          my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
863
          ha_rollback_trans(thd, all);
864
          error=1;
865
          break;
866
        }
867
      }
868
      else
869
      {
870
        push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
871
                            ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
872
                            ha_resolve_storage_engine_name(ht));
873
      }
874
    }
875
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
876
  return(error);
1 by brian
clean slate
877
}
878
879
/**
880
  Check if we can skip the two-phase commit.
881
882
  A helper function to evaluate if two-phase commit is mandatory.
883
  As a side effect, propagates the read-only/read-write flags
884
  of the statement transaction to its enclosing normal transaction.
885
56 by brian
Next pass of true/false update.
886
  @retval true   we must run a two-phase commit. Returned
1 by brian
clean slate
887
                 if we have at least two engines with read-write changes.
56 by brian
Next pass of true/false update.
888
  @retval false  Don't need two-phase commit. Even if we have two
1 by brian
clean slate
889
                 transactional engines, we can run two independent
890
                 commits if changes in one of the engines are read-only.
891
*/
892
893
static
894
bool
895
ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
896
                                    bool all)
897
{
898
  /* The number of storage engines that have actual changes. */
899
  unsigned rw_ha_count= 0;
900
  Ha_trx_info *ha_info;
901
902
  for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
903
  {
904
    if (ha_info->is_trx_read_write())
905
      ++rw_ha_count;
906
907
    if (! all)
908
    {
909
      Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
910
      assert(ha_info != ha_info_all);
1 by brian
clean slate
911
      /*
912
        Merge read-only/read-write information about statement
913
        transaction to its enclosing normal transaction. Do this
914
        only if in a real transaction -- that is, if we know
915
        that ha_info_all is registered in thd->transaction.all.
916
        Since otherwise we only clutter the normal transaction flags.
917
      */
56 by brian
Next pass of true/false update.
918
      if (ha_info_all->is_started()) /* false if autocommit. */
1 by brian
clean slate
919
        ha_info_all->coalesce_trx_with(ha_info);
920
    }
921
    else if (rw_ha_count > 1)
922
    {
923
      /*
924
        It is a normal transaction, so we don't need to merge read/write
925
        information up, and the need for two-phase commit has been
926
        already established. Break the loop prematurely.
927
      */
928
      break;
929
    }
930
  }
931
  return rw_ha_count > 1;
932
}
933
934
935
/**
936
  @retval
937
    0   ok
938
  @retval
939
    1   transaction was rolled back
940
  @retval
941
    2   error during commit, data may be inconsistent
942
943
  @todo
944
    Since we don't support nested statement transactions in 5.0,
945
    we can't commit or rollback stmt transactions while we are inside
946
    stored functions or triggers. So we simply do nothing now.
947
    TODO: This should be fixed in later ( >= 5.1) releases.
948
*/
949
int ha_commit_trans(THD *thd, bool all)
950
{
951
  int error= 0, cookie= 0;
952
  /*
953
    'all' means that this is either an explicit commit issued by
954
    user, or an implicit commit issued by a DDL.
955
  */
956
  THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
957
  bool is_real_trans= all || thd->transaction.all.ha_list == 0;
958
  Ha_trx_info *ha_info= trans->ha_list;
959
  my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
960
961
  /*
962
    We must not commit the normal transaction if a statement
963
    transaction is pending. Otherwise statement transaction
964
    flags will not get propagated to its normal transaction's
965
    counterpart.
966
  */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
967
  assert(thd->transaction.stmt.ha_list == NULL ||
1 by brian
clean slate
968
              trans == &thd->transaction.stmt);
969
970
  if (thd->in_sub_stmt)
971
  {
972
    /*
973
      Since we don't support nested statement transactions in 5.0,
974
      we can't commit or rollback stmt transactions while we are inside
975
      stored functions or triggers. So we simply do nothing now.
976
      TODO: This should be fixed in later ( >= 5.1) releases.
977
    */
978
    if (!all)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
979
      return(0);
1 by brian
clean slate
980
    /*
981
      We assume that all statements which commit or rollback main transaction
982
      are prohibited inside of stored functions or triggers. So they should
983
      bail out with error even before ha_commit_trans() call. To be 100% safe
984
      let us throw error in non-debug builds.
985
    */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
986
    assert(0);
1 by brian
clean slate
987
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
988
    return(2);
1 by brian
clean slate
989
  }
990
  if (ha_info)
991
  {
992
    bool must_2pc;
993
994
    if (is_real_trans && wait_if_global_read_lock(thd, 0, 0))
995
    {
996
      ha_rollback_trans(thd, all);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
997
      return(1);
1 by brian
clean slate
998
    }
999
1000
    if (   is_real_trans
1001
        && opt_readonly
1002
        && ! thd->slave_thread
1003
       )
1004
    {
1005
      my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
1006
      ha_rollback_trans(thd, all);
1007
      error= 1;
1008
      goto end;
1009
    }
1010
1011
    must_2pc= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
1012
1013
    if (!trans->no_2pc && must_2pc)
1014
    {
1015
      for (; ha_info && !error; ha_info= ha_info->next())
1016
      {
1017
        int err;
1018
        handlerton *ht= ha_info->ht();
1019
        /*
1020
          Do not call two-phase commit if this particular
1021
          transaction is read-only. This allows for simpler
1022
          implementation in engines that are always read-only.
1023
        */
1024
        if (! ha_info->is_trx_read_write())
1025
          continue;
1026
        /*
1027
          Sic: we know that prepare() is not NULL since otherwise
1028
          trans->no_2pc would have been set.
1029
        */
1030
        if ((err= ht->prepare(ht, thd, all)))
1031
        {
1032
          my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1033
          error= 1;
1034
        }
1035
        status_var_increment(thd->status_var.ha_prepare_count);
1036
      }
1037
      if (error || (is_real_trans && xid &&
1038
                    (error= !(cookie= tc_log->log_xid(thd, xid)))))
1039
      {
1040
        ha_rollback_trans(thd, all);
1041
        error= 1;
1042
        goto end;
1043
      }
1044
    }
1045
    error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0;
1046
    if (cookie)
1047
      tc_log->unlog(cookie, xid);
1048
end:
1049
    if (is_real_trans)
1050
      start_waiting_global_read_lock(thd);
1051
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1052
  return(error);
1 by brian
clean slate
1053
}
1054
1055
/**
1056
  @note
1057
  This function does not care about global read lock. A caller should.
1058
*/
1059
int ha_commit_one_phase(THD *thd, bool all)
1060
{
1061
  int error=0;
1062
  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1063
  bool is_real_trans=all || thd->transaction.all.ha_list == 0;
1064
  Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1065
  if (ha_info)
1066
  {
1067
    for (; ha_info; ha_info= ha_info_next)
1068
    {
1069
      int err;
1070
      handlerton *ht= ha_info->ht();
1071
      if ((err= ht->commit(ht, thd, all)))
1072
      {
1073
        my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1074
        error=1;
1075
      }
1076
      status_var_increment(thd->status_var.ha_commit_count);
1077
      ha_info_next= ha_info->next();
1078
      ha_info->reset(); /* keep it conveniently zero-filled */
1079
    }
1080
    trans->ha_list= 0;
1081
    trans->no_2pc=0;
1082
    if (is_real_trans)
1083
      thd->transaction.xid_state.xid.null();
1084
    if (all)
1085
    {
1086
      thd->variables.tx_isolation=thd->session_tx_isolation;
1087
      thd->transaction.cleanup();
1088
    }
1089
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1090
  return(error);
1 by brian
clean slate
1091
}
1092
1093
1094
/**
  Roll back a statement or a normal transaction in all registered engines.

  @param thd  connection owning the transaction
  @param all  true for the normal transaction, false for the statement
              transaction

  @return 0 on success, 1 on error
*/
int ha_rollback_trans(THD *thd, bool all)
{
  THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
  Ha_trx_info *cur= trans->ha_list;
  /* Evaluated up front, before any registration list is cleared. */
  const bool is_real_trans= all || thd->transaction.all.ha_list == 0;
  int error= 0;

  /*
    The normal transaction must not be rolled back while a statement
    transaction is pending.
  */
  assert(thd->transaction.stmt.ha_list == NULL ||
         trans == &thd->transaction.stmt);

  if (thd->in_sub_stmt)
  {
    /*
      Inside a stored function or trigger the current statement
      transaction is neither committed nor rolled back. See the
      comment in ha_commit_trans() for more information.
    */
    if (!all)
      return 0;
    assert(0);
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
    return 1;
  }

  if (cur)
  {
    while (cur)
    {
      handlerton *ht= cur->ht();
      const int err= ht->rollback(ht, thd, all);
      if (err)
      { // cannot happen
        my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
        error= 1;
      }
      status_var_increment(thd->status_var.ha_rollback_count);

      /* Fetch the successor before reset() is called on this entry. */
      Ha_trx_info *following= cur->next();
      cur->reset(); /* keep it conveniently zero-filled */
      cur= following;
    }

    trans->ha_list= 0;
    trans->no_2pc= 0;
    if (is_real_trans)
      thd->transaction.xid_state.xid.null();
    if (all)
    {
      thd->variables.tx_isolation= thd->session_tx_isolation;
      thd->transaction.cleanup();
    }
  }

  if (all)
    thd->transaction_rollback_request= false;

  /*
    If a non-transactional table was updated, warn; don't warn if this is a
    slave thread (because when a slave thread executes a ROLLBACK, it has
    been read from the binary log, so it's 100% sure and normal to produce
    error ER_WARNING_NOT_COMPLETE_ROLLBACK. If we sent the warning to the
    slave SQL thread, it would not stop the thread but just be printed in
    the error log; but we don't want users to wonder why they have this
    message in the error log, so we don't send it.
  */
  const bool warn_incomplete= is_real_trans &&
                              thd->transaction.all.modified_non_trans_table &&
                              !thd->slave_thread &&
                              thd->killed != THD::KILL_CONNECTION;
  if (warn_incomplete)
    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
                 ER_WARNING_NOT_COMPLETE_ROLLBACK,
                 ER(ER_WARNING_NOT_COMPLETE_ROLLBACK));
  return error;
}
1165
1166
/**
1167
  This is used to commit or rollback a single statement depending on
1168
  the value of error.
1169
1170
  @note
1171
    Note that if the autocommit is on, then the following call inside
1172
    InnoDB will commit or rollback the whole transaction (= the statement). The
1173
    autocommit mechanism built into InnoDB is based on counting locks, but if
1174
    the user has used LOCK TABLES then that mechanism does not know to do the
1175
    commit.
1176
*/
1177
int ha_autocommit_or_rollback(THD *thd, int error)
1178
{
1179
  if (thd->transaction.stmt.ha_list)
1180
  {
1181
    if (!error)
1182
    {
1183
      if (ha_commit_trans(thd, 0))
1184
	error=1;
1185
    }
1186
    else 
1187
    {
1188
      (void) ha_rollback_trans(thd, 0);
1189
      if (thd->transaction_rollback_request && !thd->in_sub_stmt)
1190
        (void) ha_rollback(thd);
1191
    }
1192
1193
    thd->variables.tx_isolation=thd->session_tx_isolation;
1194
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1195
  return(error);
1 by brian
clean slate
1196
}
1197
1198
1199
struct xahton_st {
1200
  XID *xid;
1201
  int result;
1202
};
1203
149 by Brian Aker
More bool conversion.
1204
static bool xacommit_handlerton(THD *unused1 __attribute__((__unused__)),
1205
                                plugin_ref plugin,
1206
                                void *arg)
1 by brian
clean slate
1207
{
1208
  handlerton *hton= plugin_data(plugin, handlerton *);
1209
  if (hton->state == SHOW_OPTION_YES && hton->recover)
1210
  {
1211
    hton->commit_by_xid(hton, ((struct xahton_st *)arg)->xid);
1212
    ((struct xahton_st *)arg)->result= 0;
1213
  }
56 by brian
Next pass of true/false update.
1214
  return false;
1 by brian
clean slate
1215
}
1216
149 by Brian Aker
More bool conversion.
1217
static bool xarollback_handlerton(THD *unused1 __attribute__((__unused__)),
1218
                                  plugin_ref plugin,
1219
                                  void *arg)
1 by brian
clean slate
1220
{
1221
  handlerton *hton= plugin_data(plugin, handlerton *);
1222
  if (hton->state == SHOW_OPTION_YES && hton->recover)
1223
  {
1224
    hton->rollback_by_xid(hton, ((struct xahton_st *)arg)->xid);
1225
    ((struct xahton_st *)arg)->result= 0;
1226
  }
56 by brian
Next pass of true/false update.
1227
  return false;
1 by brian
clean slate
1228
}
1229
1230
1231
/**
  Commit or roll back a transaction identified by XID in every storage
  engine plugin.

  @param xid     transaction id
  @param commit  true to commit, false to roll back

  @return 0 if at least one engine was asked to process the XID,
          1 otherwise
*/
int ha_commit_or_rollback_by_xid(XID *xid, bool commit)
{
  struct xahton_st xaop;
  xaop.xid= xid;
  xaop.result= 1;  /* stays 1 unless a callback acts on an engine */

  if (commit)
    plugin_foreach(NULL, xacommit_handlerton,
                   MYSQL_STORAGE_ENGINE_PLUGIN, &xaop);
  else
    plugin_foreach(NULL, xarollback_handlerton,
                   MYSQL_STORAGE_ENGINE_PLUGIN, &xaop);

  return xaop.result;
}
1242
1243
/**
  recover() step of XA.

  @note
    There are three modes of operation:
    - automatic recovery after a crash:
      in this case commit_list != 0 and tc_heuristic_recover == 0;
      all xids from commit_list are committed, the others are rolled back.
    - manual (heuristic) recovery:
      in this case commit_list == 0 and tc_heuristic_recover != 0;
      the DBA has explicitly specified that all prepared transactions
      should be committed (or rolled back).
    - no recovery (MySQL did not detect a crash):
      in this case commit_list == 0 and tc_heuristic_recover == 0;
      there should be no prepared transactions in this case.
*/
1259
struct xarecover_st
1260
{
1261
  int len, found_foreign_xids, found_my_xids;
1262
  XID *list;
1263
  HASH *commit_list;
1264
  bool dry_run;
1265
};
1266
149 by Brian Aker
More bool conversion.
1267
static bool xarecover_handlerton(THD *unused __attribute__((__unused__)),
1268
                                 plugin_ref plugin,
1269
                                 void *arg)
1 by brian
clean slate
1270
{
1271
  handlerton *hton= plugin_data(plugin, handlerton *);
1272
  struct xarecover_st *info= (struct xarecover_st *) arg;
1273
  int got;
1274
1275
  if (hton->state == SHOW_OPTION_YES && hton->recover)
1276
  {
1277
    while ((got= hton->recover(hton, info->list, info->len)) > 0 )
1278
    {
1279
      sql_print_information("Found %d prepared transaction(s) in %s",
1280
                            got, ha_resolve_storage_engine_name(hton));
1281
      for (int i=0; i < got; i ++)
1282
      {
1283
        my_xid x=info->list[i].get_my_xid();
1284
        if (!x) // not "mine" - that is generated by external TM
1285
        {
1286
          xid_cache_insert(info->list+i, XA_PREPARED);
1287
          info->found_foreign_xids++;
1288
          continue;
1289
        }
1290
        if (info->dry_run)
1291
        {
1292
          info->found_my_xids++;
1293
          continue;
1294
        }
1295
        // recovery mode
1296
        if (info->commit_list ?
1297
            hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0 :
1298
            tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)
1299
        {
1300
          hton->commit_by_xid(hton, info->list+i);
1301
        }
1302
        else
1303
        {
1304
          hton->rollback_by_xid(hton, info->list+i);
1305
        }
1306
      }
1307
      if (got < info->len)
1308
        break;
1309
    }
1310
  }
56 by brian
Next pass of true/false update.
1311
  return false;
1 by brian
clean slate
1312
}
1313
1314
/**
  Run the XA recover step over all storage engine plugins.

  Allocates a buffer for XIDs, lets xarecover_handlerton() process
  every engine, and reports what was found.

  @param commit_list  hash of xids that must be committed, or NULL

  @retval 0  ok (also returned when there are not enough 2pc-capable
             engines for recovery to be needed)
  @retval 1  out of memory, or prepared transactions were found in
             dry-run mode
*/
int ha_recover(HASH *commit_list)
{
  struct xarecover_st info;
  info.found_foreign_xids= info.found_my_xids= 0;
  info.commit_list= commit_list;
  info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
  info.list= NULL;
  info.len= 0;

  /* commit_list and tc_heuristic_recover cannot be set both */
  assert(info.commit_list==0 || tc_heuristic_recover==0);
  /* if either is set, total_ha_2pc must be set too */
  assert(info.dry_run || total_ha_2pc>(uint32_t)opt_bin_log);

  if (total_ha_2pc <= (uint32_t)opt_bin_log)
    return(0);

  if (info.commit_list)
    sql_print_information("Starting crash recovery...");


#ifndef WILL_BE_DELETED_LATER

  /*
    for now, only InnoDB supports 2pc. It means we can always safely
    rollback all pending transactions, without risking inconsistent data
  */

  assert(total_ha_2pc == (uint32_t) opt_bin_log+1); // only InnoDB and binlog
  tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK
  info.dry_run=false;
#endif

  /*
    Try progressively smaller buffers until one can be allocated.
    info.len is set only on success and to the size that was really
    allocated, so recover() can use the entire buffer. (Leaving the
    halving step in the loop header would run it once more after a
    successful allocation and under-report the buffer size by half.)
  */
  int attempted_len= 0;
  for (int len= MAX_XID_LIST_SIZE; len > MIN_XID_LIST_SIZE; len/= 2)
  {
    attempted_len= len;
    info.list= (XID *) my_malloc(len * sizeof(XID), MYF(0));
    if (info.list)
    {
      info.len= len;
      break;
    }
  }
  if (!info.list)
  {
    /*
      Report the last size that was actually requested. The cast assumes
      the ER_OUTOFMEMORY format takes an int (%d) -- NOTE(review): confirm
      against the error message file.
    */
    sql_print_error(ER(ER_OUTOFMEMORY),
                    (int) (attempted_len * sizeof(XID)));
    return(1);
  }

  plugin_foreach(NULL, xarecover_handlerton,
                 MYSQL_STORAGE_ENGINE_PLUGIN, &info);

  my_free((uchar*)info.list, MYF(0));
  if (info.found_foreign_xids)
    sql_print_warning("Found %d prepared XA transactions",
                      info.found_foreign_xids);
  if (info.dry_run && info.found_my_xids)
  {
    sql_print_error("Found %d prepared transactions! It means that mysqld was "
                    "not shut down properly last time and critical recovery "
                    "information (last binlog or %s file) was manually deleted "
                    "after a crash. You have to start mysqld with "
                    "--tc-heuristic-recover switch to commit or rollback "
                    "pending transactions.",
                    info.found_my_xids, opt_tc_log_file);
    return(1);
  }
  if (info.commit_list)
    sql_print_information("Crash recovery finished.");
  return(0);
}
1380
1381
/**
1382
  return the list of XID's to a client, the same way SHOW commands do.
1383
1384
  @note
1385
    I didn't find in XA specs that an RM cannot return the same XID twice,
1386
    so mysql_xa_recover does not filter XID's to ensure uniqueness.
1387
    It can be easily fixed later, if necessary.
1388
*/
1389
bool mysql_xa_recover(THD *thd)
1390
{
1391
  List<Item> field_list;
1392
  Protocol *protocol= thd->protocol;
1393
  int i=0;
1394
  XID_STATE *xs;
1395
1396
  field_list.push_back(new Item_int("formatID", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1397
  field_list.push_back(new Item_int("gtrid_length", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1398
  field_list.push_back(new Item_int("bqual_length", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1399
  field_list.push_back(new Item_empty_string("data",XIDDATASIZE));
1400
1401
  if (protocol->send_fields(&field_list,
1402
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1403
    return(1);
1 by brian
clean slate
1404
1405
  pthread_mutex_lock(&LOCK_xid_cache);
1406
  while ((xs= (XID_STATE*)hash_element(&xid_cache, i++)))
1407
  {
1408
    if (xs->xa_state==XA_PREPARED)
1409
    {
1410
      protocol->prepare_for_resend();
152 by Brian Aker
longlong replacement
1411
      protocol->store_int64_t((int64_t)xs->xid.formatID, false);
1412
      protocol->store_int64_t((int64_t)xs->xid.gtrid_length, false);
1413
      protocol->store_int64_t((int64_t)xs->xid.bqual_length, false);
1 by brian
clean slate
1414
      protocol->store(xs->xid.data, xs->xid.gtrid_length+xs->xid.bqual_length,
1415
                      &my_charset_bin);
1416
      if (protocol->write())
1417
      {
1418
        pthread_mutex_unlock(&LOCK_xid_cache);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1419
        return(1);
1 by brian
clean slate
1420
      }
1421
    }
1422
  }
1423
1424
  pthread_mutex_unlock(&LOCK_xid_cache);
1425
  my_eof(thd);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1426
  return(0);
1 by brian
clean slate
1427
}
1428
1429
/**
1430
  @details
1431
  This function should be called when MySQL sends rows of a SELECT result set
1432
  or the EOF mark to the client. It releases a possible adaptive hash index
1433
  S-latch held by thd in InnoDB and also releases a possible InnoDB query
1434
  FIFO ticket to enter InnoDB. To save CPU time, InnoDB allows a thd to
1435
  keep them over several calls of the InnoDB handler interface when a join
1436
  is executed. But when we let the control to pass to the client they have
1437
  to be released because if the application program uses mysql_use_result(),
1438
  it may deadlock on the S-latch if the application on another connection
1439
  performs another SQL query. In MySQL-4.1 this is even more important because
1440
  there a connection can have several SELECT queries open at the same time.
1441
1442
  @param thd           the thread handle of the current connection
1443
1444
  @return
1445
    always 0
1446
*/
149 by Brian Aker
More bool conversion.
1447
static bool release_temporary_latches(THD *thd, plugin_ref plugin,
1448
                                      void *unused __attribute__((__unused__)))
1 by brian
clean slate
1449
{
1450
  handlerton *hton= plugin_data(plugin, handlerton *);
1451
1452
  if (hton->state == SHOW_OPTION_YES && hton->release_temporary_latches)
1453
    hton->release_temporary_latches(hton, thd);
1454
56 by brian
Next pass of true/false update.
1455
  return false;
1 by brian
clean slate
1456
}
1457
1458
1459
/**
  Ask every storage engine plugin to release its temporary latches
  for the given connection.

  @return always 0
*/
int ha_release_temporary_latches(THD *thd)
{
  /* The per-engine callback never fails, so the result is not inspected. */
  (void) plugin_foreach(thd, release_temporary_latches,
                        MYSQL_STORAGE_ENGINE_PLUGIN, NULL);
  return 0;
}
1466
1467
/**
  Roll the transaction back to a savepoint.

  Engines that were registered when the savepoint was set are asked to
  roll back to it; engines registered afterwards are rolled back
  completely and removed from the transaction's engine list.

  @param thd  connection owning the transaction
  @param sv   savepoint to roll back to

  @return 0 on success, 1 if any engine reported an error
*/
int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
{
  int error= 0;
  THD_TRANS *trans= thd->in_sub_stmt ? &thd->transaction.stmt
                                     : &thd->transaction.all;

  /* Recomputed below from the engines that stay in the transaction. */
  trans->no_2pc= 0;

  /*
    Roll back to the savepoint in all storage engines that were part
    of the transaction when the savepoint was set.
  */
  for (Ha_trx_info *cur= sv->ha_list; cur; cur= cur->next())
  {
    handlerton *ht= cur->ht();
    assert(ht);
    assert(ht->savepoint_set != 0);

    /* Per-engine savepoint data is stored right after the SAVEPOINT object. */
    uchar *engine_sv= (uchar *)(sv+1) + ht->savepoint_offset;
    const int err= ht->savepoint_rollback(ht, thd, engine_sv);
    if (err)
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
      error= 1;
    }
    status_var_increment(thd->status_var.ha_savepoint_rollback_count);
    trans->no_2pc|= ht->prepare == 0;
  }

  /*
    Roll back the transaction in all storage engines that were not
    part of the transaction when the savepoint was set.
  */
  Ha_trx_info *cur= trans->ha_list;
  while (cur != sv->ha_list)
  {
    handlerton *ht= cur->ht();
    const int err= ht->rollback(ht, thd, !thd->in_sub_stmt);
    if (err)
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
      error= 1;
    }
    status_var_increment(thd->status_var.ha_rollback_count);

    /* Fetch the successor before reset() is called on this entry. */
    Ha_trx_info *following= cur->next();
    cur->reset(); /* keep it conveniently zero-filled */
    cur= following;
  }

  trans->ha_list= sv->ha_list;
  return error;
}
1515
1516
/**
1517
  @note
1518
  according to the sql standard (ISO/IEC 9075-2:2003)
1519
  section "4.33.4 SQL-statements and transaction states",
1520
  SAVEPOINT is *not* transaction-initiating SQL-statement
1521
*/
1522
int ha_savepoint(THD *thd, SAVEPOINT *sv)
1523
{
1524
  int error=0;
1525
  THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
1526
                                        &thd->transaction.all);
1527
  Ha_trx_info *ha_info= trans->ha_list;
1528
  for (; ha_info; ha_info= ha_info->next())
1529
  {
1530
    int err;
1531
    handlerton *ht= ha_info->ht();
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1532
    assert(ht);
1 by brian
clean slate
1533
    if (! ht->savepoint_set)
1534
    {
1535
      my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
1536
      error=1;
1537
      break;
1538
    }
1539
    if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
1540
    { // cannot happen
1541
      my_error(ER_GET_ERRNO, MYF(0), err);
1542
      error=1;
1543
    }
1544
    status_var_increment(thd->status_var.ha_savepoint_count);
1545
  }
1546
  /*
1547
    Remember the list of registered storage engines. All new
1548
    engines are prepended to the beginning of the list.
1549
  */
1550
  sv->ha_list= trans->ha_list;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1551
  return(error);
1 by brian
clean slate
1552
}
1553
1554
/**
  Release a savepoint in every engine that was registered when the
  savepoint was set.

  Engines without a savepoint_release hook are skipped.

  @param thd  connection owning the transaction
  @param sv   savepoint to release

  @return 0 on success, 1 if any engine reported an error
*/
int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
{
  int error= 0;

  for (Ha_trx_info *cur= sv->ha_list; cur; cur= cur->next())
  {
    handlerton *ht= cur->ht();
    /* Savepoint life time is enclosed into transaction life time. */
    assert(ht);

    if (ht->savepoint_release)
    {
      const int err= ht->savepoint_release(ht, thd,
                                           (uchar *)(sv+1) +
                                           ht->savepoint_offset);
      if (err)
      { // cannot happen
        my_error(ER_GET_ERRNO, MYF(0), err);
        error= 1;
      }
    }
  }
  return error;
}
1576
1577
149 by Brian Aker
More bool conversion.
1578
/**
  plugin_foreach callback: start a consistent snapshot in one engine.

  @param arg  pointer to a bool that is cleared when the engine
              implements start_consistent_snapshot

  @return always false
*/
static bool snapshot_handlerton(THD *thd, plugin_ref plugin, void *arg)
{
  handlerton *hton= plugin_data(plugin, handlerton *);

  if (hton->state != SHOW_OPTION_YES || !hton->start_consistent_snapshot)
    return false;

  hton->start_consistent_snapshot(hton, thd);

  bool *no_engine_found= (bool *) arg;
  *no_engine_found= false;
  return false;
}
1589
1590
/**
  Start a consistent snapshot in every engine that supports it.

  Pushes a warning when no enabled engine implements
  start_consistent_snapshot.

  @return always 0
*/
int ha_start_consistent_snapshot(THD *thd)
{
  /* Cleared by snapshot_handlerton() for each capable engine. */
  bool warn= true;

  plugin_foreach(thd, snapshot_handlerton, MYSQL_STORAGE_ENGINE_PLUGIN, &warn);

  if (!warn)
    return 0;

  /*
    Same idea as when one wants to CREATE TABLE in one engine which does not
    exist:
  */
  push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
               "This MySQL server does not support any "
               "consistent-read capable storage engine");
  return 0;
}
1606
1607
149 by Brian Aker
More bool conversion.
1608
static bool flush_handlerton(THD *thd __attribute__((__unused__)),
1609
                             plugin_ref plugin,
1610
                             void *arg __attribute__((__unused__)))
1 by brian
clean slate
1611
{
1612
  handlerton *hton= plugin_data(plugin, handlerton *);
1613
  if (hton->state == SHOW_OPTION_YES && hton->flush_logs && 
1614
      hton->flush_logs(hton))
56 by brian
Next pass of true/false update.
1615
    return true;
1616
  return false;
1 by brian
clean slate
1617
}
1618
1619
1620
/**
  Flush the logs of one storage engine, or of all of them.

  @param db_type  engine to flush; NULL means run flush_handlerton() over
                  every storage engine plugin

  @return true on failure (for a specific engine this includes the engine
          not being in state SHOW_OPTION_YES), false on success
*/
bool ha_flush_logs(handlerton *db_type)
{
  if (db_type != NULL)
  {
    /* A specific engine was requested: it must be enabled. */
    if (db_type->state != SHOW_OPTION_YES)
      return true;
    if (db_type->flush_logs && db_type->flush_logs(db_type))
      return true;
    return false;
  }

  if (plugin_foreach(NULL, flush_handlerton,
                     MYSQL_STORAGE_ENGINE_PLUGIN, 0))
    return true;
  return false;
}
1636
1637
static const char *check_lowercase_names(handler *file, const char *path,
1638
                                         char *tmp_path)
1639
{
1640
  if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
1641
    return path;
1642
1643
  /* Ensure that table handler get path in lower case */
1644
  if (tmp_path != path)
1645
    strmov(tmp_path, path);
1646
1647
  /*
1648
    we only should turn into lowercase database/table part
1649
    so start the process after homedirectory
1650
  */
1651
  my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
1652
  return tmp_path;
1653
}
1654
1655
1656
/**
1657
  An interceptor to hijack the text of the error message without
1658
  setting an error in the thread. We need the text to present it
1659
  in the form of a warning to the user.
1660
*/
1661
1662
struct Ha_delete_table_error_handler: public Internal_error_handler
1663
{
1664
public:
1665
  virtual bool handle_error(uint sql_errno,
1666
                            const char *message,
1667
                            MYSQL_ERROR::enum_warning_level level,
1668
                            THD *thd);
1669
  char buff[MYSQL_ERRMSG_SIZE];
1670
};
1671
1672
1673
bool
1674
Ha_delete_table_error_handler::
77.1.15 by Monty Taylor
Bunch of warning cleanups.
1675
handle_error(uint sql_errno  __attribute__((__unused__)),
1 by brian
clean slate
1676
             const char *message,
77.1.15 by Monty Taylor
Bunch of warning cleanups.
1677
             MYSQL_ERROR::enum_warning_level level __attribute__((__unused__)),
1678
             THD *thd __attribute__((__unused__)))
1 by brian
clean slate
1679
{
1680
  /* Grab the error message */
1681
  strmake(buff, message, sizeof(buff)-1);
56 by brian
Next pass of true/false update.
1682
  return true;
1 by brian
clean slate
1683
}
1684
1685
1686
/**
1687
  This should return ENOENT if the file doesn't exists.
1688
  The .frm file will be deleted only if we return 0 or ENOENT
1689
*/
1690
int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
1691
                    const char *db, const char *alias, bool generate_warning)
1692
{
1693
  handler *file;
1694
  char tmp_path[FN_REFLEN];
1695
  int error;
1696
  TABLE dummy_table;
1697
  TABLE_SHARE dummy_share;
1698
1699
  bzero((char*) &dummy_table, sizeof(dummy_table));
1700
  bzero((char*) &dummy_share, sizeof(dummy_share));
1701
  dummy_table.s= &dummy_share;
1702
1703
  /* DB_TYPE_UNKNOWN is used in ALTER TABLE when renaming only .frm files */
1704
  if (table_type == NULL ||
1705
      ! (file=get_new_handler((TABLE_SHARE*)0, thd->mem_root, table_type)))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1706
    return(ENOENT);
1 by brian
clean slate
1707
1708
  path= check_lowercase_names(file, path, tmp_path);
1709
  if ((error= file->ha_delete_table(path)) && generate_warning)
1710
  {
1711
    /*
1712
      Because file->print_error() use my_error() to generate the error message
1713
      we use an internal error handler to intercept it and store the text
1714
      in a temporary buffer. Later the message will be presented to user
1715
      as a warning.
1716
    */
1717
    Ha_delete_table_error_handler ha_delete_table_error_handler;
1718
1719
    /* Fill up strucutures that print_error may need */
1720
    dummy_share.path.str= (char*) path;
1721
    dummy_share.path.length= strlen(path);
1722
    dummy_share.db.str= (char*) db;
1723
    dummy_share.db.length= strlen(db);
1724
    dummy_share.table_name.str= (char*) alias;
1725
    dummy_share.table_name.length= strlen(alias);
1726
    dummy_table.alias= alias;
1727
1728
    file->change_table_ptr(&dummy_table, &dummy_share);
1729
1730
    thd->push_internal_handler(&ha_delete_table_error_handler);
1731
    file->print_error(error, 0);
1732
1733
    thd->pop_internal_handler();
1734
1735
    /*
1736
      XXX: should we convert *all* errors to warnings here?
1737
      What if the error is fatal?
1738
    */
1739
    push_warning(thd, MYSQL_ERROR::WARN_LEVEL_ERROR, error,
1740
                ha_delete_table_error_handler.buff);
1741
  }
1742
  delete file;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1743
  return(error);
1 by brian
clean slate
1744
}
1745
1746
/****************************************************************************
1747
** General handler functions
1748
****************************************************************************/
1749
/**
  Create a second handler instance for the table this handler serves and
  open it.

  @param mem_root  memory root used for the new handler and its ref buffer

  @return the opened clone, or NULL if the handler could not be created,
          its ref buffer could not be allocated, or ha_open() failed
*/
handler *handler::clone(MEM_ROOT *mem_root)
{
  handler *new_handler= get_new_handler(table->s, mem_root, table->s->db_type());

  /* get_new_handler() may fail; check before touching the new object. */
  if (!new_handler)
    return NULL;

  /*
    Allocate handler->ref here because otherwise ha_open will allocate it
    on this->table->mem_root and we will not be able to reclaim that memory
    when the clone handler object is destroyed.
  */
  if (!(new_handler->ref= (uchar*) alloc_root(mem_root, ALIGN_SIZE(ref_length)*2)))
    return NULL;

  if (new_handler->ha_open(table,
                           table->s->normalized_path.str,
                           table->db_stat,
                           HA_OPEN_IGNORE_IF_LOCKED))
    return NULL;

  return new_handler;
}
1766
1767
1768
1769
/**
  Increment one status counter of the session that uses this table.

  @param offset  pointer-to-member selecting the counter inside the
                 session's status_var
*/
void handler::ha_statistic_increment(ulong SSV::*offset) const
{
  THD *owner= table->in_use;
  status_var_increment(owner->status_var.*offset);
}
1773
1774
/**
  Return what thd_ha_data() yields for the given session and this
  handler's handlerton.

  @param thd  session to look up
*/
void **handler::ha_data(THD *thd) const
{
  void **engine_slot= thd_ha_data(thd, ht);
  return engine_slot;
}
1778
1779
/**
  Return the session this handler works for.

  @return table->in_use when a table is attached and has a session set,
          current_thd otherwise
*/
THD *handler::ha_thd(void) const
{
  /* If the table names a session, it must be the current one. */
  assert(!table || !table->in_use || table->in_use == current_thd);

  if (table && table->in_use)
    return table->in_use;
  return current_thd;
}
1784
1785
/**
1786
  Open database-handler.
1787
1788
  Try O_RDONLY if cannot open as O_RDWR
1789
  Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set
1790
*/
1791
int handler::ha_open(TABLE *table_arg, const char *name, int mode,
1792
                     int test_if_locked)
1793
{
1794
  int error;
1795
1796
  table= table_arg;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1797
  assert(table->s == table_share);
1798
  assert(alloc_root_inited(&table->mem_root));
1 by brian
clean slate
1799
1800
  if ((error=open(name,mode,test_if_locked)))
1801
  {
1802
    if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
1803
	(table->db_stat & HA_TRY_READ_ONLY))
1804
    {
1805
      table->db_stat|=HA_READ_ONLY;
1806
      error=open(name,O_RDONLY,test_if_locked);
1807
    }
1808
  }
1809
  if (error)
1810
  {
1811
    my_errno= error;                            /* Safeguard */
1812
  }
1813
  else
1814
  {
1815
    if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
1816
      table->db_stat|=HA_READ_ONLY;
1817
    (void) extra(HA_EXTRA_NO_READCHECK);	// Not needed in SQL
1818
1819
    /* ref is already allocated for us if we're called from handler::clone() */
1820
    if (!ref && !(ref= (uchar*) alloc_root(&table->mem_root, 
1821
                                          ALIGN_SIZE(ref_length)*2)))
1822
    {
1823
      close();
1824
      error=HA_ERR_OUT_OF_MEM;
1825
    }
1826
    else
1827
      dup_ref=ref+ALIGN_SIZE(ref_length);
1828
    cached_table_flags= table_flags();
1829
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1830
  return(error);
1 by brian
clean slate
1831
}
1832
1833
/**
1834
  one has to use this method when to find
1835
  random position by record as the plain
1836
  position() call doesn't work for some
1837
  handlers for random position
1838
*/
1839
1840
int handler::rnd_pos_by_record(uchar *record)
1841
{
1842
  register int error;
1843
1844
  position(record);
1845
  if (inited && (error= ha_index_end()))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1846
    return(error);
56 by brian
Next pass of true/false update.
1847
  if ((error= ha_rnd_init(false)))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1848
    return(error);
1 by brian
clean slate
1849
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1850
  return(rnd_pos(record, ref));
1 by brian
clean slate
1851
}
1852
1853
/**
1854
  Read first row (only) from a table.
1855
1856
  This is never called for InnoDB tables, as these table types
1857
  has the HA_STATS_RECORDS_IS_EXACT set.
1858
*/
1859
int handler::read_first_row(uchar * buf, uint primary_key)
1860
{
1861
  register int error;
1862
1863
  ha_statistic_increment(&SSV::ha_read_first_count);
1864
1865
  /*
1866
    If there is very few deleted rows in the table, find the first row by
1867
    scanning the table.
1868
    TODO remove the test for HA_READ_ORDER
1869
  */
1870
  if (stats.deleted < 10 || primary_key >= MAX_KEY ||
1871
      !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
1872
  {
1873
    (void) ha_rnd_init(1);
1874
    while ((error= rnd_next(buf)) == HA_ERR_RECORD_DELETED) ;
1875
    (void) ha_rnd_end();
1876
  }
1877
  else
1878
  {
1879
    /* Find the first row through the primary key */
1880
    (void) ha_index_init(primary_key, 0);
1881
    error=index_first(buf);
1882
    (void) ha_index_end();
1883
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1884
  return(error);
1 by brian
clean slate
1885
}
1886
1887
/**
1888
  Generate the next auto-increment number based on increment and offset.
1889
  computes the lowest number
1890
  - strictly greater than "nr"
1891
  - of the form: auto_increment_offset + N * auto_increment_increment
1892
1893
  In most cases increment= offset= 1, in which case we get:
1894
  @verbatim 1,2,3,4,5,... @endverbatim
1895
    If increment=10 and offset=5 and previous number is 1, we get:
1896
  @verbatim 1,5,15,25,35,... @endverbatim
1897
*/
1898
inline uint64_t
1899
compute_next_insert_id(uint64_t nr,struct system_variables *variables)
1900
{
1901
  if (variables->auto_increment_increment == 1)
1902
    return (nr+1); // optimization of the formula below
1903
  nr= (((nr+ variables->auto_increment_increment -
1904
         variables->auto_increment_offset)) /
1905
       (uint64_t) variables->auto_increment_increment);
1906
  return (nr* (uint64_t) variables->auto_increment_increment +
1907
          variables->auto_increment_offset);
1908
}
1909
1910
1911
void handler::adjust_next_insert_id_after_explicit_value(uint64_t nr)
1912
{
1913
  /*
1914
    If we have set THD::next_insert_id previously and plan to insert an
1915
    explicitely-specified value larger than this, we need to increase
1916
    THD::next_insert_id to be greater than the explicit value.
1917
  */
1918
  if ((next_insert_id > 0) && (nr >= next_insert_id))
1919
    set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
1920
}
1921
1922
1923
/**
1924
  Compute a previous insert id
1925
1926
  Computes the largest number X:
1927
  - smaller than or equal to "nr"
1928
  - of the form: auto_increment_offset + N * auto_increment_increment
1929
    where N>=0.
1930
1931
  @param nr            Number to "round down"
1932
  @param variables     variables struct containing auto_increment_increment and
1933
                       auto_increment_offset
1934
1935
  @return
1936
    The number X if it exists, "nr" otherwise.
1937
*/
1938
inline uint64_t
1939
prev_insert_id(uint64_t nr, struct system_variables *variables)
1940
{
1941
  if (unlikely(nr < variables->auto_increment_offset))
1942
  {
1943
    /*
1944
      There's nothing good we can do here. That is a pathological case, where
1945
      the offset is larger than the column's max possible value, i.e. not even
1946
      the first sequence value may be inserted. User will receive warning.
1947
    */
1948
    return nr;
1949
  }
1950
  if (variables->auto_increment_increment == 1)
1951
    return nr; // optimization of the formula below
1952
  nr= (((nr - variables->auto_increment_offset)) /
1953
       (uint64_t) variables->auto_increment_increment);
1954
  return (nr * (uint64_t) variables->auto_increment_increment +
1955
          variables->auto_increment_offset);
1956
}
1957
1958
1959
/**
1960
  Update the auto_increment field if necessary.
1961
1962
  Updates columns with type NEXT_NUMBER if:
1963
1964
  - If column value is set to NULL (in which case
1965
    auto_increment_field_not_null is 0)
1966
  - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
1967
    set. In the future we will only set NEXT_NUMBER fields if one sets them
1968
    to NULL (or they are not included in the insert list).
1969
1970
    In those cases, we check if the currently reserved interval still has
1971
    values we have not used. If yes, we pick the smallest one and use it.
1972
    Otherwise:
1973
1974
  - If a list of intervals has been provided to the statement via SET
1975
    INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
1976
    first unused interval from this list, consider it as reserved.
1977
1978
  - Otherwise we set the column for the first row to the value
1979
    next_insert_id(get_auto_increment(column)) which is usually
1980
    max-used-column-value+1.
1981
    We call get_auto_increment() for the first row in a multi-row
1982
    statement. get_auto_increment() will tell us the interval of values it
1983
    reserved for us.
1984
1985
  - In both cases, for the following rows we use those reserved values without
1986
    calling the handler again (we just progress in the interval, computing
1987
    each new value from the previous one). Until we have exhausted them, then
1988
    we either take the next provided interval or call get_auto_increment()
1989
    again to reserve a new interval.
1990
1991
  - In both cases, the reserved intervals are remembered in
1992
    thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
1993
    binlogging; the last reserved interval is remembered in
1994
    auto_inc_interval_for_cur_row.
1995
1996
    The idea is that generated auto_increment values are predictable and
1997
    independent of the column values in the table.  This is needed to be
1998
    able to replicate into a table that already has rows with a higher
1999
    auto-increment value than the one that is inserted.
2000
2001
    After we have already generated an auto-increment number and the user
2002
    inserts a column with a higher value than the last used one, we will
2003
    start counting from the inserted value.
2004
2005
    This function's "outputs" are: the table's auto_increment field is filled
2006
    with a value, thd->next_insert_id is filled with the value to use for the
2007
    next row, if a value was autogenerated for the current row it is stored in
2008
    thd->insert_id_for_cur_row, if get_auto_increment() was called
2009
    thd->auto_inc_interval_for_cur_row is modified, if that interval is not
2010
    present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
2011
    this list.
2012
2013
  @todo
2014
    Replace all references to "next number" or NEXT_NUMBER to
2015
    "auto_increment", everywhere (see below: there is
2016
    table->auto_increment_field_not_null, and there also exists
2017
    table->next_number_field, it's not consistent).
2018
2019
  @retval
2020
    0	ok
2021
  @retval
2022
    HA_ERR_AUTOINC_READ_FAILED  get_auto_increment() was called and
2023
    returned ~(uint64_t) 0
2024
  @retval
2025
    HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
2026
    failure.
2027
*/
2028
2029
#define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
2030
#define AUTO_INC_DEFAULT_NB_MAX_BITS 16
2031
#define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)
2032
2033
int handler::update_auto_increment()
2034
{
2035
  uint64_t nr, nb_reserved_values;
56 by brian
Next pass of true/false update.
2036
  bool append= false;
1 by brian
clean slate
2037
  THD *thd= table->in_use;
2038
  struct system_variables *variables= &thd->variables;
2039
2040
  /*
2041
    next_insert_id is a "cursor" into the reserved interval, it may go greater
2042
    than the interval, but not smaller.
2043
  */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2044
  assert(next_insert_id >= auto_inc_interval_for_cur_row.minimum());
1 by brian
clean slate
2045
2046
  if (((nr= table->next_number_field->val_int()) != 0) || 
2047
      (table->auto_increment_field_not_null && (thd->variables.sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO)))
2048
  {
2049
    /*
2050
      Update next_insert_id if we had already generated a value in this
2051
      statement (case of INSERT VALUES(null),(3763),(null):
2052
      the last NULL needs to insert 3764, not the value of the first NULL plus
2053
      1).
2054
    */
2055
    adjust_next_insert_id_after_explicit_value(nr);
2056
    insert_id_for_cur_row= 0; // didn't generate anything
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2057
    return(0);
1 by brian
clean slate
2058
  }
2059
2060
  if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
2061
  {
2062
    /* next_insert_id is beyond what is reserved, so we reserve more. */
2063
    const Discrete_interval *forced=
2064
      thd->auto_inc_intervals_forced.get_next();
2065
    if (forced != NULL)
2066
    {
2067
      nr= forced->minimum();
2068
      nb_reserved_values= forced->values();
2069
    }
2070
    else
2071
    {
2072
      /*
2073
        handler::estimation_rows_to_insert was set by
2074
        handler::ha_start_bulk_insert(); if 0 it means "unknown".
2075
      */
2076
      uint nb_already_reserved_intervals=
2077
        thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements();
2078
      uint64_t nb_desired_values;
2079
      /*
2080
        If an estimation was given to the engine:
2081
        - use it.
2082
        - if we already reserved numbers, it means the estimation was
2083
        not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
2084
        time, twice that the 3rd time etc.
2085
        If no estimation was given, use those increasing defaults from the
2086
        start, starting from AUTO_INC_DEFAULT_NB_ROWS.
2087
        Don't go beyond a max to not reserve "way too much" (because
2088
        reservation means potentially losing unused values).
2089
      */
2090
      if (nb_already_reserved_intervals == 0 &&
2091
          (estimation_rows_to_insert > 0))
2092
        nb_desired_values= estimation_rows_to_insert;
2093
      else /* go with the increasing defaults */
2094
      {
2095
        /* avoid overflow in formula, with this if() */
2096
        if (nb_already_reserved_intervals <= AUTO_INC_DEFAULT_NB_MAX_BITS)
2097
        {
2098
          nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS * 
2099
            (1 << nb_already_reserved_intervals);
2100
          set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
2101
        }
2102
        else
2103
          nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
2104
      }
2105
      /* This call ignores all its parameters but nr, currently */
2106
      get_auto_increment(variables->auto_increment_offset,
2107
                         variables->auto_increment_increment,
2108
                         nb_desired_values, &nr,
2109
                         &nb_reserved_values);
2110
      if (nr == ~(uint64_t) 0)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2111
        return(HA_ERR_AUTOINC_READ_FAILED);  // Mark failure
1 by brian
clean slate
2112
      
2113
      /*
2114
        That rounding below should not be needed when all engines actually
2115
        respect offset and increment in get_auto_increment(). But they don't
2116
        so we still do it. Wonder if for the not-first-in-index we should do
2117
        it. Hope that this rounding didn't push us out of the interval; even
2118
        if it did we cannot do anything about it (calling the engine again
2119
        will not help as we inserted no row).
2120
      */
2121
      nr= compute_next_insert_id(nr-1, variables);
2122
    }
2123
    
2124
    if (table->s->next_number_keypart == 0)
2125
    {
2126
      /* We must defer the appending until "nr" has been possibly truncated */
56 by brian
Next pass of true/false update.
2127
      append= true;
1 by brian
clean slate
2128
    }
2129
  }
2130
152 by Brian Aker
longlong replacement
2131
  if (unlikely(table->next_number_field->store((int64_t) nr, true)))
1 by brian
clean slate
2132
  {
2133
    /*
2134
      first test if the query was aborted due to strict mode constraints
2135
    */
2136
    if (thd->killed == THD::KILL_BAD_DATA)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2137
      return(HA_ERR_AUTOINC_ERANGE);
1 by brian
clean slate
2138
2139
    /*
2140
      field refused this value (overflow) and truncated it, use the result of
2141
      the truncation (which is going to be inserted); however we try to
2142
      decrease it to honour auto_increment_* variables.
2143
      That will shift the left bound of the reserved interval, we don't
2144
      bother shifting the right bound (anyway any other value from this
2145
      interval will cause a duplicate key).
2146
    */
2147
    nr= prev_insert_id(table->next_number_field->val_int(), variables);
152 by Brian Aker
longlong replacement
2148
    if (unlikely(table->next_number_field->store((int64_t) nr, true)))
1 by brian
clean slate
2149
      nr= table->next_number_field->val_int();
2150
  }
2151
  if (append)
2152
  {
2153
    auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
2154
                                          variables->auto_increment_increment);
2155
    /* Row-based replication does not need to store intervals in binlog */
2156
    if (!thd->current_stmt_binlog_row_based)
2157
        thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(),
2158
                                                              auto_inc_interval_for_cur_row.values(),
2159
                                                              variables->auto_increment_increment);
2160
  }
2161
2162
  /*
2163
    Record this autogenerated value. If the caller then
2164
    succeeds to insert this value, it will call
2165
    record_first_successful_insert_id_in_cur_stmt()
2166
    which will set first_successful_insert_id_in_cur_stmt if it's not
2167
    already set.
2168
  */
2169
  insert_id_for_cur_row= nr;
2170
  /*
2171
    Set next insert id to point to next auto-increment value to be able to
2172
    handle multi-row statements.
2173
  */
2174
  set_next_insert_id(compute_next_insert_id(nr, variables));
2175
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2176
  return(0);
1 by brian
clean slate
2177
}
2178
2179
2180
/**
2181
  MySQL signal that it changed the column bitmap
2182
2183
  This is for handlers that needs to setup their own column bitmaps.
2184
  Normally the handler should set up their own column bitmaps in
2185
  index_init() or rnd_init() and in any column_bitmaps_signal() call after
2186
  this.
2187
2188
  The handler is allowed to do changes to the bitmap after a index_init or
2189
  rnd_init() call is made as after this, MySQL will not use the bitmap
2190
  for any program logic checking.
2191
*/
2192
void handler::column_bitmaps_signal()
2193
{
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2194
  return;
1 by brian
clean slate
2195
}
2196
2197
2198
/**
2199
  Reserves an interval of auto_increment values from the handler.
2200
2201
  offset and increment means that we want values to be of the form
2202
  offset + N * increment, where N>=0 is integer.
2203
  If the function sets *first_value to ~(uint64_t)0 it means an error.
163 by Brian Aker
Merge Monty's code.
2204
  If the function sets *nb_reserved_values to UINT64_MAX it means it has
1 by brian
clean slate
2205
  reserved to "positive infinite".
2206
2207
  @param offset
2208
  @param increment
2209
  @param nb_desired_values   how many values we want
2210
  @param first_value         (OUT) the first value reserved by the handler
2211
  @param nb_reserved_values  (OUT) how many values the handler reserved
2212
*/
77.1.15 by Monty Taylor
Bunch of warning cleanups.
2213
void handler::get_auto_increment(uint64_t offset __attribute__((__unused__)),
2214
                                 uint64_t increment __attribute__((__unused__)),
2215
                                 uint64_t nb_desired_values __attribute__((__unused__)),
1 by brian
clean slate
2216
                                 uint64_t *first_value,
2217
                                 uint64_t *nb_reserved_values)
2218
{
2219
  uint64_t nr;
2220
  int error;
2221
2222
  (void) extra(HA_EXTRA_KEYREAD);
2223
  table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
2224
                                        table->read_set);
2225
  column_bitmaps_signal();
2226
  index_init(table->s->next_number_index, 1);
2227
  if (table->s->next_number_keypart == 0)
2228
  {						// Autoincrement at key-start
2229
    error=index_last(table->record[1]);
2230
    /*
2231
      MySQL implicitely assumes such method does locking (as MySQL decides to
2232
      use nr+increment without checking again with the handler, in
2233
      handler::update_auto_increment()), so reserves to infinite.
2234
    */
163 by Brian Aker
Merge Monty's code.
2235
    *nb_reserved_values= UINT64_MAX;
1 by brian
clean slate
2236
  }
2237
  else
2238
  {
2239
    uchar key[MAX_KEY_LENGTH];
2240
    key_copy(key, table->record[0],
2241
             table->key_info + table->s->next_number_index,
2242
             table->s->next_number_key_offset);
2243
    error= index_read_map(table->record[1], key,
2244
                          make_prev_keypart_map(table->s->next_number_keypart),
2245
                          HA_READ_PREFIX_LAST);
2246
    /*
2247
      MySQL needs to call us for next row: assume we are inserting ("a",null)
2248
      here, we return 3, and next this statement will want to insert
2249
      ("b",null): there is no reason why ("b",3+1) would be the good row to
2250
      insert: maybe it already exists, maybe 3+1 is too large...
2251
    */
2252
    *nb_reserved_values= 1;
2253
  }
2254
2255
  if (error)
2256
    nr=1;
2257
  else
2258
    nr= ((uint64_t) table->next_number_field->
2259
         val_int_offset(table->s->rec_buff_length)+1);
2260
  index_end();
2261
  (void) extra(HA_EXTRA_NO_KEYREAD);
2262
  *first_value= nr;
2263
}
2264
2265
2266
void handler::ha_release_auto_increment()
2267
{
2268
  release_auto_increment();
2269
  insert_id_for_cur_row= 0;
2270
  auto_inc_interval_for_cur_row.replace(0, 0, 0);
2271
  if (next_insert_id > 0)
2272
  {
2273
    next_insert_id= 0;
2274
    /*
2275
      this statement used forced auto_increment values if there were some,
2276
      wipe them away for other statements.
2277
    */
2278
    table->in_use->auto_inc_intervals_forced.empty();
2279
  }
2280
}
2281
2282
2283
/**
  Report a duplicate-key error, writing the duplicated key into the message.

  @param key_nr  number of the offending key, or MAX_KEY when it is unknown
  @param msg     format string for the error message; it is given the key
                 value and the key name as arguments
*/
void handler::print_keydup_error(uint key_nr, const char *msg)
{
  char key[MAX_KEY_LENGTH];
  String str(key,sizeof(key),system_charset_info);

  if (key_nr == MAX_KEY)
  {
    /* Key is unknown: report an empty value and a placeholder name */
    str.copy("", 0, system_charset_info);
    my_printf_error(ER_DUP_ENTRY, msg, MYF(0), str.c_ptr(), "*UNKNOWN*");
    return;
  }

  /* Table is opened and defined at this point */
  key_unpack(&str,table,(uint) key_nr);

  /* Shorten the key text, ending it with "...", once it reaches the limit. */
  const uint room= MYSQL_ERRMSG_SIZE-(uint) strlen(msg);
  if (str.length() >= room)
  {
    str.length(room-4);
    str.append(STRING_WITH_LEN("..."));
  }
  my_printf_error(ER_DUP_ENTRY, msg,
                  MYF(0), str.c_ptr(), table->key_info[key_nr].name);
}
2309
2310
2311
/**
2312
  Print error that we got from handler function.
2313
2314
  @note
2315
    In case of delete table it's only safe to use the following parts of
2316
    the 'table' structure:
2317
    - table->s->path
2318
    - table->alias
2319
*/
2320
void handler::print_error(int error, myf errflag)
2321
{
2322
  int textno=ER_GET_ERRNO;
2323
  switch (error) {
2324
  case EACCES:
2325
    textno=ER_OPEN_AS_READONLY;
2326
    break;
2327
  case EAGAIN:
2328
    textno=ER_FILE_USED;
2329
    break;
2330
  case ENOENT:
2331
    textno=ER_FILE_NOT_FOUND;
2332
    break;
2333
  case HA_ERR_KEY_NOT_FOUND:
2334
  case HA_ERR_NO_ACTIVE_RECORD:
2335
  case HA_ERR_END_OF_FILE:
2336
    textno=ER_KEY_NOT_FOUND;
2337
    break;
2338
  case HA_ERR_WRONG_MRG_TABLE_DEF:
2339
    textno=ER_WRONG_MRG_TABLE;
2340
    break;
2341
  case HA_ERR_FOUND_DUPP_KEY:
2342
  {
2343
    uint key_nr=get_dup_key(error);
2344
    if ((int) key_nr >= 0)
2345
    {
2346
      print_keydup_error(key_nr, ER(ER_DUP_ENTRY_WITH_KEY_NAME));
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2347
      return;
1 by brian
clean slate
2348
    }
2349
    textno=ER_DUP_KEY;
2350
    break;
2351
  }
2352
  case HA_ERR_FOREIGN_DUPLICATE_KEY:
2353
  {
2354
    uint key_nr= get_dup_key(error);
2355
    if ((int) key_nr >= 0)
2356
    {
2357
      uint max_length;
2358
      /* Write the key in the error message */
2359
      char key[MAX_KEY_LENGTH];
2360
      String str(key,sizeof(key),system_charset_info);
2361
      /* Table is opened and defined at this point */
2362
      key_unpack(&str,table,(uint) key_nr);
2363
      max_length= (MYSQL_ERRMSG_SIZE-
2364
                   (uint) strlen(ER(ER_FOREIGN_DUPLICATE_KEY)));
2365
      if (str.length() >= max_length)
2366
      {
2367
        str.length(max_length-4);
2368
        str.append(STRING_WITH_LEN("..."));
2369
      }
2370
      my_error(ER_FOREIGN_DUPLICATE_KEY, MYF(0), table_share->table_name.str,
2371
        str.c_ptr(), key_nr+1);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2372
      return;
1 by brian
clean slate
2373
    }
2374
    textno= ER_DUP_KEY;
2375
    break;
2376
  }
2377
  case HA_ERR_FOUND_DUPP_UNIQUE:
2378
    textno=ER_DUP_UNIQUE;
2379
    break;
2380
  case HA_ERR_RECORD_CHANGED:
2381
    textno=ER_CHECKREAD;
2382
    break;
2383
  case HA_ERR_CRASHED:
2384
    textno=ER_NOT_KEYFILE;
2385
    break;
2386
  case HA_ERR_WRONG_IN_RECORD:
2387
    textno= ER_CRASHED_ON_USAGE;
2388
    break;
2389
  case HA_ERR_CRASHED_ON_USAGE:
2390
    textno=ER_CRASHED_ON_USAGE;
2391
    break;
2392
  case HA_ERR_NOT_A_TABLE:
2393
    textno= error;
2394
    break;
2395
  case HA_ERR_CRASHED_ON_REPAIR:
2396
    textno=ER_CRASHED_ON_REPAIR;
2397
    break;
2398
  case HA_ERR_OUT_OF_MEM:
2399
    textno=ER_OUT_OF_RESOURCES;
2400
    break;
2401
  case HA_ERR_WRONG_COMMAND:
2402
    textno=ER_ILLEGAL_HA;
2403
    break;
2404
  case HA_ERR_OLD_FILE:
2405
    textno=ER_OLD_KEYFILE;
2406
    break;
2407
  case HA_ERR_UNSUPPORTED:
2408
    textno=ER_UNSUPPORTED_EXTENSION;
2409
    break;
2410
  case HA_ERR_RECORD_FILE_FULL:
2411
  case HA_ERR_INDEX_FILE_FULL:
2412
    textno=ER_RECORD_FILE_FULL;
2413
    break;
2414
  case HA_ERR_LOCK_WAIT_TIMEOUT:
2415
    textno=ER_LOCK_WAIT_TIMEOUT;
2416
    break;
2417
  case HA_ERR_LOCK_TABLE_FULL:
2418
    textno=ER_LOCK_TABLE_FULL;
2419
    break;
2420
  case HA_ERR_LOCK_DEADLOCK:
2421
    textno=ER_LOCK_DEADLOCK;
2422
    break;
2423
  case HA_ERR_READ_ONLY_TRANSACTION:
2424
    textno=ER_READ_ONLY_TRANSACTION;
2425
    break;
2426
  case HA_ERR_CANNOT_ADD_FOREIGN:
2427
    textno=ER_CANNOT_ADD_FOREIGN;
2428
    break;
2429
  case HA_ERR_ROW_IS_REFERENCED:
2430
  {
2431
    String str;
2432
    get_error_message(error, &str);
2433
    my_error(ER_ROW_IS_REFERENCED_2, MYF(0), str.c_ptr_safe());
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2434
    return;
1 by brian
clean slate
2435
  }
2436
  case HA_ERR_NO_REFERENCED_ROW:
2437
  {
2438
    String str;
2439
    get_error_message(error, &str);
2440
    my_error(ER_NO_REFERENCED_ROW_2, MYF(0), str.c_ptr_safe());
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2441
    return;
1 by brian
clean slate
2442
  }
2443
  case HA_ERR_TABLE_DEF_CHANGED:
2444
    textno=ER_TABLE_DEF_CHANGED;
2445
    break;
2446
  case HA_ERR_NO_SUCH_TABLE:
2447
    my_error(ER_NO_SUCH_TABLE, MYF(0), table_share->db.str,
2448
             table_share->table_name.str);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2449
    return;
1 by brian
clean slate
2450
  case HA_ERR_RBR_LOGGING_FAILED:
2451
    textno= ER_BINLOG_ROW_LOGGING_FAILED;
2452
    break;
2453
  case HA_ERR_DROP_INDEX_FK:
2454
  {
2455
    const char *ptr= "???";
2456
    uint key_nr= get_dup_key(error);
2457
    if ((int) key_nr >= 0)
2458
      ptr= table->key_info[key_nr].name;
2459
    my_error(ER_DROP_INDEX_FK, MYF(0), ptr);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2460
    return;
1 by brian
clean slate
2461
  }
2462
  case HA_ERR_TABLE_NEEDS_UPGRADE:
2463
    textno=ER_TABLE_NEEDS_UPGRADE;
2464
    break;
2465
  case HA_ERR_TABLE_READONLY:
2466
    textno= ER_OPEN_AS_READONLY;
2467
    break;
2468
  case HA_ERR_AUTOINC_READ_FAILED:
2469
    textno= ER_AUTOINC_READ_FAILED;
2470
    break;
2471
  case HA_ERR_AUTOINC_ERANGE:
2472
    textno= ER_WARN_DATA_OUT_OF_RANGE;
2473
    break;
2474
  case HA_ERR_LOCK_OR_ACTIVE_TRANSACTION:
2475
    my_message(ER_LOCK_OR_ACTIVE_TRANSACTION,
2476
               ER(ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0));
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2477
    return;
1 by brian
clean slate
2478
    break;
2479
  default:
2480
    {
2481
      /* The error was "unknown" to this function.
2482
	 Ask handler if it has got a message for this error */
56 by brian
Next pass of true/false update.
2483
      bool temporary= false;
1 by brian
clean slate
2484
      String str;
2485
      temporary= get_error_message(error, &str);
2486
      if (!str.is_empty())
2487
      {
2488
	const char* engine= table_type();
2489
	if (temporary)
2490
	  my_error(ER_GET_TEMPORARY_ERRMSG, MYF(0), error, str.ptr(), engine);
2491
	else
2492
	  my_error(ER_GET_ERRMSG, MYF(0), error, str.ptr(), engine);
2493
      }
2494
      else
2495
	my_error(ER_GET_ERRNO,errflag,error);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2496
      return;
1 by brian
clean slate
2497
    }
2498
  }
2499
  my_error(textno, errflag, table_share->table_name.str, error);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2500
  return;
1 by brian
clean slate
2501
}
2502
2503
2504
/**
2505
  Return an error message specific to this handler.
2506
2507
  @param error  error code previously returned by handler
2508
  @param buf    pointer to String where to add error message
2509
2510
  @return
2511
    Returns true if this is a temporary error
2512
*/
77.1.15 by Monty Taylor
Bunch of warning cleanups.
2513
bool handler::get_error_message(int error __attribute__((__unused__)),
2514
                                String* buf __attribute__((__unused__)))
1 by brian
clean slate
2515
{
56 by brian
Next pass of true/false update.
2516
  return false;
1 by brian
clean slate
2517
}
2518
2519
2520
/**
  Check whether the table needs an upgrade.

  When the share carries no version number (mysql_version == 0), every key
  part of every key is inspected first; a key part on a BLOB field yields
  HA_ADMIN_NEEDS_CHECK. Otherwise the decision is left to the engine's
  check_for_upgrade().

  @param check_opt  check options; 'flags' is set to T_MEDIUM when a BLOB
                    key part is found and TT_FOR_UPGRADE is requested

  @return
    HA_ADMIN_NEEDS_CHECK, or whatever check_for_upgrade() returns
*/
int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
{
  if (!table->s->mysql_version)
  {
    /* check for blob-in-key error */
    KEY *const keys_end= table->key_info + table->s->keys;
    for (KEY *cur_key= table->key_info; cur_key < keys_end; cur_key++)
    {
      KEY_PART_INFO *const parts_end= cur_key->key_part + cur_key->key_parts;
      for (KEY_PART_INFO *part= cur_key->key_part; part < parts_end; part++)
      {
        if (!part->fieldnr)
          continue;
        if (table->field[part->fieldnr-1]->type() != MYSQL_TYPE_BLOB)
          continue;

        if (check_opt->sql_flags & TT_FOR_UPGRADE)
          check_opt->flags= T_MEDIUM;
        return HA_ADMIN_NEEDS_CHECK;
      }
    }
  }
  return check_for_upgrade(check_opt);
}
2550
2551
2552
/* Code left, but Drizzle has no legacy yet (while MySQL did) */
2553
int handler::check_old_types()
2554
{
2555
  return 0;
2556
}
2557
2558
2559
/**
  Store the running server version in the table's definition file.

  Writes MYSQL_VERSION_ID as 4 bytes at offset 51 of the file named
  <normalized_path><reg_ext> and, once the write has succeeded, updates
  mysql_version on the share of every open_cache entry with the same key.

  @param table  table whose definition file is to be stamped

  @retval false  success, or nothing to do (version already current)
  @retval true   the file could not be opened or the version not written
*/
static bool update_frm_version(TABLE *table)
{
  char path[FN_REFLEN];
  File file;
  bool result= true;                            // failure until proven otherwise

  /*
    No need to update frm version in case table was created or checked
    by server with the same version. This also ensures that we do not
    update frm version for temporary tables as this code doesn't support
    temporary tables.
  */
  if (table->s->mysql_version == MYSQL_VERSION_ID)
    return false;

  strxmov(path, table->s->normalized_path.str, reg_ext, NullS);

  if ((file= my_open(path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0)
  {
    uchar version[4];
    char *key= table->s->table_cache_key.str;
    uint key_length= table->s->table_cache_key.length;
    TABLE *entry;
    HASH_SEARCH_STATE state;

    int4store(version, MYSQL_VERSION_ID);

    /*
      pwrite() returns the number of bytes written, or -1 on error.
      Anything but a complete 4-byte write is a failure, and the cached
      shares must then keep their old version.
    */
    if (pwrite(file, version, sizeof(version), 51L) !=
        (ssize_t) sizeof(version))
      goto err;

    result= false;
    for (entry=(TABLE*) hash_first(&open_cache,(uchar*) key,key_length, &state);
         entry;
         entry= (TABLE*) hash_next(&open_cache,(uchar*) key,key_length, &state))
      entry->s->mysql_version= MYSQL_VERSION_ID;
  }
err:
  if (file >= 0)
    VOID(my_close(file,MYF(MY_WME)));
  return result;
}
2602
2603
2604
2605
/**
2606
  @return
2607
    key if error because of duplicated keys
2608
*/
2609
uint handler::get_dup_key(int error)
2610
{
2611
  table->file->errkey  = (uint) -1;
2612
  if (error == HA_ERR_FOUND_DUPP_KEY || error == HA_ERR_FOREIGN_DUPLICATE_KEY ||
2613
      error == HA_ERR_FOUND_DUPP_UNIQUE ||
2614
      error == HA_ERR_DROP_INDEX_FK)
2615
    info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2616
  return(table->file->errkey);
1 by brian
clean slate
2617
}
2618
2619
2620
/**
2621
  Delete all files with extension from bas_ext().
2622
2623
  @param name		Base name of table
2624
2625
  @note
2626
    We assume that the handler may return more extensions than
2627
    was actually used for the file.
2628
2629
  @retval
2630
    0   If we successfully deleted at least one file from base_ext and
2631
    didn't get any other errors than ENOENT
2632
  @retval
2633
    !0  Error
2634
*/
2635
int handler::delete_table(const char *name)
2636
{
2637
  int error= 0;
2638
  int enoent_or_zero= ENOENT;                   // Error if no file was deleted
2639
  char buff[FN_REFLEN];
2640
2641
  for (const char **ext=bas_ext(); *ext ; ext++)
2642
  {
2643
    fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME|MY_APPEND_EXT);
2644
    if (my_delete_with_symlink(buff, MYF(0)))
2645
    {
2646
      if ((error= my_errno) != ENOENT)
2647
	break;
2648
    }
2649
    else
2650
      enoent_or_zero= 0;                        // No error for ENOENT
2651
    error= enoent_or_zero;
2652
  }
2653
  return error;
2654
}
2655
2656
2657
int handler::rename_table(const char * from, const char * to)
2658
{
2659
  int error= 0;
2660
  for (const char **ext= bas_ext(); *ext ; ext++)
2661
  {
2662
    if (rename_file_ext(from, to, *ext))
2663
    {
2664
      if ((error=my_errno) != ENOENT)
2665
	break;
2666
      error= 0;
2667
    }
2668
  }
2669
  return error;
2670
}
2671
2672
2673
void handler::drop_table(const char *name)
2674
{
2675
  close();
2676
  delete_table(name);
2677
}
2678
2679
2680
/**
2681
  Performs checks upon the table.
2682
2683
  @param thd                thread doing CHECK TABLE operation
2684
  @param check_opt          options from the parser
2685
2686
  @retval
2687
    HA_ADMIN_OK               Successful upgrade
2688
  @retval
2689
    HA_ADMIN_NEEDS_UPGRADE    Table has structures requiring upgrade
2690
  @retval
2691
    HA_ADMIN_NEEDS_ALTER      Table has structures requiring ALTER TABLE
2692
  @retval
2693
    HA_ADMIN_NOT_IMPLEMENTED
2694
*/
2695
int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
2696
{
2697
  int error;
2698
2699
  if ((table->s->mysql_version >= MYSQL_VERSION_ID) &&
2700
      (check_opt->sql_flags & TT_FOR_UPGRADE))
2701
    return 0;
2702
2703
  if (table->s->mysql_version < MYSQL_VERSION_ID)
2704
  {
2705
    if ((error= check_old_types()))
2706
      return error;
2707
    error= ha_check_for_upgrade(check_opt);
2708
    if (error && (error != HA_ADMIN_NEEDS_CHECK))
2709
      return error;
2710
    if (!error && (check_opt->sql_flags & TT_FOR_UPGRADE))
2711
      return 0;
2712
  }
2713
  if ((error= check(thd, check_opt)))
2714
    return error;
2715
  return update_frm_version(table);
2716
}
2717
2718
/**
2719
  A helper function to mark a transaction read-write,
2720
  if it is started.
2721
*/
2722
2723
inline
2724
void
2725
handler::mark_trx_read_write()
2726
{
2727
  Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
2728
  /*
2729
    When a storage engine method is called, the transaction must
2730
    have been started, unless it's a DDL call, for which the
2731
    storage engine starts the transaction internally, and commits
2732
    it internally, without registering in the ha_list.
2733
    Unfortunately here we can't know know for sure if the engine
2734
    has registered the transaction or not, so we must check.
2735
  */
2736
  if (ha_info->is_started())
2737
  {
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2738
    assert(has_transactions());
1 by brian
clean slate
2739
    /*
2740
      table_share can be NULL in ha_delete_table(). See implementation
2741
      of standalone function ha_delete_table() in sql_base.cc.
2742
    */
2743
    if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
2744
      ha_info->set_trx_read_write();
2745
  }
2746
}
2747
2748
2749
/**
2750
  Repair table: public interface.
2751
2752
  @sa handler::repair()
2753
*/
2754
2755
int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
2756
{
2757
  int result;
2758
2759
  mark_trx_read_write();
2760
2761
  if ((result= repair(thd, check_opt)))
2762
    return result;
2763
  return update_frm_version(table);
2764
}
2765
2766
2767
/**
2768
  Bulk update row: public interface.
2769
2770
  @sa handler::bulk_update_row()
2771
*/
2772
2773
int
2774
handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data,
2775
                            uint *dup_key_found)
2776
{
2777
  mark_trx_read_write();
2778
2779
  return bulk_update_row(old_data, new_data, dup_key_found);
2780
}
2781
2782
2783
/**
2784
  Delete all rows: public interface.
2785
2786
  @sa handler::delete_all_rows()
2787
*/
2788
2789
int
2790
handler::ha_delete_all_rows()
2791
{
2792
  mark_trx_read_write();
2793
2794
  return delete_all_rows();
2795
}
2796
2797
2798
/**
2799
  Reset auto increment: public interface.
2800
2801
  @sa handler::reset_auto_increment()
2802
*/
2803
2804
int
2805
handler::ha_reset_auto_increment(uint64_t value)
2806
{
2807
  mark_trx_read_write();
2808
2809
  return reset_auto_increment(value);
2810
}
2811
2812
2813
/**
2814
  Optimize table: public interface.
2815
2816
  @sa handler::optimize()
2817
*/
2818
2819
int
2820
handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
2821
{
2822
  mark_trx_read_write();
2823
2824
  return optimize(thd, check_opt);
2825
}
2826
2827
2828
/**
2829
  Analyze table: public interface.
2830
2831
  @sa handler::analyze()
2832
*/
2833
2834
int
2835
handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
2836
{
2837
  mark_trx_read_write();
2838
2839
  return analyze(thd, check_opt);
2840
}
2841
2842
2843
/**
2844
  Check and repair table: public interface.
2845
2846
  @sa handler::check_and_repair()
2847
*/
2848
2849
bool
2850
handler::ha_check_and_repair(THD *thd)
2851
{
2852
  mark_trx_read_write();
2853
2854
  return check_and_repair(thd);
2855
}
2856
2857
2858
/**
2859
  Disable indexes: public interface.
2860
2861
  @sa handler::disable_indexes()
2862
*/
2863
2864
int
2865
handler::ha_disable_indexes(uint mode)
2866
{
2867
  mark_trx_read_write();
2868
2869
  return disable_indexes(mode);
2870
}
2871
2872
2873
/**
2874
  Enable indexes: public interface.
2875
2876
  @sa handler::enable_indexes()
2877
*/
2878
2879
int
2880
handler::ha_enable_indexes(uint mode)
2881
{
2882
  mark_trx_read_write();
2883
2884
  return enable_indexes(mode);
2885
}
2886
2887
2888
/**
2889
  Discard or import tablespace: public interface.
2890
2891
  @sa handler::discard_or_import_tablespace()
2892
*/
2893
2894
int
2895
handler::ha_discard_or_import_tablespace(my_bool discard)
2896
{
2897
  mark_trx_read_write();
2898
2899
  return discard_or_import_tablespace(discard);
2900
}
2901
2902
2903
/**
2904
  Prepare for alter: public interface.
2905
2906
  Called to prepare an *online* ALTER.
2907
2908
  @sa handler::prepare_for_alter()
2909
*/
2910
2911
void
2912
handler::ha_prepare_for_alter()
2913
{
2914
  mark_trx_read_write();
2915
2916
  prepare_for_alter();
2917
}
2918
2919
2920
/**
2921
  Rename table: public interface.
2922
2923
  @sa handler::rename_table()
2924
*/
2925
2926
int
2927
handler::ha_rename_table(const char *from, const char *to)
2928
{
2929
  mark_trx_read_write();
2930
2931
  return rename_table(from, to);
2932
}
2933
2934
2935
/**
2936
  Delete table: public interface.
2937
2938
  @sa handler::delete_table()
2939
*/
2940
2941
int
2942
handler::ha_delete_table(const char *name)
2943
{
2944
  mark_trx_read_write();
2945
2946
  return delete_table(name);
2947
}
2948
2949
2950
/**
2951
  Drop table in the engine: public interface.
2952
2953
  @sa handler::drop_table()
2954
*/
2955
2956
void
2957
handler::ha_drop_table(const char *name)
2958
{
2959
  mark_trx_read_write();
2960
2961
  return drop_table(name);
2962
}
2963
2964
2965
/**
2966
  Create a table in the engine: public interface.
2967
2968
  @sa handler::create()
2969
*/
2970
2971
int
2972
handler::ha_create(const char *name, TABLE *form, HA_CREATE_INFO *info)
2973
{
2974
  mark_trx_read_write();
2975
2976
  return create(name, form, info);
2977
}
2978
2979
2980
/**
2981
  Create handler files for CREATE TABLE: public interface.
2982
2983
  @sa handler::create_handler_files()
2984
*/
2985
2986
int
2987
handler::ha_create_handler_files(const char *name, const char *old_name,
2988
                        int action_flag, HA_CREATE_INFO *info)
2989
{
2990
  mark_trx_read_write();
2991
2992
  return create_handler_files(name, old_name, action_flag, info);
2993
}
2994
2995
2996
/**
2997
  Tell the storage engine that it is allowed to "disable transaction" in the
2998
  handler. It is a hint that ACID is not required - it is used in NDB for
2999
  ALTER TABLE, for example, when data are copied to temporary table.
3000
  A storage engine may treat this hint any way it likes. NDB for example
3001
  starts to commit every now and then automatically.
3002
  This hint can be safely ignored.
3003
*/
3004
int ha_enable_transaction(THD *thd, bool on)
3005
{
3006
  int error=0;
3007
3008
  if ((thd->transaction.on= on))
3009
  {
3010
    /*
3011
      Now all storage engines should have transaction handling enabled.
3012
      But some may have it enabled all the time - "disabling" transactions
3013
      is an optimization hint that storage engine is free to ignore.
3014
      So, let's commit an open transaction (if any) now.
3015
    */
3016
    if (!(error= ha_commit_trans(thd, 0)))
3017
      error= end_trans(thd, COMMIT);
3018
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3019
  return(error);
1 by brian
clean slate
3020
}
3021
3022
/**
  Read the next index entry and verify that it still matches 'key'.

  @param buf     buffer the row is read into
  @param key     key the row is compared against
  @param keylen  length of 'key'

  @return
    the error from index_next(); HA_ERR_END_OF_FILE (with table->status set
    to STATUS_NOT_FOUND) when the row read does not match 'key'; else 0
*/
int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
{
  int error= index_next(buf);
  if (error)
    return error;

  const my_ptrdiff_t offset= buf - table->record[0];
  uchar *orig_record_0= NULL;
  KEY *index_info= NULL;
  KEY_PART_INFO *part;
  KEY_PART_INFO *parts_end= NULL;

  /*
    key_cmp_if_same() compares table->record[0] against 'key'.
    In parts it uses table->record[0] directly, in parts it uses
    field objects with their local pointers into table->record[0].
    If 'buf' is distinct from table->record[0], we need to move
    all record references. This is table->record[0] itself and
    the field pointers of the fields used in this key.
  */
  if (offset)
  {
    orig_record_0= table->record[0];
    table->record[0]= buf;
    index_info= table->key_info + active_index;
    parts_end= index_info->key_part + index_info->key_parts;
    for (part= index_info->key_part; part < parts_end; part++)
    {
      assert(part->field);
      part->field->move_field_offset(offset);
    }
  }

  if (key_cmp_if_same(table, key, active_index, keylen))
  {
    table->status= STATUS_NOT_FOUND;
    error= HA_ERR_END_OF_FILE;
  }

  /* Move back if necessary. */
  if (offset)
  {
    table->record[0]= orig_record_0;
    for (part= index_info->key_part; part < parts_end; part++)
      part->field->move_field_offset(-offset);
  }
  return error;
}
3071
3072
3073
/****************************************************************************
3074
** Some general functions that aren't in the handler class
3075
****************************************************************************/
3076
3077
/**
3078
  Initiates table-file and calls appropriate database-creator.
3079
3080
  @retval
3081
   0  ok
3082
  @retval
3083
   1  error
3084
*/
3085
int ha_create_table(THD *thd, const char *path,
3086
                    const char *db, const char *table_name,
3087
                    HA_CREATE_INFO *create_info,
3088
		    bool update_create_info)
3089
{
3090
  int error= 1;
3091
  TABLE table;
3092
  char name_buff[FN_REFLEN];
3093
  const char *name;
3094
  TABLE_SHARE share;
3095
  
3096
  init_tmp_table_share(thd, &share, db, 0, table_name, path);
3097
  if (open_table_def(thd, &share, 0) ||
3098
      open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table,
3099
                            OTM_CREATE))
3100
    goto err;
3101
3102
  if (update_create_info)
3103
    update_create_info_from_table(create_info, &table);
3104
3105
  name= check_lowercase_names(table.file, share.path.str, name_buff);
3106
3107
  error= table.file->ha_create(name, &table, create_info);
3108
  VOID(closefrm(&table, 0));
3109
  if (error)
3110
  {
3111
    strxmov(name_buff, db, ".", table_name, NullS);
3112
    my_error(ER_CANT_CREATE_TABLE, MYF(ME_BELL+ME_WAITTANG), name_buff, error);
3113
  }
3114
err:
3115
  free_table_share(&share);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3116
  return(error != 0);
1 by brian
clean slate
3117
}
3118
3119
/**
3120
  Try to discover table from engine.
3121
3122
  @note
3123
    If found, write the frm file to disk.
3124
3125
  @retval
3126
  -1    Table did not exists
3127
  @retval
3128
   0    Table created ok
3129
  @retval
3130
   > 0  Error, table existed but could not be created
3131
*/
3132
int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
3133
{
3134
  int error;
3135
  uchar *frmblob;
3136
  size_t frmlen;
3137
  char path[FN_REFLEN];
3138
  HA_CREATE_INFO create_info;
3139
  TABLE table;
3140
  TABLE_SHARE share;
3141
3142
  bzero((uchar*) &create_info,sizeof(create_info));
3143
  if ((error= ha_discover(thd, db, name, &frmblob, &frmlen)))
3144
  {
3145
    /* Table could not be discovered and thus not created */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3146
    return(error);
1 by brian
clean slate
3147
  }
3148
3149
  /*
3150
    Table exists in handler and could be discovered
3151
    frmblob and frmlen are set, write the frm to disk
3152
  */
3153
3154
  build_table_filename(path, FN_REFLEN-1, db, name, "", 0);
3155
  // Save the frm file
3156
  error= writefrm(path, frmblob, frmlen);
3157
  my_free(frmblob, MYF(0));
3158
  if (error)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3159
    return(2);
1 by brian
clean slate
3160
3161
  init_tmp_table_share(thd, &share, db, 0, name, path);
3162
  if (open_table_def(thd, &share, 0))
3163
  {
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3164
    return(3);
1 by brian
clean slate
3165
  }
3166
  if (open_table_from_share(thd, &share, "" ,0, 0, 0, &table, OTM_OPEN))
3167
  {
3168
    free_table_share(&share);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3169
    return(3);
1 by brian
clean slate
3170
  }
3171
3172
  update_create_info_from_table(&create_info, &table);
3173
  create_info.table_options|= HA_OPTION_CREATE_FROM_ENGINE;
3174
3175
  check_lowercase_names(table.file, path, path);
3176
  error=table.file->ha_create(path, &table, &create_info);
3177
  VOID(closefrm(&table, 1));
3178
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3179
  return(error != 0);
1 by brian
clean slate
3180
}
3181
3182
void st_ha_check_opt::init()
3183
{
3184
  flags= sql_flags= 0;
3185
  sort_buffer_size = current_thd->variables.myisam_sort_buff_size;
3186
}
3187
3188
3189
/*****************************************************************************
3190
  Key cache handling.
3191
3192
  This code is only relevant for ISAM/MyISAM tables
3193
3194
  key_cache->cache may be 0 only in the case where a key cache is not
3195
  initialized or when we were not able to init the key cache in a previous
3196
  call to ha_init_key_cache() (probably out of memory)
3197
*****************************************************************************/
3198
3199
/**
3200
  Init a key cache if it has not been initied before.
3201
*/
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3202
int ha_init_key_cache(const char *name __attribute__((__unused__)),
3203
                      KEY_CACHE *key_cache)
1 by brian
clean slate
3204
{
3205
  if (!key_cache->key_cache_inited)
3206
  {
3207
    pthread_mutex_lock(&LOCK_global_system_variables);
61 by Brian Aker
Conversion of handler type.
3208
    uint32_t tmp_buff_size= (uint32_t) key_cache->param_buff_size;
1 by brian
clean slate
3209
    uint tmp_block_size= (uint) key_cache->param_block_size;
3210
    uint division_limit= key_cache->param_division_limit;
3211
    uint age_threshold=  key_cache->param_age_threshold;
3212
    pthread_mutex_unlock(&LOCK_global_system_variables);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3213
    return(!init_key_cache(key_cache,
1 by brian
clean slate
3214
				tmp_block_size,
3215
				tmp_buff_size,
3216
				division_limit, age_threshold));
3217
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3218
  return(0);
1 by brian
clean slate
3219
}
3220
3221
3222
/**
3223
  Resize key cache.
3224
*/
3225
int ha_resize_key_cache(KEY_CACHE *key_cache)
3226
{
3227
  if (key_cache->key_cache_inited)
3228
  {
3229
    pthread_mutex_lock(&LOCK_global_system_variables);
3230
    long tmp_buff_size= (long) key_cache->param_buff_size;
3231
    long tmp_block_size= (long) key_cache->param_block_size;
3232
    uint division_limit= key_cache->param_division_limit;
3233
    uint age_threshold=  key_cache->param_age_threshold;
3234
    pthread_mutex_unlock(&LOCK_global_system_variables);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3235
    return(!resize_key_cache(key_cache, tmp_block_size,
1 by brian
clean slate
3236
				  tmp_buff_size,
3237
				  division_limit, age_threshold));
3238
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3239
  return(0);
1 by brian
clean slate
3240
}
3241
3242
3243
/**
3244
  Change parameters for key cache (like size)
3245
*/
3246
int ha_change_key_cache_param(KEY_CACHE *key_cache)
3247
{
3248
  if (key_cache->key_cache_inited)
3249
  {
3250
    pthread_mutex_lock(&LOCK_global_system_variables);
3251
    uint division_limit= key_cache->param_division_limit;
3252
    uint age_threshold=  key_cache->param_age_threshold;
3253
    pthread_mutex_unlock(&LOCK_global_system_variables);
3254
    change_key_cache_param(key_cache, division_limit, age_threshold);
3255
  }
3256
  return 0;
3257
}
3258
3259
/**
3260
  Free memory allocated by a key cache.
3261
*/
3262
int ha_end_key_cache(KEY_CACHE *key_cache)
3263
{
3264
  end_key_cache(key_cache, 1);		// Can never fail
3265
  return 0;
3266
}
3267
3268
/**
3269
  Move all tables from one key cache to another one.
3270
*/
3271
int ha_change_key_cache(KEY_CACHE *old_key_cache,
3272
			KEY_CACHE *new_key_cache)
3273
{
3274
  mi_change_key_cache(old_key_cache, new_key_cache);
3275
  return 0;
3276
}
3277
3278
3279
/**
3280
  Try to discover one table from handler(s).
3281
3282
  @retval
3283
    -1   Table does not exist
3284
  @retval
3285
    0   OK. In this case *frmblob and *frmlen are set
3286
  @retval
3287
    >0   error.  frmblob and frmlen may not be set
3288
*/
3289
struct st_discover_args
3290
{
3291
  const char *db;
3292
  const char *name;
3293
  uchar **frmblob; 
3294
  size_t *frmlen;
3295
};
3296
149 by Brian Aker
More bool conversion.
3297
static bool discover_handlerton(THD *thd, plugin_ref plugin,
3298
                                void *arg)
1 by brian
clean slate
3299
{
3300
  st_discover_args *vargs= (st_discover_args *)arg;
3301
  handlerton *hton= plugin_data(plugin, handlerton *);
3302
  if (hton->state == SHOW_OPTION_YES && hton->discover &&
3303
      (!(hton->discover(hton, thd, vargs->db, vargs->name, 
3304
                        vargs->frmblob, 
3305
                        vargs->frmlen))))
56 by brian
Next pass of true/false update.
3306
    return true;
1 by brian
clean slate
3307
56 by brian
Next pass of true/false update.
3308
  return false;
1 by brian
clean slate
3309
}
3310
3311
int ha_discover(THD *thd, const char *db, const char *name,
3312
		uchar **frmblob, size_t *frmlen)
3313
{
3314
  int error= -1; // Table does not exist in any handler
3315
  st_discover_args args= {db, name, frmblob, frmlen};
3316
3317
  if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3318
    return(error);
1 by brian
clean slate
3319
3320
  if (plugin_foreach(thd, discover_handlerton,
3321
                 MYSQL_STORAGE_ENGINE_PLUGIN, &args))
3322
    error= 0;
3323
3324
  if (!error)
3325
    status_var_increment(thd->status_var.ha_discover_count);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3326
  return(error);
1 by brian
clean slate
3327
}
3328
3329
3330
/**
3331
  Call this function in order to give the handler the possibility
3332
  to ask engine if there are any new tables that should be written to disk
3333
  or any dropped tables that need to be removed from disk
3334
*/
3335
struct st_find_files_args
3336
{
3337
  const char *db;
3338
  const char *path;
3339
  const char *wild;
3340
  bool dir;
3341
  List<LEX_STRING> *files;
3342
};
3343
3344
/**
3345
  Ask handler if the table exists in engine.
3346
  @retval
3347
    HA_ERR_NO_SUCH_TABLE     Table does not exist
3348
  @retval
3349
    HA_ERR_TABLE_EXIST       Table exists
3350
  @retval
3351
    \#                  Error code
3352
*/
3353
/*
  Argument bundle passed through plugin_foreach() to
  table_exists_in_engine_handlerton().
*/
struct st_table_exists_in_engine_args
{
  const char *db;     /* database name forwarded to the engine */
  const char *name;   /* table name forwarded to the engine */
  int err;            /* result code written back by the callback */
};
3359
149 by Brian Aker
More bool conversion.
3360
static bool table_exists_in_engine_handlerton(THD *thd, plugin_ref plugin,
3361
                                              void *arg)
1 by brian
clean slate
3362
{
3363
  st_table_exists_in_engine_args *vargs= (st_table_exists_in_engine_args *)arg;
3364
  handlerton *hton= plugin_data(plugin, handlerton *);
3365
3366
  int err= HA_ERR_NO_SUCH_TABLE;
3367
3368
  if (hton->state == SHOW_OPTION_YES && hton->table_exists_in_engine)
3369
    err = hton->table_exists_in_engine(hton, thd, vargs->db, vargs->name);
3370
3371
  vargs->err = err;
3372
  if (vargs->err == HA_ERR_TABLE_EXIST)
56 by brian
Next pass of true/false update.
3373
    return true;
1 by brian
clean slate
3374
56 by brian
Next pass of true/false update.
3375
  return false;
1 by brian
clean slate
3376
}
3377
3378
/*
  Ask the storage engines whether table db.name exists.

  Runs table_exists_in_engine_handlerton() over the storage engine plugins
  and returns the code that callback last stored: HA_ERR_NO_SUCH_TABLE
  (also the initial value), HA_ERR_TABLE_EXIST, or another error code.
*/
int ha_table_exists_in_engine(THD* thd, const char* db, const char* name)
{
  st_table_exists_in_engine_args query;
  query.db= db;
  query.name= name;
  query.err= HA_ERR_NO_SUCH_TABLE;

  /* The verdict travels back in query.err, not in the return value. */
  (void) plugin_foreach(thd, table_exists_in_engine_handlerton,
                        MYSQL_STORAGE_ENGINE_PLUGIN, &query);
  return(query.err);
}
3385
3386
/**
3387
  Calculate cost of 'index only' scan for given index and number of records
3388
3389
  @param keynr    Index number
3390
  @param records  Estimated number of records to be retrieved
3391
3392
  @note
3393
    It is assumed that we will read trough the whole key range and that all
3394
    key blocks are half full (normally things are much better). It is also
3395
    assumed that each time we read the next key from the index, the handler
3396
    performs a random seek, thus the cost is proportional to the number of
3397
    blocks read.
3398
3399
  @todo
3400
    Consider joining this function and handler::read_time() into one
3401
    handler::read_time(keynr, records, ranges, bool index_only) function.
3402
3403
  @return
3404
    Estimated cost of 'index only' scan
3405
*/
3406
3407
double handler::index_only_read_time(uint keynr, double records)
3408
{
3409
  double read_time;
3410
  uint keys_per_block= (stats.block_size/2/
3411
			(table->key_info[keynr].key_length + ref_length) + 1);
3412
  read_time=((double) (records + keys_per_block-1) /
3413
             (double) keys_per_block);
3414
  return read_time;
3415
}
3416
3417
3418
/****************************************************************************
3419
 * Default MRR implementation (MRR to non-MRR converter)
3420
 ***************************************************************************/
3421
3422
/**
3423
  Get cost and other information about MRR scan over a known list of ranges
3424
3425
  Calculate estimated cost and other information about an MRR scan for given
3426
  sequence of ranges.
3427
3428
  @param keyno           Index number
3429
  @param seq             Range sequence to be traversed
3430
  @param seq_init_param  First parameter for seq->init()
3431
  @param n_ranges_arg    Number of ranges in the sequence, or 0 if the caller
3432
                         can't efficiently determine it
3433
  @param bufsz    INOUT  IN:  Size of the buffer available for use
3434
                         OUT: Size of the buffer that is expected to be actually
3435
                              used, or 0 if buffer is not needed.
3436
  @param flags    INOUT  A combination of HA_MRR_* flags
3437
  @param cost     OUT    Estimated cost of MRR access
3438
3439
  @note
3440
    This method (or an overriding one in a derived class) must check for
3441
    thd->killed and return HA_POS_ERROR if it is not zero. This is required
3442
    for a user to be able to interrupt the calculation by killing the
3443
    connection/query.
3444
3445
  @retval
3446
    HA_POS_ERROR  Error or the engine is unable to perform the requested
3447
                  scan. Values of OUT parameters are undefined.
3448
  @retval
3449
    other         OK, *cost contains cost of the scan, *bufsz and *flags
3450
                  contain scan parameters.
3451
*/
3452
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3453
ha_rows
1 by brian
clean slate
3454
handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3455
                                     void *seq_init_param,
3456
                                     uint n_ranges_arg __attribute__((__unused__)),
1 by brian
clean slate
3457
                                     uint *bufsz, uint *flags, COST_VECT *cost)
3458
{
3459
  KEY_MULTI_RANGE range;
3460
  range_seq_t seq_it;
3461
  ha_rows rows, total_rows= 0;
3462
  uint n_ranges=0;
3463
  THD *thd= current_thd;
3464
  
3465
  /* Default MRR implementation doesn't need buffer */
3466
  *bufsz= 0;
3467
3468
  seq_it= seq->init(seq_init_param, n_ranges, *flags);
3469
  while (!seq->next(seq_it, &range))
3470
  {
3471
    if (unlikely(thd->killed != 0))
3472
      return HA_POS_ERROR;
3473
    
3474
    n_ranges++;
3475
    key_range *min_endp, *max_endp;
3476
    {
3477
      min_endp= range.start_key.length? &range.start_key : NULL;
3478
      max_endp= range.end_key.length? &range.end_key : NULL;
3479
    }
3480
    if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
3481
      rows= 1; /* there can be at most one row */
3482
    else
3483
    {
3484
      if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, 
3485
                                                        max_endp)))
3486
      {
3487
        /* Can't scan one range => can't do MRR scan at all */
3488
        total_rows= HA_POS_ERROR;
3489
        break;
3490
      }
3491
    }
3492
    total_rows += rows;
3493
  }
3494
  
3495
  if (total_rows != HA_POS_ERROR)
3496
  {
3497
    /* The following calculation is the same as in multi_range_read_info(): */
3498
    *flags |= HA_MRR_USE_DEFAULT_IMPL;
3499
    cost->zero();
3500
    cost->avg_io_cost= 1; /* assume random seeks */
3501
    if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
3502
      cost->io_count= index_only_read_time(keyno, (uint)total_rows);
3503
    else
3504
      cost->io_count= read_time(keyno, n_ranges, total_rows);
3505
    cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
3506
  }
3507
  return total_rows;
3508
}
3509
3510
3511
/**
3512
  Get cost and other information about MRR scan over some sequence of ranges
3513
3514
  Calculate estimated cost and other information about an MRR scan for some
3515
  sequence of ranges.
3516
3517
  The ranges themselves will be known only at execution phase. When this
3518
  function is called we only know number of ranges and a (rough) E(#records)
3519
  within those ranges.
3520
3521
  Currently this function is only called for "n-keypart singlepoint" ranges,
3522
  i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
3523
3524
  The flags parameter is a combination of those flags: HA_MRR_SORTED,
3525
  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
3526
3527
  @param keyno           Index number
3528
  @param n_ranges        Estimated number of ranges (i.e. intervals) in the
3529
                         range sequence.
3530
  @param n_rows          Estimated total number of records contained within all
3531
                         of the ranges
3532
  @param bufsz    INOUT  IN:  Size of the buffer available for use
3533
                         OUT: Size of the buffer that will be actually used, or
3534
                              0 if buffer is not needed.
3535
  @param flags    INOUT  A combination of HA_MRR_* flags
3536
  @param cost     OUT    Estimated cost of MRR access
3537
3538
  @retval
3539
    0     OK, *cost contains cost of the scan, *bufsz and *flags contain scan
3540
          parameters.
3541
  @retval
3542
    other Error or can't perform the requested scan
3543
*/
3544
3545
int handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
3546
                                   uint *bufsz, uint *flags, COST_VECT *cost)
3547
{
3548
  *bufsz= 0; /* Default implementation doesn't need a buffer */
3549
3550
  *flags |= HA_MRR_USE_DEFAULT_IMPL;
3551
3552
  cost->zero();
3553
  cost->avg_io_cost= 1; /* assume random seeks */
3554
3555
  /* Produce the same cost as non-MRR code does */
3556
  if (*flags & HA_MRR_INDEX_ONLY)
3557
    cost->io_count= index_only_read_time(keyno, n_rows);
3558
  else
3559
    cost->io_count= read_time(keyno, n_ranges, n_rows);
3560
  return 0;
3561
}
3562
3563
3564
/**
3565
  Initialize the MRR scan
3566
3567
  Initialize the MRR scan. This function may do heavyweight scan 
3568
  initialization like row prefetching/sorting/etc (NOTE: but better not do
3569
  it here as we may not need it, e.g. if we never satisfy WHERE clause on
3570
  previous tables. For many implementations it would be natural to do such
3571
  initializations in the first multi_read_range_next() call)
3572
3573
  mode is a combination of the following flags: HA_MRR_SORTED,
3574
  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION 
3575
3576
  @param seq             Range sequence to be traversed
3577
  @param seq_init_param  First parameter for seq->init()
3578
  @param n_ranges        Number of ranges in the sequence
3579
  @param mode            Flags, see the description section for the details
3580
  @param buf             INOUT: memory buffer to be used
3581
3582
  @note
3583
    One must have called index_init() before calling this function. Several
3584
    multi_range_read_init() calls may be made in course of one query.
3585
3586
    Until WL#2623 is done (see its text, section 3.2), the following will 
3587
    also hold:
3588
    The caller will guarantee that if "seq->init == mrr_ranges_array_init"
3589
    then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
3590
    This property will only be used by NDB handler until WL#2623 is done.
3591
     
3592
    Buffer memory management is done according to the following scenario:
3593
    The caller allocates the buffer and provides it to the callee by filling
3594
    the members of HANDLER_BUFFER structure.
3595
    The callee consumes all or some fraction of the provided buffer space, and
3596
    sets the HANDLER_BUFFER members accordingly.
3597
    The callee may use the buffer memory until the next multi_range_read_init()
3598
    call is made, all records have been read, or until index_end() call is
3599
    made, whichever comes first.
3600
3601
  @retval 0  OK
3602
  @retval 1  Error
3603
*/
3604
3605
int
3606
handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3607
                               uint n_ranges, uint mode,
3608
                               HANDLER_BUFFER *buf __attribute__((__unused__)))
1 by brian
clean slate
3609
{
3610
  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
3611
  mrr_funcs= *seq_funcs;
3612
  mrr_is_output_sorted= test(mode & HA_MRR_SORTED);
56 by brian
Next pass of true/false update.
3613
  mrr_have_range= false;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3614
  return(0);
1 by brian
clean slate
3615
}
3616
3617
3618
/**
3619
  Get next record in MRR scan
3620
3621
  Default MRR implementation: read the next record
3622
3623
  @param range_info  OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
3624
                          Otherwise, the opaque value associated with the range
3625
                          that contains the returned record.
3626
3627
  @retval 0      OK
3628
  @retval other  Error code
3629
*/
3630
3631
int handler::multi_range_read_next(char **range_info)
3632
{
3633
  int result= 0;
3634
  int range_res;
3635
3636
  if (!mrr_have_range)
3637
  {
56 by brian
Next pass of true/false update.
3638
    mrr_have_range= true;
1 by brian
clean slate
3639
    goto start;
3640
  }
3641
3642
  do
3643
  {
3644
    /* Save a call if there can be only one row in range. */
3645
    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
3646
    {
3647
      result= read_range_next();
3648
      /* On success or non-EOF errors jump to the end. */
3649
      if (result != HA_ERR_END_OF_FILE)
3650
        break;
3651
    }
3652
    else
3653
    {
3654
      if (was_semi_consistent_read())
3655
        goto scan_it_again;
3656
      /*
3657
        We need to set this for the last range only, but checking this
3658
        condition is more expensive than just setting the result code.
3659
      */
3660
      result= HA_ERR_END_OF_FILE;
3661
    }
3662
3663
start:
3664
    /* Try the next range(s) until one matches a record. */
3665
    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
3666
    {
3667
scan_it_again:
3668
      result= read_range_first(mrr_cur_range.start_key.keypart_map ?
3669
                                 &mrr_cur_range.start_key : 0,
3670
                               mrr_cur_range.end_key.keypart_map ?
3671
                                 &mrr_cur_range.end_key : 0,
3672
                               test(mrr_cur_range.range_flag & EQ_RANGE),
3673
                               mrr_is_output_sorted);
3674
      if (result != HA_ERR_END_OF_FILE)
3675
        break;
3676
    }
3677
  }
3678
  while ((result == HA_ERR_END_OF_FILE) && !range_res);
3679
3680
  *range_info= mrr_cur_range.ptr;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3681
  return(result);
1 by brian
clean slate
3682
}
3683
3684
3685
/* **************************************************************************
3686
 * DS-MRR implementation 
3687
 ***************************************************************************/
3688
3689
/**
3690
  DS-MRR: Initialize and start MRR scan
3691
3692
  Initialize and start the MRR scan. Depending on the mode parameter, this
3693
  may use default or DS-MRR implementation.
3694
3695
  @param h               Table handler to be used
3696
  @param key             Index to be used
3697
  @param seq_funcs       Interval sequence enumeration functions
3698
  @param seq_init_param  Interval sequence enumeration parameter
3699
  @param n_ranges        Number of ranges in the sequence.
3700
  @param mode            HA_MRR_* modes to use
3701
  @param buf             INOUT Buffer to use
3702
3703
  @retval 0     Ok, Scan started.
3704
  @retval other Error
3705
*/
3706
3707
int DsMrr_impl::dsmrr_init(handler *h, KEY *key,
3708
                           RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
3709
                           uint n_ranges, uint mode, HANDLER_BUFFER *buf)
3710
{
3711
  uint elem_size;
3712
  uint keyno;
3713
  Item *pushed_cond= NULL;
3714
  handler *new_h2;
3715
  keyno= h->active_index;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3716
  assert(h2 == NULL);
1 by brian
clean slate
3717
  if (mode & HA_MRR_USE_DEFAULT_IMPL || mode & HA_MRR_SORTED)
3718
  {
56 by brian
Next pass of true/false update.
3719
    use_default_impl= true;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3720
    return(h->handler::multi_range_read_init(seq_funcs, seq_init_param,
1 by brian
clean slate
3721
                                                  n_ranges, mode, buf));
3722
  }
3723
  rowids_buf= buf->buffer;
3724
  //psergey-todo: don't add key_length as it is not needed anymore
3725
  rowids_buf += key->key_length + h->ref_length;
3726
3727
  is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
3728
  rowids_buf_end= buf->buffer_end;
3729
  
3730
  elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
3731
  rowids_buf_last= rowids_buf + 
3732
                      ((rowids_buf_end - rowids_buf)/ elem_size)*
3733
                      elem_size;
3734
  rowids_buf_end= rowids_buf_last;
3735
3736
  /* Create a separate handler object to do rndpos() calls. */
3737
  THD *thd= current_thd;
3738
  if (!(new_h2= h->clone(thd->mem_root)) || 
3739
      new_h2->ha_external_lock(thd, F_RDLCK))
3740
  {
3741
    delete new_h2;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3742
    return(1);
1 by brian
clean slate
3743
  }
3744
3745
  if (keyno == h->pushed_idx_cond_keyno)
3746
    pushed_cond= h->pushed_idx_cond;
3747
  if (h->ha_index_end())
3748
  {
3749
    new_h2= h2;
3750
    goto error;
3751
  }
3752
3753
  h2= new_h2;
3754
  table->prepare_for_position();
3755
  new_h2->extra(HA_EXTRA_KEYREAD);
3756
56 by brian
Next pass of true/false update.
3757
  if (h2->ha_index_init(keyno, false) || 
1 by brian
clean slate
3758
      h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
3759
                                         mode, buf))
3760
    goto error;
56 by brian
Next pass of true/false update.
3761
  use_default_impl= false;
1 by brian
clean slate
3762
  
3763
  if (pushed_cond)
3764
    h2->idx_cond_push(keyno, pushed_cond);
3765
  if (dsmrr_fill_buffer(new_h2))
3766
    goto error;
3767
3768
  /*
3769
    If the above call has scanned through all intervals in *seq, then
3770
    adjust *buf to indicate that the remaining buffer space will not be used.
3771
  */
3772
  if (dsmrr_eof) 
3773
    buf->end_of_used_area= rowids_buf_last;
3774
56 by brian
Next pass of true/false update.
3775
  if (h->ha_rnd_init(false))
1 by brian
clean slate
3776
    goto error;
3777
  
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3778
  return(0);
1 by brian
clean slate
3779
error:
3780
  h2->ha_index_or_rnd_end();
3781
  h2->ha_external_lock(thd, F_UNLCK);
3782
  h2->close();
3783
  delete h2;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3784
  return(1);
1 by brian
clean slate
3785
}
3786
3787
3788
void DsMrr_impl::dsmrr_close()
3789
{
3790
  if (h2)
3791
  {
3792
    h2->ha_external_lock(current_thd, F_UNLCK);
3793
    h2->close();
3794
    delete h2;
3795
    h2= NULL;
3796
  }
56 by brian
Next pass of true/false update.
3797
  use_default_impl= true;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3798
  return;
1 by brian
clean slate
3799
}
3800
3801
3802
static int rowid_cmp(void *h, uchar *a, uchar *b)
3803
{
3804
  return ((handler*)h)->cmp_ref(a, b);
3805
}
3806
3807
3808
/**
3809
  DS-MRR: Fill the buffer with rowids and sort it by rowid
3810
3811
  {This is an internal function of DiskSweep MRR implementation}
3812
  Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into 
3813
  buffer. When the buffer is full or scan is completed, sort the buffer by 
3814
  rowid and return.
3815
  
3816
  The function assumes that rowids buffer is empty when it is invoked. 
3817
  
3818
  @param h  Table handler
3819
3820
  @retval 0      OK, the next portion of rowids is in the buffer,
3821
                 properly ordered
3822
  @retval other  Error
3823
*/
3824
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3825
int DsMrr_impl::dsmrr_fill_buffer(handler *unused __attribute__((__unused__)))
1 by brian
clean slate
3826
{
3827
  char *range_info;
3828
  int res;
3829
3830
  rowids_buf_cur= rowids_buf;
3831
  while ((rowids_buf_cur < rowids_buf_end) && 
3832
         !(res= h2->handler::multi_range_read_next(&range_info)))
3833
  {
3834
    /* Put rowid, or {rowid, range_id} pair into the buffer */
3835
    h2->position(table->record[0]);
3836
    memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
3837
    rowids_buf_cur += h->ref_length;
3838
3839
    if (is_mrr_assoc)
3840
    {
3841
      memcpy(rowids_buf_cur, &range_info, sizeof(void*));
3842
      rowids_buf_cur += sizeof(void*);
3843
    }
3844
  }
3845
3846
  if (res && res != HA_ERR_END_OF_FILE)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3847
    return(res); 
1 by brian
clean slate
3848
  dsmrr_eof= test(res == HA_ERR_END_OF_FILE);
3849
3850
  /* Sort the buffer contents by rowid */
3851
  uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
3852
  uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
3853
  
3854
  my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
3855
            (void*)h);
3856
  rowids_buf_last= rowids_buf_cur;
3857
  rowids_buf_cur=  rowids_buf;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3858
  return(0);
1 by brian
clean slate
3859
}
3860
3861
3862
/**
3863
  DS-MRR implementation: multi_range_read_next() function
3864
*/
3865
3866
int DsMrr_impl::dsmrr_next(handler *h, char **range_info)
3867
{
3868
  int res;
3869
  
3870
  if (use_default_impl)
3871
    return h->handler::multi_range_read_next(range_info);
3872
    
3873
  if (rowids_buf_cur == rowids_buf_last)
3874
  {
3875
    if (dsmrr_eof)
3876
    {
3877
      res= HA_ERR_END_OF_FILE;
3878
      goto end;
3879
    }
3880
    res= dsmrr_fill_buffer(h);
3881
    if (res)
3882
      goto end;
3883
  }
3884
  
3885
  /* Return EOF if there are no rowids in the buffer after re-fill attempt */
3886
  if (rowids_buf_cur == rowids_buf_last)
3887
  {
3888
    res= HA_ERR_END_OF_FILE;
3889
    goto end;
3890
  }
3891
3892
  res= h->rnd_pos(table->record[0], rowids_buf_cur);
3893
  rowids_buf_cur += h->ref_length;
3894
  if (is_mrr_assoc)
3895
  {
3896
    memcpy(range_info, rowids_buf_cur, sizeof(void*));
3897
    rowids_buf_cur += sizeof(void*);
3898
  }
3899
3900
end:
3901
  if (res)
3902
    dsmrr_close();
3903
  return res;
3904
}
3905
3906
3907
/**
3908
  DS-MRR implementation: multi_range_read_info() function
3909
*/
3910
int DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, uint *bufsz,
3911
                           uint *flags, COST_VECT *cost)
3912
{  
3913
  int res;
3914
  uint def_flags= *flags;
3915
  uint def_bufsz= *bufsz;
3916
3917
  /* Get cost/flags/mem_usage of default MRR implementation */
3918
  res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
3919
                                         &def_flags, cost);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3920
  assert(!res);
1 by brian
clean slate
3921
3922
  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || 
3923
      choose_mrr_impl(keyno, rows, &def_flags, &def_bufsz, cost))
3924
  {
3925
    /* Default implementation is choosen */
3926
    *flags= def_flags;
3927
    *bufsz= def_bufsz;
3928
  }
3929
  return 0;
3930
}
3931
3932
3933
/**
3934
  DS-MRR Implementation: multi_range_read_info_const() function
3935
*/
3936
3937
ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
3938
                                 void *seq_init_param, uint n_ranges, 
3939
                                 uint *bufsz, uint *flags, COST_VECT *cost)
3940
{
3941
  ha_rows rows;
3942
  uint def_flags= *flags;
3943
  uint def_bufsz= *bufsz;
3944
  /* Get cost/flags/mem_usage of default MRR implementation */
3945
  rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
3946
                                                n_ranges, &def_bufsz, 
3947
                                                &def_flags, cost);
3948
  if (rows == HA_POS_ERROR)
3949
  {
3950
    /* Default implementation can't perform MRR scan => we can't either */
3951
    return rows;
3952
  }
3953
3954
  /*
3955
    If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
3956
    use the default MRR implementation (we need it for UPDATE/DELETE).
3957
    Otherwise, make a choice based on cost and @@optimizer_use_mrr.
3958
  */
3959
  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
3960
      choose_mrr_impl(keyno, rows, flags, bufsz, cost))
3961
  {
3962
    *flags= def_flags;
3963
    *bufsz= def_bufsz;
3964
  }
3965
  else
3966
  {
3967
    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;
3968
  }
3969
  return rows;
3970
}
3971
3972
3973
/**
3974
  Check if key has partially-covered columns
3975
3976
  We can't use DS-MRR to perform range scans when the ranges are over
3977
  partially-covered keys, because we'll not have full key part values
3978
  (we'll have their prefixes from the index) and will not be able to check
3979
  if we've reached the end the range.
3980
3981
  @param keyno  Key to check
3982
3983
  @todo
3984
    Allow use of DS-MRR in cases where the index has partially-covered
3985
    components but they are not used for scanning.
3986
56 by brian
Next pass of true/false update.
3987
  @retval true   Yes
3988
  @retval false  No
1 by brian
clean slate
3989
*/
3990
3991
bool DsMrr_impl::key_uses_partial_cols(uint keyno)
3992
{
3993
  KEY_PART_INFO *kp= table->key_info[keyno].key_part;
3994
  KEY_PART_INFO *kp_end= kp + table->key_info[keyno].key_parts;
3995
  for (; kp != kp_end; kp++)
3996
  {
3997
    if (!kp->field->part_of_key.is_set(keyno))
56 by brian
Next pass of true/false update.
3998
      return true;
1 by brian
clean slate
3999
  }
56 by brian
Next pass of true/false update.
4000
  return false;
1 by brian
clean slate
4001
}
4002
4003
4004
/**
4005
  DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
4006
4007
  Make the choice between using Default MRR implementation and DS-MRR.
4008
  This function contains common functionality factored out of dsmrr_info()
4009
  and dsmrr_info_const(). The function assumes that the default MRR
4010
  implementation's applicability requirements are satisfied.
4011
4012
  @param keyno       Index number
4013
  @param rows        E(full rows to be retrieved)
4014
  @param flags  IN   MRR flags provided by the MRR user
4015
                OUT  If DS-MRR is choosen, flags of DS-MRR implementation
4016
                     else the value is not modified
4017
  @param bufsz  IN   If DS-MRR is choosen, buffer use of DS-MRR implementation
4018
                     else the value is not modified
4019
  @param cost   IN   Cost of default MRR implementation
4020
                OUT  If DS-MRR is choosen, cost of DS-MRR scan
4021
                     else the value is not modified
4022
56 by brian
Next pass of true/false update.
4023
  @retval true   Default MRR implementation should be used
4024
  @retval false  DS-MRR implementation should be used
1 by brian
clean slate
4025
*/
4026
4027
bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
4028
                                 uint *bufsz, COST_VECT *cost)
4029
{
4030
  COST_VECT dsmrr_cost;
4031
  bool res;
4032
  THD *thd= current_thd;
4033
  if ((thd->variables.optimizer_use_mrr == 2) || 
4034
      (*flags & HA_MRR_INDEX_ONLY) || (*flags & HA_MRR_SORTED) ||
4035
      (keyno == table->s->primary_key && 
4036
       h->primary_key_is_clustered()) || 
4037
       key_uses_partial_cols(keyno))
4038
  {
4039
    /* Use the default implementation */
4040
    *flags |= HA_MRR_USE_DEFAULT_IMPL;
56 by brian
Next pass of true/false update.
4041
    return true;
1 by brian
clean slate
4042
  }
4043
  
4044
  uint add_len= table->key_info[keyno].key_length + h->ref_length; 
4045
  *bufsz -= add_len;
4046
  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
56 by brian
Next pass of true/false update.
4047
    return true;
1 by brian
clean slate
4048
  *bufsz += add_len;
4049
  
4050
  bool force_dsmrr;
4051
  /* 
4052
    If @@optimizer_use_mrr==force, then set cost of DS-MRR to be minimum of
4053
    DS-MRR and Default implementations cost. This allows one to force use of
4054
    DS-MRR whenever it is applicable without affecting other cost-based
4055
    choices.
4056
  */
4057
  if ((force_dsmrr= (thd->variables.optimizer_use_mrr == 1)) &&
4058
      dsmrr_cost.total_cost() > cost->total_cost())
4059
    dsmrr_cost= *cost;
4060
4061
  if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost())
4062
  {
4063
    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
4064
    *flags &= ~HA_MRR_SORTED;          /* We will return unordered output */
4065
    *cost= dsmrr_cost;
56 by brian
Next pass of true/false update.
4066
    res= false;
1 by brian
clean slate
4067
  }
4068
  else
4069
  {
4070
    /* Use the default MRR implementation */
56 by brian
Next pass of true/false update.
4071
    res= true;
1 by brian
clean slate
4072
  }
4073
  return res;
4074
}
4075
4076
4077
static void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost);
4078
4079
4080
/**
4081
  Get cost of DS-MRR scan
4082
4083
  @param keynr              Index to be used
4084
  @param rows               E(Number of rows to be scanned)
4085
  @param flags              Scan parameters (HA_MRR_* flags)
4086
  @param buffer_size INOUT  Buffer size
4087
  @param cost        OUT    The cost
4088
56 by brian
Next pass of true/false update.
4089
  @retval false  OK
4090
  @retval true   Error, DS-MRR cannot be used (the buffer is too small
1 by brian
clean slate
4091
                 for even 1 rowid)
4092
*/
4093
4094
bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
4095
                                         uint *buffer_size, COST_VECT *cost)
4096
{
61 by Brian Aker
Conversion of handler type.
4097
  uint32_t max_buff_entries, elem_size;
1 by brian
clean slate
4098
  ha_rows rows_in_full_step, rows_in_last_step;
4099
  uint n_full_steps;
4100
  double index_read_cost;
4101
4102
  elem_size= h->ref_length + sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION));
4103
  max_buff_entries = *buffer_size / elem_size;
4104
4105
  if (!max_buff_entries)
56 by brian
Next pass of true/false update.
4106
    return true; /* Buffer has not enough space for even 1 rowid */
1 by brian
clean slate
4107
4108
  /* Number of iterations we'll make with full buffer */
4109
  n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
4110
  
4111
  /* 
4112
    Get numbers of rows we'll be processing in 
4113
     - non-last sweep, with full buffer 
4114
     - last iteration, with non-full buffer
4115
  */
4116
  rows_in_full_step= max_buff_entries;
4117
  rows_in_last_step= rows % max_buff_entries;
4118
  
4119
  /* Adjust buffer size if we expect to use only part of the buffer */
4120
  if (n_full_steps)
4121
  {
4122
    get_sort_and_sweep_cost(table, rows, cost);
4123
    cost->multiply(n_full_steps);
4124
  }
4125
  else
4126
  {
4127
    cost->zero();
4128
    *buffer_size= max(*buffer_size, 
4129
                      (size_t)(1.2*rows_in_last_step) * elem_size + 
4130
                      h->ref_length + table->key_info[keynr].key_length);
4131
  }
4132
  
4133
  COST_VECT last_step_cost;
4134
  get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
4135
  cost->add(&last_step_cost);
4136
 
4137
  if (n_full_steps != 0)
4138
    cost->mem_cost= *buffer_size;
4139
  else
4140
    cost->mem_cost= (double)rows_in_last_step * elem_size;
4141
  
4142
  /* Total cost of all index accesses */
4143
  index_read_cost= h->index_only_read_time(keynr, (double)rows);
4144
  cost->add_io(index_read_cost, 1 /* Random seeks */);
56 by brian
Next pass of true/false update.
4145
  return false;
1 by brian
clean slate
4146
}
4147
4148
4149
/* 
4150
  Get cost of one sort-and-sweep step
4151
4152
  SYNOPSIS
4153
    get_sort_and_sweep_cost()
4154
      table       Table being accessed
4155
      nrows       Number of rows to be sorted and retrieved
4156
      cost   OUT  The cost
4157
4158
  DESCRIPTION
4159
    Get cost of these operations:
4160
     - sort an array of #nrows ROWIDs using qsort
4161
     - read #nrows records from table in a sweep.
4162
*/
4163
4164
static 
4165
void get_sort_and_sweep_cost(TABLE *table, ha_rows nrows, COST_VECT *cost)
4166
{
4167
  if (nrows)
4168
  {
56 by brian
Next pass of true/false update.
4169
    get_sweep_read_cost(table, nrows, false, cost);
1 by brian
clean slate
4170
    /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
4171
    double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
4172
    if (cmp_op < 3)
4173
      cmp_op= 3;
4174
    cost->cpu_cost += cmp_op * log2(cmp_op);
4175
  }
4176
  else
4177
    cost->zero();
4178
}
4179
4180
4181
/**
4182
  Get cost of reading nrows table records in a "disk sweep"
4183
4184
  A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
4185
  for an ordered sequence of rowids.
4186
4187
  We assume hard disk IO. The read is performed as follows:
4188
4189
   1. The disk head is moved to the needed cylinder
4190
   2. The controller waits for the plate to rotate
4191
   3. The data is transferred
4192
4193
  Time to do #3 is insignificant compared to #2+#1.
4194
4195
  Time to move the disk head is proportional to head travel distance.
4196
4197
  Time to wait for the plate to rotate depends on whether the disk head
4198
  was moved or not. 
4199
4200
  If disk head wasn't moved, the wait time is proportional to distance
4201
  between the previous block and the block we're reading.
4202
4203
  If the head was moved, we don't know how much we'll need to wait for the
4204
  plate to rotate. We assume the wait time to be a variate with a mean of
4205
  0.5 of full rotation time.
4206
4207
  Our cost units are "random disk seeks". The cost of random disk seek is
4208
  actually not a constant, it depends one range of cylinders we're going
4209
  to access. We make it constant by introducing a fuzzy concept of "typical 
4210
  datafile length" (it's fuzzy as it's hard to tell whether it should
4211
  include index file, temp.tables etc). Then random seek cost is:
4212
4213
    1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
4214
4215
  We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
4216
4217
  @param table             Table to be accessed
4218
  @param nrows             Number of rows to retrieve
56 by brian
Next pass of true/false update.
4219
  @param interrupted       true <=> Assume that the disk sweep will be
4220
                           interrupted by other disk IO. false - otherwise.
1 by brian
clean slate
4221
  @param cost         OUT  The cost.
4222
*/
4223
4224
void get_sweep_read_cost(TABLE *table, ha_rows nrows, bool interrupted, 
4225
                         COST_VECT *cost)
4226
{
4227
  cost->zero();
4228
  if (table->file->primary_key_is_clustered())
4229
  {
4230
    cost->io_count= table->file->read_time(table->s->primary_key,
4231
                                           (uint) nrows, nrows);
4232
  }
4233
  else
4234
  {
4235
    double n_blocks=
151 by Brian Aker
Ulonglong to uint64_t
4236
      ceil(uint64_t2double(table->file->stats.data_file_length) / IO_SIZE);
1 by brian
clean slate
4237
    double busy_blocks=
4238
      n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
4239
    if (busy_blocks < 1.0)
4240
      busy_blocks= 1.0;
4241
4242
    cost->io_count= busy_blocks;
4243
4244
    if (!interrupted)
4245
    {
4246
      /* Assume reading is done in one 'sweep' */
4247
      cost->avg_io_cost= (DISK_SEEK_BASE_COST +
4248
                          DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
4249
    }
4250
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4251
  return;
1 by brian
clean slate
4252
}
4253
4254
4255
/* **************************************************************************
4256
 * DS-MRR implementation ends
4257
 ***************************************************************************/
4258
4259
/**
4260
  Read first row between two ranges.
4261
4262
  @param start_key		Start key. Is 0 if no min range
4263
  @param end_key		End key.  Is 0 if no max range
4264
  @param eq_range_arg	        Set to 1 if start_key == end_key
4265
  @param sorted		Set to 1 if result should be sorted per key
4266
4267
  @note
4268
    Record is read into table->record[0]
4269
4270
  @retval
4271
    0			Found row
4272
  @retval
4273
    HA_ERR_END_OF_FILE	No rows in range
4274
  @retval
4275
    \#			Error code
4276
*/
4277
int handler::read_range_first(const key_range *start_key,
4278
			      const key_range *end_key,
4279
			      bool eq_range_arg,
77.1.15 by Monty Taylor
Bunch of warning cleanups.
4280
                              bool sorted  __attribute__((__unused__)))
1 by brian
clean slate
4281
{
4282
  int result;
4283
4284
  eq_range= eq_range_arg;
4285
  end_range= 0;
4286
  if (end_key)
4287
  {
4288
    end_range= &save_end_range;
4289
    save_end_range= *end_key;
4290
    key_compare_result_on_equal= ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
4291
				  (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
4292
  }
4293
  range_key_part= table->key_info[active_index].key_part;
4294
4295
  if (!start_key)			// Read first record
4296
    result= index_first(table->record[0]);
4297
  else
4298
    result= index_read_map(table->record[0],
4299
                           start_key->key,
4300
                           start_key->keypart_map,
4301
                           start_key->flag);
4302
  if (result)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4303
    return((result == HA_ERR_KEY_NOT_FOUND) 
1 by brian
clean slate
4304
		? HA_ERR_END_OF_FILE
4305
		: result);
4306
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4307
  return (compare_key(end_range) <= 0 ? 0 : HA_ERR_END_OF_FILE);
1 by brian
clean slate
4308
}
4309
4310
4311
/**
4312
  Read next row between two endpoints.
4313
4314
  @note
4315
    Record is read into table->record[0]
4316
4317
  @retval
4318
    0			Found row
4319
  @retval
4320
    HA_ERR_END_OF_FILE	No rows in range
4321
  @retval
4322
    \#			Error code
4323
*/
4324
int handler::read_range_next()
4325
{
4326
  int result;
4327
4328
  if (eq_range)
4329
  {
4330
    /* We trust that index_next_same always gives a row in range */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4331
    return(index_next_same(table->record[0],
1 by brian
clean slate
4332
                                end_range->key,
4333
                                end_range->length));
4334
  }
4335
  result= index_next(table->record[0]);
4336
  if (result)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4337
    return(result);
4338
  return(compare_key(end_range) <= 0 ? 0 : HA_ERR_END_OF_FILE);
1 by brian
clean slate
4339
}
4340
4341
4342
/**
4343
  Compare if found key (in row) is over max-value.
4344
4345
  @param range		range to compare to row. May be 0 for no range
4346
4347
  @seealso
4348
    key.cc::key_cmp()
4349
4350
  @return
4351
    The return value is SIGN(key_in_row - range_key):
4352
4353
    - 0   : Key is equal to range or 'range' == 0 (no range)
4354
    - -1  : Key is less than range
4355
    - 1   : Key is larger than range
4356
*/
4357
int handler::compare_key(key_range *range)
4358
{
4359
  int cmp;
4360
  if (!range || in_range_check_pushed_down)
4361
    return 0;					// No max range
4362
  cmp= key_cmp(range_key_part, range->key, range->length);
4363
  if (!cmp)
4364
    cmp= key_compare_result_on_equal;
4365
  return cmp;
4366
}
4367
4368
4369
/*
4370
  Same as compare_key() but doesn't check have in_range_check_pushed_down.
4371
  This is used by index condition pushdown implementation.
4372
*/
4373
4374
int handler::compare_key2(key_range *range)
4375
{
4376
  int cmp;
4377
  if (!range)
4378
    return 0;					// no max range
4379
  cmp= key_cmp(range_key_part, range->key, range->length);
4380
  if (!cmp)
4381
    cmp= key_compare_result_on_equal;
4382
  return cmp;
4383
}
4384
4385
int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
4386
                                key_part_map keypart_map,
4387
                                enum ha_rkey_function find_flag)
4388
{
4389
  int error, error1;
4390
  error= index_init(index, 0);
4391
  if (!error)
4392
  {
4393
    error= index_read_map(buf, key, keypart_map, find_flag);
4394
    error1= index_end();
4395
  }
4396
  return error ?  error : error1;
4397
}
4398
4399
4400
/**
4401
  Returns a list of all known extensions.
4402
4403
    No mutexes, worst case race is a minor surplus memory allocation
4404
    We have to recreate the extension map if mysqld is restarted (for example
4405
    within libmysqld)
4406
4407
  @retval
4408
    pointer		pointer to TYPELIB structure
4409
*/
149 by Brian Aker
More bool conversion.
4410
static bool exts_handlerton(THD *unused __attribute__((__unused__)),
4411
                            plugin_ref plugin,
4412
                            void *arg)
1 by brian
clean slate
4413
{
4414
  List<char> *found_exts= (List<char> *) arg;
4415
  handlerton *hton= plugin_data(plugin, handlerton *);
4416
  handler *file;
4417
  if (hton->state == SHOW_OPTION_YES && hton->create &&
4418
      (file= hton->create(hton, (TABLE_SHARE*) 0, current_thd->mem_root)))
4419
  {
4420
    List_iterator_fast<char> it(*found_exts);
4421
    const char **ext, *old_ext;
4422
4423
    for (ext= file->bas_ext(); *ext; ext++)
4424
    {
4425
      while ((old_ext= it++))
4426
      {
4427
        if (!strcmp(old_ext, *ext))
4428
	  break;
4429
      }
4430
      if (!old_ext)
4431
        found_exts->push_back((char *) *ext);
4432
4433
      it.rewind();
4434
    }
4435
    delete file;
4436
  }
56 by brian
Next pass of true/false update.
4437
  return false;
1 by brian
clean slate
4438
}
4439
4440
TYPELIB *ha_known_exts(void)
4441
{
4442
  if (!known_extensions.type_names || mysys_usage_id != known_extensions_id)
4443
  {
4444
    List<char> found_exts;
4445
    const char **ext, *old_ext;
4446
4447
    known_extensions_id= mysys_usage_id;
4448
4449
    plugin_foreach(NULL, exts_handlerton,
4450
                   MYSQL_STORAGE_ENGINE_PLUGIN, &found_exts);
4451
4452
    ext= (const char **) my_once_alloc(sizeof(char *)*
4453
                                       (found_exts.elements+1),
4454
                                       MYF(MY_WME | MY_FAE));
4455
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4456
    assert(ext != 0);
1 by brian
clean slate
4457
    known_extensions.count= found_exts.elements;
4458
    known_extensions.type_names= ext;
4459
4460
    List_iterator_fast<char> it(found_exts);
4461
    while ((old_ext= it++))
4462
      *ext++= old_ext;
4463
    *ext= 0;
4464
  }
4465
  return &known_extensions;
4466
}
4467
4468
4469
/**
  Send one (type, file, status) row to the client through thd->protocol.

  All three strings are stored using system_charset_info.

  @param thd         Thread whose protocol is used
  @param type        First column value
  @param type_len    Length of 'type'
  @param file        Second column value
  @param file_len    Length of 'file'
  @param status      Third column value
  @param status_len  Length of 'status'

  @return
    true if writing the row failed, false otherwise.
*/
static bool stat_print(THD *thd, const char *type, uint type_len,
                       const char *file, uint file_len,
                       const char *status, uint status_len)
{
  Protocol *out= thd->protocol;

  out->prepare_for_resend();
  out->store(type, type_len, system_charset_info);
  out->store(file, file_len, system_charset_info);
  out->store(status, status_len, system_charset_info);

  return out->write() ? true : false;
}
4482
4483
/**
  Send the status of one storage engine to the client.

  A three column result set header (Type, Name, Status) is sent first.
  Then the engine's show_status() function, if it has one, is called with
  stat_print() as the row writer. If that reports no failure (or the
  engine has no show_status()), an EOF is sent.

  @param thd      Thread to send the result to
  @param db_type  Storage engine whose status is requested
  @param stat     Kind of status requested

  @return
    true on failure (sending the header failed or show_status() reported
    an error), false on success.
*/
bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
{
  List<Item> field_list;
  Protocol *protocol= thd->protocol;

  field_list.push_back(new Item_empty_string("Type",10));
  field_list.push_back(new Item_empty_string("Name",FN_REFLEN));
  field_list.push_back(new Item_empty_string("Status",10));

  if (protocol->send_fields(&field_list,
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
    return true;

  bool failed= false;
  if (db_type->show_status)
    failed= db_type->show_status(db_type, thd, stat_print, stat) ? true : false;

  if (failed)
    return true;

  my_eof(thd);
  return false;
}
4504
4505
4506
/**
4507
  Check if the conditions for row-based binlogging is correct for the table.
4508
4509
  A row in the given table should be replicated if:
4510
  - Row-based replication is enabled in the current thread
4511
  - The binlog is enabled
4512
  - It is not a temporary table
4513
  - The binary log is open
4514
  - The database the table resides in shall be binlogged (binlog_*_db rules)
4515
  - table is not mysql.event
4516
*/
4517
4518
static bool check_table_binlog_row_based(THD *thd, TABLE *table)
4519
{
4520
  if (table->s->cached_row_logging_check == -1)
4521
  {
4522
    int const check(table->s->tmp_table == NO_TMP_TABLE &&
4523
                    binlog_filter->db_ok(table->s->db.str));
4524
    table->s->cached_row_logging_check= check;
4525
  }
4526
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4527
  assert(table->s->cached_row_logging_check == 0 ||
1 by brian
clean slate
4528
              table->s->cached_row_logging_check == 1);
4529
4530
  return (thd->current_stmt_binlog_row_based &&
4531
          table->s->cached_row_logging_check &&
4532
          (thd->options & OPTION_BIN_LOG) &&
4533
          mysql_bin_log.is_open());
4534
}
4535
4536
4537
/**
4538
   Write table maps for all (manually or automatically) locked tables
4539
   to the binary log.
4540
4541
   This function will generate and write table maps for all tables
4542
   that are locked by the thread 'thd'.  Either manually locked
4543
   (stored in THD::locked_tables) and automatically locked (stored
4544
   in THD::lock) are considered.
4545
4546
   @param thd     Pointer to THD structure
4547
4548
   @retval 0   All OK
4549
   @retval 1   Failed to write all table maps
4550
4551
   @sa
4552
       THD::lock
4553
       THD::locked_tables
4554
*/
4555
4556
static int write_locked_table_maps(THD *thd)
4557
{
4558
  if (thd->get_binlog_table_maps() == 0)
4559
  {
4560
    MYSQL_LOCK *locks[3];
4561
    locks[0]= thd->extra_lock;
4562
    locks[1]= thd->lock;
4563
    locks[2]= thd->locked_tables;
4564
    for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
4565
    {
4566
      MYSQL_LOCK const *const lock= locks[i];
4567
      if (lock == NULL)
4568
        continue;
4569
4570
      TABLE **const end_ptr= lock->table + lock->table_count;
4571
      for (TABLE **table_ptr= lock->table ; 
4572
           table_ptr != end_ptr ;
4573
           ++table_ptr)
4574
      {
4575
        TABLE *const table= *table_ptr;
4576
        if (table->current_lock == F_WRLCK &&
4577
            check_table_binlog_row_based(thd, table))
4578
        {
4579
          int const has_trans= table->file->has_transactions();
4580
          int const error= thd->binlog_write_table_map(table, has_trans);
4581
          /*
4582
            If an error occurs, it is the responsibility of the caller to
4583
            roll back the transaction.
4584
          */
4585
          if (unlikely(error))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4586
            return(1);
1 by brian
clean slate
4587
        }
4588
      }
4589
    }
4590
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4591
  return(0);
1 by brian
clean slate
4592
}
4593
4594
4595
typedef bool Log_func(THD*, TABLE*, bool, const uchar*, const uchar*);
4596
4597
static int binlog_log_row(TABLE* table,
4598
                          const uchar *before_record,
4599
                          const uchar *after_record,
4600
                          Log_func *log_func)
4601
{
4602
  if (table->no_replicate)
4603
    return 0;
4604
  bool error= 0;
4605
  THD *const thd= table->in_use;
4606
4607
  if (check_table_binlog_row_based(thd, table))
4608
  {
4609
    /*
4610
      If there are no table maps written to the binary log, this is
4611
      the first row handled in this statement. In that case, we need
4612
      to write table maps for all locked tables to the binary log.
4613
    */
4614
    if (likely(!(error= write_locked_table_maps(thd))))
4615
    {
4616
      bool const has_trans= table->file->has_transactions();
4617
      error= (*log_func)(thd, table, has_trans, before_record, after_record);
4618
    }
4619
  }
4620
  return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
4621
}
4622
4623
/**
  Call the engine's external_lock() and refresh the cached table flags.

  @param thd        Current thread
  @param lock_type  Lock type, passed on to external_lock()

  @return
    The result of external_lock().
*/
int handler::ha_external_lock(THD *thd, int lock_type)
{
  /*
    Whether this is lock or unlock, this should be true, and is to verify that
    if get_auto_increment() was called (thus may have reserved intervals or
    taken a table lock), ha_release_auto_increment() was too.
  */
  assert(next_insert_id == 0);

  MYSQL_EXTERNAL_LOCK(lock_type);

  const int rc= external_lock(thd, lock_type);

  /*
    We cache the table flags if the locking succeeded. Otherwise, we
    keep them as they were when they were fetched in ha_open().
  */
  if (!rc)
    cached_table_flags= table_flags();
  return rc;
}
4643
4644
4645
/**
4646
  Check handler usage and reset state of file to after 'open'
4647
*/
4648
int handler::ha_reset()
4649
{
4650
  /* Check that we have called all proper deallocation functions */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4651
  assert((uchar*) table->def_read_set.bitmap +
1 by brian
clean slate
4652
              table->s->column_bitmap_size ==
4653
              (uchar*) table->def_write_set.bitmap);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4654
  assert(bitmap_is_set_all(&table->s->all_set));
4655
  assert(table->key_read == 0);
1 by brian
clean slate
4656
  /* ensure that ha_index_end / ha_rnd_end has been called */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4657
  assert(inited == NONE);
1 by brian
clean slate
4658
  /* Free cache used by filesort */
4659
  free_io_cache(table);
4660
  /* reset the bitmaps to point to defaults */
4661
  table->default_column_bitmaps();
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4662
  return(reset());
1 by brian
clean slate
4663
}
4664
4665
4666
/**
  Insert a row and, if the insert succeeded, log it to the binary log.

  @param buf  The row to insert

  @return
    0 on success, otherwise the error from write_row() or, if that
    succeeded, from binlog_log_row().
*/
int handler::ha_write_row(uchar *buf)
{
  Log_func *log_func= Write_rows_log_event::binlog_row_logging_function;
  MYSQL_INSERT_ROW_START();

  mark_trx_read_write();

  int error= write_row(buf);
  if (unlikely(error))
    return error;

  error= binlog_log_row(table, 0, buf, log_func);
  if (unlikely(error))
    return error; /* purecov: inspected */

  MYSQL_INSERT_ROW_END();
  return 0;
}
4681
4682
4683
int handler::ha_update_row(const uchar *old_data, uchar *new_data)
4684
{
4685
  int error;
4686
  Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
4687
4688
  /*
4689
    Some storage engines require that the new record is in record[0]
4690
    (and the old record is in record[1]).
4691
   */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4692
  assert(new_data == table->record[0]);
1 by brian
clean slate
4693
4694
  mark_trx_read_write();
4695
4696
  if (unlikely(error= update_row(old_data, new_data)))
4697
    return error;
4698
  if (unlikely(error= binlog_log_row(table, old_data, new_data, log_func)))
4699
    return error;
4700
  return 0;
4701
}
4702
4703
int handler::ha_delete_row(const uchar *buf)
4704
{
4705
  int error;
4706
  Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
4707
4708
  mark_trx_read_write();
4709
4710
  if (unlikely(error= delete_row(buf)))
4711
    return error;
4712
  if (unlikely(error= binlog_log_row(table, buf, 0, log_func)))
4713
    return error;
4714
  return 0;
4715
}
4716
4717
4718
4719
/**
4720
  @details
4721
  use_hidden_primary_key() is called in case of an update/delete when
4722
  (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
4723
  but we don't have a primary key
4724
*/
4725
void handler::use_hidden_primary_key()
4726
{
4727
  /* fallback to use all columns in the table to identify row */
4728
  table->use_all_columns();
4729
}