~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/* Copyright (C) 2000-2006 MySQL AB
2
3
   This program is free software; you can redistribute it and/or modify
4
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6
7
   This program is distributed in the hope that it will be useful,
8
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
   GNU General Public License for more details.
11
12
   You should have received a copy of the GNU General Public License
13
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
15
16
/**
17
  @file handler.cc
18
19
  Handler-calling-functions
20
*/
21
22
#ifdef USE_PRAGMA_IMPLEMENTATION
23
#pragma implementation				// gcc: Class implementation
24
#endif
25
243.1.17 by Jay Pipes
FINAL PHASE removal of mysql_priv.h (Bye, bye my friend.)
26
#include <drizzled/server_includes.h>
1 by brian
clean slate
27
#include "rpl_filter.h"
202.3.6 by Monty Taylor
First pass at gettexizing the error messages.
28
#include <drizzled/drizzled_error_messages.h>
1 by brian
clean slate
29
30
/*
31
  While we have legacy_db_type, we have this array to
32
  check for dups and to find handlerton from legacy_db_type.
33
  Remove when legacy_db_type is finally gone
34
*/
35
st_plugin_int *hton2plugin[MAX_HA];
36
37
static handlerton *installed_htons[128];
38
39
#define BITMAP_STACKBUF_SIZE (128/8)
40
41
KEY_CREATE_INFO default_key_create_info= { HA_KEY_ALG_UNDEF, 0, {NullS,0}, {NullS,0} };
42
43
/* number of entries in handlertons[] */
61 by Brian Aker
Conversion of handler type.
44
uint32_t total_ha= 0;
1 by brian
clean slate
45
/* number of storage engines (from handlertons[]) that support 2pc */
61 by Brian Aker
Conversion of handler type.
46
uint32_t total_ha_2pc= 0;
1 by brian
clean slate
47
/* size of savepoint storage area (see ha_init) */
61 by Brian Aker
Conversion of handler type.
48
uint32_t savepoint_alloc_size= 0;
1 by brian
clean slate
49
50
static const LEX_STRING sys_table_aliases[]=
51
{
52
  { C_STRING_WITH_LEN("INNOBASE") },  { C_STRING_WITH_LEN("INNODB") },
53
  { C_STRING_WITH_LEN("HEAP") },      { C_STRING_WITH_LEN("MEMORY") },
54
  {NullS, 0}
55
};
56
57
const char *ha_row_type[] = {
58
  "", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "PAGE", "?","?","?"
59
};
60
61
const char *tx_isolation_names[] =
62
{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",
63
  NullS};
64
TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",
65
			       tx_isolation_names, NULL};
66
67
static TYPELIB known_extensions= {0,"known_exts", NULL, NULL};
68
uint known_extensions_id= 0;
69
70
71
72
/**
  Pick the storage engine plugin a thread uses by default.

  The thread's own table_plugin setting wins when it is set. Otherwise the
  global table_plugin is locked on behalf of the thread and returned.

  @param thd   current thread

  @return
    plugin handle of the default storage engine
*/
static plugin_ref ha_default_plugin(THD *thd)
{
  if (!thd->variables.table_plugin)
    return my_plugin_lock(thd, &global_system_variables.table_plugin);

  return thd->variables.table_plugin;
}
78
79
80
/**
81
  Return the default storage engine handlerton for thread
82
83
  @param ha_default_handlerton(thd)
84
  @param thd         current thread
85
86
  @return
87
    pointer to handlerton
88
*/
89
handlerton *ha_default_handlerton(THD *thd)
90
{
91
  plugin_ref plugin= ha_default_plugin(thd);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
92
  assert(plugin);
1 by brian
clean slate
93
  handlerton *hton= plugin_data(plugin, handlerton*);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
94
  assert(hton);
1 by brian
clean slate
95
  return hton;
96
}
97
98
99
/**
100
  Return the storage engine handlerton for the supplied name
101
  
102
  @param thd         current thread
103
  @param name        name of storage engine
104
  
105
  @return
106
    pointer to storage engine plugin handle
107
*/
108
plugin_ref ha_resolve_by_name(THD *thd, const LEX_STRING *name)
109
{
110
  const LEX_STRING *table_alias;
111
  plugin_ref plugin;
112
113
redo:
114
  /* my_strnncoll is a macro and gcc doesn't do early expansion of macro */
115
  if (thd && !my_charset_latin1.coll->strnncoll(&my_charset_latin1,
116
                           (const uchar *)name->str, name->length,
117
                           (const uchar *)STRING_WITH_LEN("DEFAULT"), 0))
118
    return ha_default_plugin(thd);
119
319.1.1 by Grant Limberg
renamed all instances of MYSQL_ to DRIZZLE_
120
  if ((plugin= my_plugin_lock_by_name(thd, name, DRIZZLE_STORAGE_ENGINE_PLUGIN)))
1 by brian
clean slate
121
  {
122
    handlerton *hton= plugin_data(plugin, handlerton *);
123
    if (!(hton->flags & HTON_NOT_USER_SELECTABLE))
124
      return plugin;
125
      
126
    /*
127
      unlocking plugin immediately after locking is relatively low cost.
128
    */
129
    plugin_unlock(thd, plugin);
130
  }
131
132
  /*
133
    We check for the historical aliases.
134
  */
135
  for (table_alias= sys_table_aliases; table_alias->str; table_alias+= 2)
136
  {
137
    if (!my_strnncoll(&my_charset_latin1,
138
                      (const uchar *)name->str, name->length,
139
                      (const uchar *)table_alias->str, table_alias->length))
140
    {
141
      name= table_alias + 1;
142
      goto redo;
143
    }
144
  }
145
146
  return NULL;
147
}
148
149
150
/**
  Lock the plugin that owns a handlerton.

  @param thd    current thread
  @param hton   storage engine, may be NULL

  @return
    plugin handle as returned by my_plugin_lock(), or NULL when hton is NULL
*/
plugin_ref ha_lock_engine(THD *thd, handlerton *hton)
{
  if (!hton)
    return NULL;

  /* The owning plugin is recorded under the engine's slot number. */
  st_plugin_int **plugin= hton2plugin + hton->slot;

  /*
    NOTE(review): the address of the local pointer is passed, which matches
    a plugin_ref defined as st_plugin_int** -- confirm against the plugin
    header.
  */
  return my_plugin_lock(thd, &plugin);
}
160
161
162
/**
  Find the handlerton registered for a legacy_db_type code.

  DB_TYPE_DEFAULT resolves to the thread's default engine. Codes strictly
  between DB_TYPE_UNKNOWN and DB_TYPE_DEFAULT are looked up in
  installed_htons and the owning plugin is locked. Everything else,
  including DB_TYPE_UNKNOWN and codes with no installed engine, gives NULL.

  @param thd       current thread
  @param db_type   legacy type code

  @return
    pointer to handlerton, or NULL
*/
handlerton *ha_resolve_by_legacy_type(THD *thd, enum legacy_db_type db_type)
{
  if (db_type == DB_TYPE_DEFAULT)
    return ha_default_handlerton(thd);

  if (db_type > DB_TYPE_UNKNOWN && db_type < DB_TYPE_DEFAULT)
  {
    plugin_ref plugin= ha_lock_engine(thd, installed_htons[db_type]);
    if (plugin)
      return plugin_data(plugin, handlerton*);
  }

  return NULL;
}
177
178
179
/**
180
  Use other database handler if databasehandler is not compiled in.
181
*/
182
handlerton *ha_checktype(THD *thd, enum legacy_db_type database_type,
183
                          bool no_substitute, bool report_error)
184
{
185
  handlerton *hton= ha_resolve_by_legacy_type(thd, database_type);
186
  if (ha_storage_engine_is_enabled(hton))
187
    return hton;
188
189
  if (no_substitute)
190
  {
191
    if (report_error)
192
    {
193
      const char *engine_name= ha_resolve_storage_engine_name(hton);
194
      my_error(ER_FEATURE_DISABLED,MYF(0),engine_name,engine_name);
195
    }
196
    return NULL;
197
  }
198
199
  switch (database_type) {
200
  case DB_TYPE_HASH:
201
    return ha_resolve_by_legacy_type(thd, DB_TYPE_HASH);
202
  default:
203
    break;
204
  }
205
206
  return ha_default_handlerton(thd);
207
} /* ha_checktype */
208
209
210
/**
  Create a handler object for a table through the given storage engine.

  When db_type is NULL, is not in state SHOW_OPTION_YES, or has no create
  callback, the call is repeated with the current thread's default engine.

  NOTE(review): that retry applies the same checks to the default engine,
  so it relies on the default engine being usable.

  @param share     table share passed on to the engine's create callback
  @param alloc     memory root passed on to the engine's create callback
  @param db_type   storage engine to use, may be NULL

  @return
    the new handler with init() already called, or NULL if the engine's
    create callback returned NULL
*/
handler *get_new_handler(TABLE_SHARE *share, MEM_ROOT *alloc,
                         handlerton *db_type)
{
  if (!db_type || db_type->state != SHOW_OPTION_YES || !db_type->create)
  {
    /*
      Try the default table type
      Here the call to current_thd() is ok as we call this function a lot of
      times but we enter this branch very seldom.
    */
    return(get_new_handler(share, alloc, ha_default_handlerton(current_thd)));
  }

  handler *new_file= db_type->create(db_type, share, alloc);
  if (new_file)
    new_file->init();
  return(new_file);
}
228
229
230
/**
231
  Register handler error messages for use with my_error().
232
233
  @retval
234
    0           OK
235
  @retval
236
    !=0         Error
237
*/
238
239
int ha_init_errors(void)
240
{
241
#define SETMSG(nr, msg) errmsgs[(nr) - HA_ERR_FIRST]= (msg)
242
  const char    **errmsgs;
243
244
  /* Allocate a pointer array for the error message strings. */
245
  /* Zerofill it to avoid uninitialized gaps. */
246
  if (! (errmsgs= (const char**) my_malloc(HA_ERR_ERRORS * sizeof(char*),
247
                                           MYF(MY_WME | MY_ZEROFILL))))
248
    return 1;
249
250
  /* Set the dedicated error messages. */
251
  SETMSG(HA_ERR_KEY_NOT_FOUND,          ER(ER_KEY_NOT_FOUND));
252
  SETMSG(HA_ERR_FOUND_DUPP_KEY,         ER(ER_DUP_KEY));
253
  SETMSG(HA_ERR_RECORD_CHANGED,         "Update wich is recoverable");
254
  SETMSG(HA_ERR_WRONG_INDEX,            "Wrong index given to function");
255
  SETMSG(HA_ERR_CRASHED,                ER(ER_NOT_KEYFILE));
256
  SETMSG(HA_ERR_WRONG_IN_RECORD,        ER(ER_CRASHED_ON_USAGE));
257
  SETMSG(HA_ERR_OUT_OF_MEM,             "Table handler out of memory");
258
  SETMSG(HA_ERR_NOT_A_TABLE,            "Incorrect file format '%.64s'");
259
  SETMSG(HA_ERR_WRONG_COMMAND,          "Command not supported");
260
  SETMSG(HA_ERR_OLD_FILE,               ER(ER_OLD_KEYFILE));
261
  SETMSG(HA_ERR_NO_ACTIVE_RECORD,       "No record read in update");
262
  SETMSG(HA_ERR_RECORD_DELETED,         "Intern record deleted");
263
  SETMSG(HA_ERR_RECORD_FILE_FULL,       ER(ER_RECORD_FILE_FULL));
264
  SETMSG(HA_ERR_INDEX_FILE_FULL,        "No more room in index file '%.64s'");
265
  SETMSG(HA_ERR_END_OF_FILE,            "End in next/prev/first/last");
266
  SETMSG(HA_ERR_UNSUPPORTED,            ER(ER_ILLEGAL_HA));
267
  SETMSG(HA_ERR_TO_BIG_ROW,             "Too big row");
268
  SETMSG(HA_WRONG_CREATE_OPTION,        "Wrong create option");
269
  SETMSG(HA_ERR_FOUND_DUPP_UNIQUE,      ER(ER_DUP_UNIQUE));
270
  SETMSG(HA_ERR_UNKNOWN_CHARSET,        "Can't open charset");
271
  SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF,    ER(ER_WRONG_MRG_TABLE));
272
  SETMSG(HA_ERR_CRASHED_ON_REPAIR,      ER(ER_CRASHED_ON_REPAIR));
273
  SETMSG(HA_ERR_CRASHED_ON_USAGE,       ER(ER_CRASHED_ON_USAGE));
274
  SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT,      ER(ER_LOCK_WAIT_TIMEOUT));
275
  SETMSG(HA_ERR_LOCK_TABLE_FULL,        ER(ER_LOCK_TABLE_FULL));
276
  SETMSG(HA_ERR_READ_ONLY_TRANSACTION,  ER(ER_READ_ONLY_TRANSACTION));
277
  SETMSG(HA_ERR_LOCK_DEADLOCK,          ER(ER_LOCK_DEADLOCK));
278
  SETMSG(HA_ERR_CANNOT_ADD_FOREIGN,     ER(ER_CANNOT_ADD_FOREIGN));
279
  SETMSG(HA_ERR_NO_REFERENCED_ROW,      ER(ER_NO_REFERENCED_ROW_2));
280
  SETMSG(HA_ERR_ROW_IS_REFERENCED,      ER(ER_ROW_IS_REFERENCED_2));
281
  SETMSG(HA_ERR_NO_SAVEPOINT,           "No savepoint with that name");
282
  SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE,  "Non unique key block size");
283
  SETMSG(HA_ERR_NO_SUCH_TABLE,          "No such table: '%.64s'");
284
  SETMSG(HA_ERR_TABLE_EXIST,            ER(ER_TABLE_EXISTS_ERROR));
285
  SETMSG(HA_ERR_NO_CONNECTION,          "Could not connect to storage engine");
286
  SETMSG(HA_ERR_TABLE_DEF_CHANGED,      ER(ER_TABLE_DEF_CHANGED));
287
  SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY,  "FK constraint would lead to duplicate key");
288
  SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE,    ER(ER_TABLE_NEEDS_UPGRADE));
289
  SETMSG(HA_ERR_TABLE_READONLY,         ER(ER_OPEN_AS_READONLY));
290
  SETMSG(HA_ERR_AUTOINC_READ_FAILED,    ER(ER_AUTOINC_READ_FAILED));
291
  SETMSG(HA_ERR_AUTOINC_ERANGE,         ER(ER_WARN_DATA_OUT_OF_RANGE));
292
293
  /* Register the error messages for use with my_error(). */
294
  return my_error_register(errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
295
}
296
297
298
/**
299
  Unregister handler error messages.
300
301
  @retval
302
    0           OK
303
  @retval
304
    !=0         Error
305
*/
306
static int ha_finish_errors(void)
307
{
308
  const char    **errmsgs;
309
310
  /* Allocate a pointer array for the error message strings. */
311
  if (! (errmsgs= my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST)))
312
    return 1;
313
  my_free((uchar*) errmsgs, MYF(0));
314
  return 0;
315
}
316
317
318
/**
  Tear down the handlerton attached to a storage engine plugin.

  An enabled engine is removed from installed_htons, the plugin's deinit
  callback (if any) is invoked with the handlerton, and the handlerton is
  freed.

  @param plugin   plugin whose data member holds the handlerton

  @return
    always 0
*/
int ha_finalize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton= (handlerton *)plugin->data;

  /*
    Nothing to tear down if no handlerton is attached. The check must come
    before the first dereference of hton below.
  */
  if (!hton)
    return(0);

  switch (hton->state)
  {
  case SHOW_OPTION_NO:
  case SHOW_OPTION_DISABLED:
    break;
  case SHOW_OPTION_YES:
    /* Only clear the legacy type slot if this engine still owns it. */
    if (installed_htons[hton->db_type] == hton)
      installed_htons[hton->db_type]= NULL;
    break;
  };

  if (plugin->plugin->deinit)
    (void)plugin->plugin->deinit(hton);

  my_free((uchar*)hton, MYF(0));
  /* Do not leave a pointer to freed memory behind in the plugin. */
  plugin->data= NULL;

  return(0);
}
340
341
342
/**
  Allocate and register the handlerton for a storage engine plugin.

  A zeroed handlerton is allocated, attached to plugin->data and passed to
  the plugin's init callback. If the engine reports SHOW_OPTION_YES it is
  given a legacy_db_type code (a free dynamic one if its own is missing or
  already taken), a savepoint offset and a slot in hton2plugin. Any state
  other than SHOW_OPTION_NO or SHOW_OPTION_YES is turned into
  SHOW_OPTION_DISABLED.

  @param plugin   plugin being initialized

  @retval
    0           OK
  @retval
    1           allocation failed, the init callback failed, or no free
                legacy_db_type code is left
*/
int ha_initialize_handlerton(st_plugin_int *plugin)
{
  handlerton *hton;

  hton= (handlerton *)my_malloc(sizeof(handlerton),
                                MYF(MY_WME | MY_ZEROFILL));
  if (!hton)
  {
    /* Out of memory: there is no handlerton to attach or initialize. */
    plugin->data= NULL;
    return(1);
  }
  /*
    Clear the whole structure explicitly. An earlier note here reported
    that get_backup_engine was not NULL despite MY_ZEROFILL. The size must
    be that of the structure: sizeof(hton) is only the size of the pointer.
  */
  memset(hton, 0, sizeof(*hton));
  /* Historical Requirement */
  plugin->data= hton; // shortcut for the future
  if (plugin->plugin->init)
  {
    if (plugin->plugin->init(hton))
    {
      sql_print_error(_("Plugin '%s' init function returned error."),
                      plugin->name.str);
      return(1);
    }
  }

  /*
    the switch below and hton->state should be removed when
    command-line options for plugins will be implemented
  */
  switch (hton->state) {
  case SHOW_OPTION_NO:
    break;
  case SHOW_OPTION_YES:
    {
      uint tmp;
      /* now check the db_type for conflict */
      if (hton->db_type <= DB_TYPE_UNKNOWN ||
          hton->db_type >= DB_TYPE_DEFAULT ||
          installed_htons[hton->db_type])
      {
        /* Find the first unused dynamic type code. */
        int idx= (int) DB_TYPE_FIRST_DYNAMIC;

        while (idx < (int) DB_TYPE_DEFAULT && installed_htons[idx])
          idx++;

        if (idx == (int) DB_TYPE_DEFAULT)
        {
          sql_print_warning(_("Too many storage engines!"));
          return(1);
        }
        if (hton->db_type != DB_TYPE_UNKNOWN)
          sql_print_warning(_("Storage engine '%s' has conflicting typecode. "
                            "Assigning value %d."), plugin->plugin->name, idx);
        hton->db_type= (enum legacy_db_type) idx;
      }
      installed_htons[hton->db_type]= hton;
      /*
        The engine supplied the size of its savepoint data in
        savepoint_offset; replace it with the engine's offset in the shared
        savepoint area and grow that area by the requested size.
      */
      tmp= hton->savepoint_offset;
      hton->savepoint_offset= savepoint_alloc_size;
      savepoint_alloc_size+= tmp;
      /*
        NOTE(review): the slot is not checked against MAX_HA before it is
        used as an index into hton2plugin; ha_init() only asserts the limit
        afterwards.
      */
      hton->slot= total_ha++;
      hton2plugin[hton->slot]=plugin;
      if (hton->prepare)
        total_ha_2pc++;
      break;
    }
  default:
    hton->state= SHOW_OPTION_DISABLED;
    break;
  }

  /*
    This is entirely for legacy. We will create a new "disk based" hton and a
    "memory" hton which will be configurable longterm. We should be able to
    remove partition and myisammrg.
  */
  switch (hton->db_type) {
  case DB_TYPE_HEAP:
    heap_hton= hton;
    break;
  case DB_TYPE_MYISAM:
    myisam_hton= hton;
    break;
  default:
    break;
  };

  return(0);
}
434
435
int ha_init()
436
{
437
  int error= 0;
438
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
439
  assert(total_ha < MAX_HA);
1 by brian
clean slate
440
  /*
441
    Check if there is a transaction-capable storage engine besides the
442
    binary log (which is considered a transaction-capable storage engine in
443
    counting total_ha)
444
  */
61 by Brian Aker
Conversion of handler type.
445
  opt_using_transactions= total_ha>(uint32_t)opt_bin_log;
1 by brian
clean slate
446
  savepoint_alloc_size+= sizeof(SAVEPOINT);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
447
  return(error);
1 by brian
clean slate
448
}
449
450
int ha_end()
451
{
452
  int error= 0;
453
454
  /* 
455
    This should be eventualy based  on the graceful shutdown flag.
456
    So if flag is equal to HA_PANIC_CLOSE, the deallocate
457
    the errors.
458
  */
459
  if (ha_finish_errors())
460
    error= 1;
461
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
462
  return(error);
1 by brian
clean slate
463
}
464
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
465
static bool dropdb_handlerton(THD *unused1 __attribute__((unused)),
149 by Brian Aker
More bool conversion.
466
                              plugin_ref plugin,
467
                              void *path)
1 by brian
clean slate
468
{
469
  handlerton *hton= plugin_data(plugin, handlerton *);
470
  if (hton->state == SHOW_OPTION_YES && hton->drop_database)
471
    hton->drop_database(hton, (char *)path);
56 by brian
Next pass of true/false update.
472
  return false;
1 by brian
clean slate
473
}
474
475
476
void ha_drop_database(char* path)
477
{
319.1.1 by Grant Limberg
renamed all instances of MYSQL_ to DRIZZLE_
478
  plugin_foreach(NULL, dropdb_handlerton, DRIZZLE_STORAGE_ENGINE_PLUGIN, path);
1 by brian
clean slate
479
}
480
481
149 by Brian Aker
More bool conversion.
482
static bool closecon_handlerton(THD *thd, plugin_ref plugin,
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
483
                                void *unused __attribute__((unused)))
1 by brian
clean slate
484
{
485
  handlerton *hton= plugin_data(plugin, handlerton *);
486
  /*
487
    there's no need to rollback here as all transactions must
488
    be rolled back already
489
  */
490
  if (hton->state == SHOW_OPTION_YES && hton->close_connection &&
491
      thd_get_ha_data(thd, hton))
492
    hton->close_connection(hton, thd);
56 by brian
Next pass of true/false update.
493
  return false;
1 by brian
clean slate
494
}
495
496
497
/**
498
  @note
499
    don't bother to rollback here, it's done already
500
*/
501
void ha_close_connection(THD* thd)
502
{
319.1.1 by Grant Limberg
renamed all instances of MYSQL_ to DRIZZLE_
503
  plugin_foreach(thd, closecon_handlerton, DRIZZLE_STORAGE_ENGINE_PLUGIN, 0);
1 by brian
clean slate
504
}
505
506
/* ========================================================================
507
 ======================= TRANSACTIONS ===================================*/
508
509
/**
510
  Transaction handling in the server
511
  ==================================
512
513
  In each client connection, MySQL maintains two transactional
514
  states:
515
  - a statement transaction,
516
  - a standard, also called normal transaction.
517
518
  Historical note
519
  ---------------
520
  "Statement transaction" is a non-standard term that comes
521
  from the times when MySQL supported BerkeleyDB storage engine.
522
523
  First of all, it should be said that in BerkeleyDB auto-commit
524
  mode auto-commits operations that are atomic to the storage
525
  engine itself, such as a write of a record, and are too
526
  high-granular to be atomic from the application perspective
527
  (MySQL). One SQL statement could involve many BerkeleyDB
528
  auto-committed operations and thus BerkeleyDB auto-commit was of
529
  little use to MySQL.
530
531
  Secondly, instead of SQL standard savepoints, BerkeleyDB
532
  provided the concept of "nested transactions". In a nutshell,
533
  transactions could be arbitrarily nested, but when the parent
534
  transaction was committed or aborted, all its child (nested)
535
  transactions were committed or aborted as well.
536
  Commit of a nested transaction, in turn, made its changes
537
  visible, but not durable: it destroyed the nested transaction,
538
  all its changes would become available to the parent and
539
  currently active nested transactions of this parent.
540
541
  So the mechanism of nested transactions was employed to
542
  provide "all or nothing" guarantee of SQL statements
543
  required by the standard.
544
  A nested transaction would be created at start of each SQL
545
  statement, and destroyed (committed or aborted) at statement
546
  end. Such nested transaction was internally referred to as
547
  a "statement transaction" and gave birth to the term.
548
549
  <Historical note ends>
550
551
  Since then a statement transaction is started for each statement
552
  that accesses transactional tables or uses the binary log.  If
553
  the statement succeeds, the statement transaction is committed.
554
  If the statement fails, the transaction is rolled back. Commits
555
  of statement transactions are not durable -- each such
556
  transaction is nested in the normal transaction, and if the
557
  normal transaction is rolled back, the effects of all enclosed
558
  statement transactions are undone as well.  Technically,
559
  a statement transaction can be viewed as a savepoint which is
560
  maintained automatically in order to make effects of one
561
  statement atomic.
562
563
  The normal transaction is started by the user and is ended
564
  usually upon a user request as well. The normal transaction
565
  encloses transactions of all statements issued between
566
  its beginning and its end.
567
  In autocommit mode, the normal transaction is equivalent
568
  to the statement transaction.
569
570
  Since MySQL supports PSEA (pluggable storage engine
571
  architecture), more than one transactional engine can be
572
  active at a time. Hence transactions, from the server
573
  point of view, are always distributed. In particular,
574
  transactional state is maintained independently for each
575
  engine. In order to commit a transaction the two phase
576
  commit protocol is employed.
577
578
  Not all statements are executed in context of a transaction.
579
  Administrative and status information statements do not modify
580
  engine data, and thus do not start a statement transaction and
581
  also have no effect on the normal transaction. Examples of such
582
  statements are SHOW STATUS and RESET SLAVE.
583
584
  Similarly DDL statements are not transactional,
585
  and therefore a transaction is [almost] never started for a DDL
586
  statement. The difference between a DDL statement and a purely
587
  administrative statement though is that a DDL statement always
588
  commits the current transaction before proceeding, if there is
589
  any.
590
591
  At last, SQL statements that work with non-transactional
592
  engines also have no effect on the transaction state of the
593
  connection. Even though they are written to the binary log,
594
  and the binary log is, overall, transactional, the writes
595
  are done in "write-through" mode, directly to the binlog
596
  file, followed by an OS cache sync, in other words,
597
  bypassing the binlog undo log (translog).
598
  They do not commit the current normal transaction.
599
  A failure of a statement that uses non-transactional tables
600
  would cause a rollback of the statement transaction, but
601
  in case no transactional tables are used,
602
  no statement transaction is started.
603
604
  Data layout
605
  -----------
606
607
  The server stores its transaction-related data in
608
  thd->transaction. This structure has two members of type
609
  THD_TRANS. These members correspond to the statement and
610
  normal transactions respectively:
611
612
  - thd->transaction.stmt contains a list of engines
613
  that are participating in the given statement
614
  - thd->transaction.all contains a list of engines that
615
  have participated in any of the statement transactions started
616
  within the context of the normal transaction.
617
  Each element of the list contains a pointer to the storage
618
  engine, engine-specific transactional data, and engine-specific
619
  transaction flags.
620
621
  In autocommit mode thd->transaction.all is empty.
622
  Instead, data of thd->transaction.stmt is
623
  used to commit/rollback the normal transaction.
624
625
  The list of registered engines has a few important properties:
626
  - no engine is registered in the list twice
627
  - engines are present in the list in reverse temporal order --
628
  new participants are always added to the beginning of the list.
629
630
  Transaction life cycle
631
  ----------------------
632
633
  When a new connection is established, thd->transaction
634
  members are initialized to an empty state.
635
  If a statement uses any tables, all affected engines
636
  are registered in the statement engine list. In
637
  non-autocommit mode, the same engines are registered in
638
  the normal transaction list.
639
  At the end of the statement, the server issues a commit
640
  or a roll back for all engines in the statement list.
641
  At this point transaction flags of an engine, if any, are
642
  propagated from the statement list to the list of the normal
643
  transaction.
644
  When commit/rollback is finished, the statement list is
645
  cleared. It will be filled in again by the next statement,
646
  and emptied again at the next statement's end.
647
648
  The normal transaction is committed in a similar way
649
  (by going over all engines in thd->transaction.all list)
650
  but at different times:
651
  - when a COMMIT SQL statement is issued by the user
652
  - implicitly, by the server, at the beginning of a DDL statement
653
  or SET AUTOCOMMIT={0|1} statement.
654
655
  The normal transaction can be rolled back as well:
656
  - if the user has requested so, by issuing ROLLBACK SQL
657
  statement
658
  - if one of the storage engines requested a rollback
659
  by setting thd->transaction_rollback_request. This may
660
  happen in case, e.g., when the transaction in the engine was
661
  chosen as a victim of the internal deadlock resolution algorithm
662
  and rolled back internally. When such a situation happens, there
663
  is little the server can do and the only option is to rollback
664
  transactions in all other participating engines.  In this case
665
  the rollback is accompanied by an error sent to the user.
666
667
  As follows from the use cases above, the normal transaction
668
  is never committed when there is an outstanding statement
669
  transaction. In most cases there is no conflict, since
670
  commits of the normal transaction are issued by a stand-alone
671
  administrative or DDL statement, thus no outstanding statement
672
  transaction of the previous statement exists. Besides,
673
  all statements that manipulate with the normal transaction
674
  are prohibited in stored functions and triggers, therefore
675
  no conflicting situation can occur in a sub-statement either.
676
  The remaining rare cases when the server explicitly has
677
  to commit the statement transaction prior to committing the normal
678
  one cover error-handling scenarios (see for example
679
  SQLCOM_LOCK_TABLES).
680
681
  When committing a statement or a normal transaction, the server
682
  either uses the two-phase commit protocol, or issues a commit
683
  in each engine independently. The two-phase commit protocol
684
  is used only if:
685
  - all participating engines support two-phase commit (provide
686
    handlerton::prepare PSEA API call) and
687
  - transactions in at least two engines modify data (i.e. are
688
  not read-only).
689
690
  Note that the two phase commit is used for
691
  statement transactions, even though they are not durable anyway.
692
  This is done to ensure logical consistency of data in a multiple-
693
  engine transaction.
694
  For example, imagine that some day MySQL supports unique
695
  constraint checks deferred till the end of statement. In such
696
  case a commit in one of the engines may yield ER_DUP_KEY,
697
  and MySQL should be able to gracefully abort statement
698
  transactions of other participants.
699
700
  After the normal transaction has been committed,
701
  thd->transaction.all list is cleared.
702
703
  When a connection is closed, the current normal transaction, if
704
  any, is rolled back.
705
706
  Roles and responsibilities
707
  --------------------------
708
709
  The server has no way to know that an engine participates in
710
  the statement and a transaction has been started
711
  in it unless the engine says so. Thus, in order to be
712
  a part of a transaction, the engine must "register" itself.
713
  This is done by invoking trans_register_ha() server call.
714
  Normally the engine registers itself whenever handler::external_lock()
715
  is called. trans_register_ha() can be invoked many times: if
716
  an engine is already registered, the call does nothing.
717
  In case autocommit is not set, the engine must register itself
718
  twice -- both in the statement list and in the normal transaction
719
  list.
720
  In which list to register is a parameter of trans_register_ha().
721
722
  Note, that although the registration interface in itself is
723
  fairly clear, the current usage practice often leads to undesired
724
  effects. E.g. since a call to trans_register_ha() in most engines
725
  is embedded into implementation of handler::external_lock(), some
726
  DDL statements start a transaction (at least from the server
727
  point of view) even though they are not expected to. E.g.
728
  CREATE TABLE does not start a transaction, since
729
  handler::external_lock() is never called during CREATE TABLE. But
730
  CREATE TABLE ... SELECT does, since handler::external_lock() is
731
  called for the table that is being selected from. This has no
732
  practical effects currently, but must be kept in mind
733
  nevertheless.
734
735
  Once an engine is registered, the server will do the rest
736
  of the work.
737
738
  During statement execution, whenever any of data-modifying
739
  PSEA API methods is used, e.g. handler::write_row() or
740
  handler::update_row(), the read-write flag is raised in the
741
  statement transaction for the involved engine.
742
  Currently all PSEA calls are "traced", and the data can not be
743
  changed in a way other than issuing a PSEA call. Important:
744
  unless this invariant is preserved the server will not know that
745
  a transaction in a given engine is read-write and will not
746
  involve the two-phase commit protocol!
747
748
  At the end of a statement, server call
749
  ha_autocommit_or_rollback() is invoked. This call in turn
750
  invokes handlerton::prepare() for every involved engine.
751
  Prepare is followed by a call to handlerton::commit_one_phase()
752
  If a one-phase commit will suffice, handlerton::prepare() is not
753
  invoked and the server only calls handlerton::commit_one_phase().
754
  At statement commit, the statement-related read-write engine
755
  flag is propagated to the corresponding flag in the normal
756
  transaction.  When the commit is complete, the list of registered
757
  engines is cleared.
758
759
  Rollback is handled in a similar fashion.
760
761
  Additional notes on DDL and the normal transaction.
762
  ---------------------------------------------------
763
764
  DDLs and operations with non-transactional engines
765
  do not "register" in thd->transaction lists, and thus do not
766
  modify the transaction state. Besides, each DDL in
767
  MySQL is prefixed with an implicit normal transaction commit
768
  (a call to end_active_trans()), and thus leaves nothing
769
  to modify.
770
  However, as it has been pointed out with CREATE TABLE .. SELECT,
771
  some DDL statements can start a *new* transaction.
772
773
  Behaviour of the server in this case is currently badly
774
  defined.
775
  DDL statements use a form of "semantic" logging
776
  to maintain atomicity: if CREATE TABLE .. SELECT failed,
777
  the newly created table is deleted.
778
  In addition, some DDL statements issue interim transaction
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
779
  commits: e.g. ALTER Table issues a commit after data is copied
1 by brian
clean slate
780
  from the original table to the internal temporary table. Other
781
  statements, e.g. CREATE TABLE ... SELECT do not always commit
782
  after itself.
783
  And finally there is a group of DDL statements such as
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
784
  RENAME/DROP Table that doesn't start a new transaction
1 by brian
clean slate
785
  and doesn't commit.
786
787
  This diversity makes it hard to say what will happen if
788
  by chance a stored function is invoked during a DDL --
789
  whether any modifications it makes will be committed or not
790
  is not clear. Fortunately, SQL grammar of few DDLs allows
791
  invocation of a stored function.
792
793
  A consistent behaviour is perhaps to always commit the normal
794
  transaction after all DDLs, just like the statement transaction
795
  is always committed at the end of all statements.
796
*/
797
798
/**
799
  Register a storage engine for a transaction.
800
801
  Every storage engine MUST call this function when it starts
802
  a transaction or a statement (that is it must be called both for the
803
  "beginning of transaction" and "beginning of statement").
804
  Only storage engines registered for the transaction/statement
805
  will know when to commit/rollback it.
806
807
  @note
808
    trans_register_ha is idempotent - storage engine may register many
809
    times per transaction.
810
811
*/
812
void trans_register_ha(THD *thd, bool all, handlerton *ht_arg)
813
{
814
  THD_TRANS *trans;
815
  Ha_trx_info *ha_info;
816
817
  if (all)
818
  {
819
    trans= &thd->transaction.all;
820
    thd->server_status|= SERVER_STATUS_IN_TRANS;
821
  }
822
  else
823
    trans= &thd->transaction.stmt;
824
825
  ha_info= thd->ha_data[ht_arg->slot].ha_info + static_cast<unsigned>(all);
826
827
  if (ha_info->is_started())
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
828
    return; /* already registered, return */
1 by brian
clean slate
829
830
  ha_info->register_ha(trans, ht_arg);
831
832
  trans->no_2pc|=(ht_arg->prepare==0);
833
  if (thd->transaction.xid_state.xid.is_null())
834
    thd->transaction.xid_state.xid.set(thd->query_id);
835
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
836
  return;
1 by brian
clean slate
837
}
838
839
/**
840
  @retval
841
    0   ok
842
  @retval
843
    1   error, transaction was rolled back
844
*/
845
int ha_prepare(THD *thd)
846
{
847
  int error=0, all=1;
848
  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
849
  Ha_trx_info *ha_info= trans->ha_list;
850
  if (ha_info)
851
  {
852
    for (; ha_info; ha_info= ha_info->next())
853
    {
854
      int err;
855
      handlerton *ht= ha_info->ht();
856
      status_var_increment(thd->status_var.ha_prepare_count);
857
      if (ht->prepare)
858
      {
859
        if ((err= ht->prepare(ht, thd, all)))
860
        {
861
          my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
862
          ha_rollback_trans(thd, all);
863
          error=1;
864
          break;
865
        }
866
      }
867
      else
868
      {
261.4.1 by Felipe
- Renamed MYSQL_ERROR to DRIZZLE_ERROR.
869
        push_warning_printf(thd, DRIZZLE_ERROR::WARN_LEVEL_WARN,
1 by brian
clean slate
870
                            ER_ILLEGAL_HA, ER(ER_ILLEGAL_HA),
871
                            ha_resolve_storage_engine_name(ht));
872
      }
873
    }
874
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
875
  return(error);
1 by brian
clean slate
876
}
877
878
/**
879
  Check if we can skip the two-phase commit.
880
881
  A helper function to evaluate if two-phase commit is mandatory.
882
  As a side effect, propagates the read-only/read-write flags
883
  of the statement transaction to its enclosing normal transaction.
884
56 by brian
Next pass of true/false update.
885
  @retval true   we must run a two-phase commit. Returned
1 by brian
clean slate
886
                 if we have at least two engines with read-write changes.
56 by brian
Next pass of true/false update.
887
  @retval false  Don't need two-phase commit. Even if we have two
1 by brian
clean slate
888
                 transactional engines, we can run two independent
889
                 commits if changes in one of the engines are read-only.
890
*/
891
892
static
893
bool
894
ha_check_and_coalesce_trx_read_only(THD *thd, Ha_trx_info *ha_list,
895
                                    bool all)
896
{
897
  /* The number of storage engines that have actual changes. */
898
  unsigned rw_ha_count= 0;
899
  Ha_trx_info *ha_info;
900
901
  for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
902
  {
903
    if (ha_info->is_trx_read_write())
904
      ++rw_ha_count;
905
906
    if (! all)
907
    {
908
      Ha_trx_info *ha_info_all= &thd->ha_data[ha_info->ht()->slot].ha_info[1];
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
909
      assert(ha_info != ha_info_all);
1 by brian
clean slate
910
      /*
911
        Merge read-only/read-write information about statement
912
        transaction to its enclosing normal transaction. Do this
913
        only if in a real transaction -- that is, if we know
914
        that ha_info_all is registered in thd->transaction.all.
915
        Since otherwise we only clutter the normal transaction flags.
916
      */
56 by brian
Next pass of true/false update.
917
      if (ha_info_all->is_started()) /* false if autocommit. */
1 by brian
clean slate
918
        ha_info_all->coalesce_trx_with(ha_info);
919
    }
920
    else if (rw_ha_count > 1)
921
    {
922
      /*
923
        It is a normal transaction, so we don't need to merge read/write
924
        information up, and the need for two-phase commit has been
925
        already established. Break the loop prematurely.
926
      */
927
      break;
928
    }
929
  }
930
  return rw_ha_count > 1;
931
}
932
933
934
/**
935
  @retval
936
    0   ok
937
  @retval
938
    1   transaction was rolled back
939
  @retval
940
    2   error during commit, data may be inconsistent
941
942
  @todo
943
    Since we don't support nested statement transactions in 5.0,
944
    we can't commit or rollback stmt transactions while we are inside
945
    stored functions or triggers. So we simply do nothing now.
946
    TODO: This should be fixed in later ( >= 5.1) releases.
947
*/
948
int ha_commit_trans(THD *thd, bool all)
949
{
950
  int error= 0, cookie= 0;
951
  /*
952
    'all' means that this is either an explicit commit issued by
953
    user, or an implicit commit issued by a DDL.
954
  */
955
  THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
956
  bool is_real_trans= all || thd->transaction.all.ha_list == 0;
957
  Ha_trx_info *ha_info= trans->ha_list;
958
  my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
959
960
  /*
961
    We must not commit the normal transaction if a statement
962
    transaction is pending. Otherwise statement transaction
963
    flags will not get propagated to its normal transaction's
964
    counterpart.
965
  */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
966
  assert(thd->transaction.stmt.ha_list == NULL ||
1 by brian
clean slate
967
              trans == &thd->transaction.stmt);
968
969
  if (thd->in_sub_stmt)
970
  {
971
    /*
972
      Since we don't support nested statement transactions in 5.0,
973
      we can't commit or rollback stmt transactions while we are inside
974
      stored functions or triggers. So we simply do nothing now.
975
      TODO: This should be fixed in later ( >= 5.1) releases.
976
    */
977
    if (!all)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
978
      return(0);
1 by brian
clean slate
979
    /*
980
      We assume that all statements which commit or rollback main transaction
981
      are prohibited inside of stored functions or triggers. So they should
982
      bail out with error even before ha_commit_trans() call. To be 100% safe
983
      let us throw error in non-debug builds.
984
    */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
985
    assert(0);
1 by brian
clean slate
986
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
987
    return(2);
1 by brian
clean slate
988
  }
989
  if (ha_info)
990
  {
991
    bool must_2pc;
992
993
    if (is_real_trans && wait_if_global_read_lock(thd, 0, 0))
994
    {
995
      ha_rollback_trans(thd, all);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
996
      return(1);
1 by brian
clean slate
997
    }
998
999
    if (   is_real_trans
1000
        && opt_readonly
1001
        && ! thd->slave_thread
1002
       )
1003
    {
1004
      my_error(ER_OPTION_PREVENTS_STATEMENT, MYF(0), "--read-only");
1005
      ha_rollback_trans(thd, all);
1006
      error= 1;
1007
      goto end;
1008
    }
1009
1010
    must_2pc= ha_check_and_coalesce_trx_read_only(thd, ha_info, all);
1011
1012
    if (!trans->no_2pc && must_2pc)
1013
    {
1014
      for (; ha_info && !error; ha_info= ha_info->next())
1015
      {
1016
        int err;
1017
        handlerton *ht= ha_info->ht();
1018
        /*
1019
          Do not call two-phase commit if this particular
1020
          transaction is read-only. This allows for simpler
1021
          implementation in engines that are always read-only.
1022
        */
1023
        if (! ha_info->is_trx_read_write())
1024
          continue;
1025
        /*
1026
          Sic: we know that prepare() is not NULL since otherwise
1027
          trans->no_2pc would have been set.
1028
        */
1029
        if ((err= ht->prepare(ht, thd, all)))
1030
        {
1031
          my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1032
          error= 1;
1033
        }
1034
        status_var_increment(thd->status_var.ha_prepare_count);
1035
      }
1036
      if (error || (is_real_trans && xid &&
1037
                    (error= !(cookie= tc_log->log_xid(thd, xid)))))
1038
      {
1039
        ha_rollback_trans(thd, all);
1040
        error= 1;
1041
        goto end;
1042
      }
1043
    }
1044
    error=ha_commit_one_phase(thd, all) ? (cookie ? 2 : 1) : 0;
1045
    if (cookie)
1046
      tc_log->unlog(cookie, xid);
1047
end:
1048
    if (is_real_trans)
1049
      start_waiting_global_read_lock(thd);
1050
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1051
  return(error);
1 by brian
clean slate
1052
}
1053
1054
/**
1055
  @note
1056
  This function does not care about global read lock. A caller should.
1057
*/
1058
int ha_commit_one_phase(THD *thd, bool all)
1059
{
1060
  int error=0;
1061
  THD_TRANS *trans=all ? &thd->transaction.all : &thd->transaction.stmt;
1062
  bool is_real_trans=all || thd->transaction.all.ha_list == 0;
1063
  Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
1064
  if (ha_info)
1065
  {
1066
    for (; ha_info; ha_info= ha_info_next)
1067
    {
1068
      int err;
1069
      handlerton *ht= ha_info->ht();
1070
      if ((err= ht->commit(ht, thd, all)))
1071
      {
1072
        my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
1073
        error=1;
1074
      }
1075
      status_var_increment(thd->status_var.ha_commit_count);
1076
      ha_info_next= ha_info->next();
1077
      ha_info->reset(); /* keep it conveniently zero-filled */
1078
    }
1079
    trans->ha_list= 0;
1080
    trans->no_2pc=0;
1081
    if (is_real_trans)
1082
      thd->transaction.xid_state.xid.null();
1083
    if (all)
1084
    {
1085
      thd->variables.tx_isolation=thd->session_tx_isolation;
1086
      thd->transaction.cleanup();
1087
    }
1088
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1089
  return(error);
1 by brian
clean slate
1090
}
1091
1092
1093
/**
  Roll back the statement transaction or the normal transaction in every
  storage engine registered for it.

  @param thd  connection whose transaction is rolled back
  @param all  true for the normal transaction, false for the statement
              transaction

  @return 0 on success (also when a statement rollback is skipped inside
          a sub-statement), 1 if an engine reported an error or a rollback
          of the normal transaction was requested inside a sub-statement
*/
int ha_rollback_trans(THD *thd, bool all)
{
  int error= 0;
  THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
  Ha_trx_info *engine= trans->ha_list;
  /* Sampled up front, before any engine list is cleared below. */
  const bool is_real_trans= all || thd->transaction.all.ha_list == 0;

  /*
    The normal transaction must not be rolled back while a statement
    transaction is pending.
  */
  assert(thd->transaction.stmt.ha_list == NULL ||
         trans == &thd->transaction.stmt);

  if (thd->in_sub_stmt)
  {
    /*
      Inside a stored function or trigger the current statement
      transaction is neither committed nor rolled back; the same rule is
      applied by ha_commit_trans().
    */
    if (!all)
      return 0;
    assert(0);
    my_error(ER_COMMIT_NOT_ALLOWED_IN_SF_OR_TRG, MYF(0));
    return 1;
  }

  if (engine)
  {
    while (engine)
    {
      handlerton *ht= engine->ht();
      int err= ht->rollback(ht, thd, all);
      if (err)
      { // cannot happen
        my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
        error= 1;
      }
      status_var_increment(thd->status_var.ha_rollback_count);

      /* Fetch the successor first: reset() wipes the entry. */
      Ha_trx_info *following= engine->next();
      engine->reset(); /* keep it conveniently zero-filled */
      engine= following;
    }
    trans->ha_list= 0;
    trans->no_2pc= 0;
    if (is_real_trans)
      thd->transaction.xid_state.xid.null();
    if (all)
    {
      thd->variables.tx_isolation= thd->session_tx_isolation;
      thd->transaction.cleanup();
    }
  }

  if (all)
    thd->transaction_rollback_request= false;

  /*
    Warn when a non-transactional table was updated. A slave thread is
    exempt: its ROLLBACK was read from the binary log, so
    ER_WARNING_NOT_COMPLETE_ROLLBACK is expected there, and the warning
    would only end up in the error log and puzzle users.
  */
  if (is_real_trans && thd->transaction.all.modified_non_trans_table &&
      !thd->slave_thread && thd->killed != THD::KILL_CONNECTION)
  {
    push_warning(thd, DRIZZLE_ERROR::WARN_LEVEL_WARN,
                 ER_WARNING_NOT_COMPLETE_ROLLBACK,
                 ER(ER_WARNING_NOT_COMPLETE_ROLLBACK));
  }
  return error;
}
1164
1165
/**
1166
  This is used to commit or rollback a single statement depending on
1167
  the value of error.
1168
1169
  @note
1170
    Note that if the autocommit is on, then the following call inside
1171
    InnoDB will commit or rollback the whole transaction (= the statement). The
1172
    autocommit mechanism built into InnoDB is based on counting locks, but if
1173
    the user has used LOCK TABLES then that mechanism does not know to do the
1174
    commit.
1175
*/
1176
int ha_autocommit_or_rollback(THD *thd, int error)
1177
{
1178
  if (thd->transaction.stmt.ha_list)
1179
  {
1180
    if (!error)
1181
    {
1182
      if (ha_commit_trans(thd, 0))
1183
	error=1;
1184
    }
1185
    else 
1186
    {
1187
      (void) ha_rollback_trans(thd, 0);
1188
      if (thd->transaction_rollback_request && !thd->in_sub_stmt)
1189
        (void) ha_rollback(thd);
1190
    }
1191
1192
    thd->variables.tx_isolation=thd->session_tx_isolation;
1193
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1194
  return(error);
1 by brian
clean slate
1195
}
1196
1197
1198
struct xahton_st {
1199
  XID *xid;
1200
  int result;
1201
};
1202
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1203
static bool xacommit_handlerton(THD *unused1 __attribute__((unused)),
149 by Brian Aker
More bool conversion.
1204
                                plugin_ref plugin,
1205
                                void *arg)
1 by brian
clean slate
1206
{
1207
  handlerton *hton= plugin_data(plugin, handlerton *);
1208
  if (hton->state == SHOW_OPTION_YES && hton->recover)
1209
  {
1210
    hton->commit_by_xid(hton, ((struct xahton_st *)arg)->xid);
1211
    ((struct xahton_st *)arg)->result= 0;
1212
  }
56 by brian
Next pass of true/false update.
1213
  return false;
1 by brian
clean slate
1214
}
1215
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1216
static bool xarollback_handlerton(THD *unused1 __attribute__((unused)),
149 by Brian Aker
More bool conversion.
1217
                                  plugin_ref plugin,
1218
                                  void *arg)
1 by brian
clean slate
1219
{
1220
  handlerton *hton= plugin_data(plugin, handlerton *);
1221
  if (hton->state == SHOW_OPTION_YES && hton->recover)
1222
  {
1223
    hton->rollback_by_xid(hton, ((struct xahton_st *)arg)->xid);
1224
    ((struct xahton_st *)arg)->result= 0;
1225
  }
56 by brian
Next pass of true/false update.
1226
  return false;
1 by brian
clean slate
1227
}
1228
1229
1230
/**
  Commit or roll back the transaction identified by 'xid' in every
  storage engine plugin.

  @param xid     transaction identifier
  @param commit  true to commit, false to roll back

  @return 0 if at least one engine was asked, 1 otherwise
*/
int ha_commit_or_rollback_by_xid(XID *xid, bool commit)
{
  struct xahton_st request;
  request.xid= xid;
  request.result= 1;              /* stays 1 unless a callback clears it */

  if (commit)
    plugin_foreach(NULL, xacommit_handlerton,
                   DRIZZLE_STORAGE_ENGINE_PLUGIN, &request);
  else
    plugin_foreach(NULL, xarollback_handlerton,
                   DRIZZLE_STORAGE_ENGINE_PLUGIN, &request);

  return request.result;
}
1241
1242
/**
1243
  recover() step of xa.
1244
1245
  @note
1246
    there are three modes of operation:
1247
    - automatic recover after a crash
1248
    in this case commit_list != 0, tc_heuristic_recover==0
1249
    all xids from commit_list are committed, others are rolled back
1250
    - manual (heuristic) recover
1251
    in this case commit_list==0, tc_heuristic_recover != 0
1252
    DBA has explicitly specified that all prepared transactions should
1253
    be committed (or rolled back).
1254
    - no recovery (MySQL did not detect a crash)
1255
    in this case commit_list==0, tc_heuristic_recover == 0
1256
    there should be no prepared transactions in this case.
1257
*/
1258
struct xarecover_st
1259
{
1260
  int len, found_foreign_xids, found_my_xids;
1261
  XID *list;
1262
  HASH *commit_list;
1263
  bool dry_run;
1264
};
1265
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1266
static bool xarecover_handlerton(THD *unused __attribute__((unused)),
149 by Brian Aker
More bool conversion.
1267
                                 plugin_ref plugin,
1268
                                 void *arg)
1 by brian
clean slate
1269
{
1270
  handlerton *hton= plugin_data(plugin, handlerton *);
1271
  struct xarecover_st *info= (struct xarecover_st *) arg;
1272
  int got;
1273
1274
  if (hton->state == SHOW_OPTION_YES && hton->recover)
1275
  {
1276
    while ((got= hton->recover(hton, info->list, info->len)) > 0 )
1277
    {
338 by Monty Taylor
Tagged more strings.
1278
      sql_print_information(_("Found %d prepared transaction(s) in %s"),
1 by brian
clean slate
1279
                            got, ha_resolve_storage_engine_name(hton));
1280
      for (int i=0; i < got; i ++)
1281
      {
1282
        my_xid x=info->list[i].get_my_xid();
1283
        if (!x) // not "mine" - that is generated by external TM
1284
        {
1285
          xid_cache_insert(info->list+i, XA_PREPARED);
1286
          info->found_foreign_xids++;
1287
          continue;
1288
        }
1289
        if (info->dry_run)
1290
        {
1291
          info->found_my_xids++;
1292
          continue;
1293
        }
1294
        // recovery mode
1295
        if (info->commit_list ?
1296
            hash_search(info->commit_list, (uchar *)&x, sizeof(x)) != 0 :
1297
            tc_heuristic_recover == TC_HEURISTIC_RECOVER_COMMIT)
1298
        {
1299
          hton->commit_by_xid(hton, info->list+i);
1300
        }
1301
        else
1302
        {
1303
          hton->rollback_by_xid(hton, info->list+i);
1304
        }
1305
      }
1306
      if (got < info->len)
1307
        break;
1308
    }
1309
  }
56 by brian
Next pass of true/false update.
1310
  return false;
1 by brian
clean slate
1311
}
1312
1313
/**
  Run the recover() step of XA over every storage engine plugin.

  @param commit_list  xids that must be committed, or NULL when no
                      automatic crash recovery is being performed

  @return 0 on success or when there is nothing to recover; 1 if no xid
          buffer could be allocated, or if a dry run found prepared
          transactions
*/
int ha_recover(HASH *commit_list)
{
  struct xarecover_st info;
  int attempted_len= 0;           /* element count of the last allocation try */

  info.found_foreign_xids= info.found_my_xids= 0;
  info.commit_list= commit_list;
  info.dry_run= (info.commit_list==0 && tc_heuristic_recover==0);
  info.list= NULL;

  /* commit_list and tc_heuristic_recover cannot be set both */
  assert(info.commit_list==0 || tc_heuristic_recover==0);
  /* if either is set, total_ha_2pc must be set too */
  assert(info.dry_run || total_ha_2pc>(uint32_t)opt_bin_log);

  if (total_ha_2pc <= (uint32_t)opt_bin_log)
    return(0);

  if (info.commit_list)
    sql_print_information(_("Starting crash recovery..."));


#ifndef WILL_BE_DELETED_LATER

  /*
    for now, only InnoDB supports 2pc. It means we can always safely
    rollback all pending transactions, without risking inconsistent data
  */

  assert(total_ha_2pc == (uint32_t) opt_bin_log+1); // only InnoDB and binlog
  tc_heuristic_recover= TC_HEURISTIC_RECOVER_ROLLBACK; // forcing ROLLBACK
  info.dry_run=false;
#endif


  /*
    Try progressively smaller buffers. Leave the loop right after a
    successful allocation so that info.len keeps the element count that
    was really allocated; letting the loop step run once more would halve
    it and engines would be told the buffer is half its true size.
  */
  for (info.len= MAX_XID_LIST_SIZE;
       info.len > MIN_XID_LIST_SIZE; info.len/=2)
  {
    attempted_len= info.len;
    info.list=(XID *)my_malloc(info.len*sizeof(XID), MYF(0));
    if (info.list)
      break;
  }
  if (!info.list)
  {
    /*
      Report the size that was last requested. The argument is narrowed
      to int because passing a size_t to a printf-style variadic call
      that expects an int is undefined.
      NOTE(review): assumes the ER_OUTOFMEMORY text takes a single %d --
      confirm against the error message catalogue.
    */
    sql_print_error(ER(ER_OUTOFMEMORY),
                    static_cast<int>(attempted_len*sizeof(XID)));
    return(1);
  }

  plugin_foreach(NULL, xarecover_handlerton,
                 DRIZZLE_STORAGE_ENGINE_PLUGIN, &info);

  my_free((uchar*)info.list, MYF(0));
  if (info.found_foreign_xids)
    sql_print_warning(_("Found %d prepared XA transactions"),
                      info.found_foreign_xids);
  if (info.dry_run && info.found_my_xids)
  {
    sql_print_error(_("Found %d prepared transactions! It means that drizzled "
                    "was not shut down properly last time and critical "
                    "recovery information (last binlog or %s file) was "
                    "manually deleted after a crash. You have to start "
                    "drizzled with the --tc-heuristic-recover switch to "
                    "commit or rollback pending transactions."),
                    info.found_my_xids, opt_tc_log_file);
    return(1);
  }
  if (info.commit_list)
    sql_print_information(_("Crash recovery finished."));
  return(0);
}
1379
1380
/**
1381
  return the list of XID's to a client, the same way SHOW commands do.
1382
1383
  @note
1384
    I didn't find in XA specs that an RM cannot return the same XID twice,
1385
    so mysql_xa_recover does not filter XID's to ensure uniqueness.
1386
    It can be easily fixed later, if necessary.
1387
*/
1388
bool mysql_xa_recover(THD *thd)
1389
{
1390
  List<Item> field_list;
1391
  Protocol *protocol= thd->protocol;
1392
  int i=0;
1393
  XID_STATE *xs;
1394
1395
  field_list.push_back(new Item_int("formatID", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1396
  field_list.push_back(new Item_int("gtrid_length", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1397
  field_list.push_back(new Item_int("bqual_length", 0, MY_INT32_NUM_DECIMAL_DIGITS));
1398
  field_list.push_back(new Item_empty_string("data",XIDDATASIZE));
1399
1400
  if (protocol->send_fields(&field_list,
1401
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1402
    return(1);
1 by brian
clean slate
1403
1404
  pthread_mutex_lock(&LOCK_xid_cache);
1405
  while ((xs= (XID_STATE*)hash_element(&xid_cache, i++)))
1406
  {
1407
    if (xs->xa_state==XA_PREPARED)
1408
    {
1409
      protocol->prepare_for_resend();
152 by Brian Aker
longlong replacement
1410
      protocol->store_int64_t((int64_t)xs->xid.formatID, false);
1411
      protocol->store_int64_t((int64_t)xs->xid.gtrid_length, false);
1412
      protocol->store_int64_t((int64_t)xs->xid.bqual_length, false);
1 by brian
clean slate
1413
      protocol->store(xs->xid.data, xs->xid.gtrid_length+xs->xid.bqual_length,
1414
                      &my_charset_bin);
1415
      if (protocol->write())
1416
      {
1417
        pthread_mutex_unlock(&LOCK_xid_cache);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1418
        return(1);
1 by brian
clean slate
1419
      }
1420
    }
1421
  }
1422
1423
  pthread_mutex_unlock(&LOCK_xid_cache);
1424
  my_eof(thd);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1425
  return(0);
1 by brian
clean slate
1426
}
1427
1428
/**
1429
  @details
1430
  This function should be called when MySQL sends rows of a SELECT result set
1431
  or the EOF mark to the client. It releases a possible adaptive hash index
1432
  S-latch held by thd in InnoDB and also releases a possible InnoDB query
1433
  FIFO ticket to enter InnoDB. To save CPU time, InnoDB allows a thd to
1434
  keep them over several calls of the InnoDB handler interface when a join
1435
  is executed. But when we let the control to pass to the client they have
1436
  to be released because if the application program uses mysql_use_result(),
1437
  it may deadlock on the S-latch if the application on another connection
1438
  performs another SQL query. In MySQL-4.1 this is even more important because
1439
  there a connection can have several SELECT queries open at the same time.
1440
1441
  @param thd           the thread handle of the current connection
1442
1443
  @return
1444
    always 0
1445
*/
149 by Brian Aker
More bool conversion.
1446
static bool release_temporary_latches(THD *thd, plugin_ref plugin,
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1447
                                      void *unused __attribute__((unused)))
1 by brian
clean slate
1448
{
1449
  handlerton *hton= plugin_data(plugin, handlerton *);
1450
1451
  if (hton->state == SHOW_OPTION_YES && hton->release_temporary_latches)
1452
    hton->release_temporary_latches(hton, thd);
1453
56 by brian
Next pass of true/false update.
1454
  return false;
1 by brian
clean slate
1455
}
1456
1457
1458
/**
  Run release_temporary_latches() over every storage engine plugin.

  @param thd  the thread handle of the current connection

  @return always 0
*/
int ha_release_temporary_latches(THD *thd)
{
  void *no_argument= NULL;

  plugin_foreach(thd, release_temporary_latches,
                 DRIZZLE_STORAGE_ENGINE_PLUGIN, no_argument);
  return 0;
}
1465
1466
/**
  Roll the transaction back to a savepoint.

  Engines that were registered when the savepoint was set are rolled back
  to it; engines that registered afterwards are rolled back completely
  and removed from the transaction.

  @param thd  connection owning the transaction
  @param sv   savepoint to return to

  @return 0 on success, 1 if any engine reported an error
*/
int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv)
{
  int error= 0;
  THD_TRANS *trans= thd->in_sub_stmt ? &thd->transaction.stmt
                                     : &thd->transaction.all;
  Ha_trx_info *engine;

  /* Recomputed below from the engines that remain in the transaction. */
  trans->no_2pc= 0;

  /*
    Pass 1: engines that were part of the transaction when the savepoint
    was set go back to the savepoint.
  */
  engine= sv->ha_list;
  while (engine)
  {
    handlerton *ht= engine->ht();
    assert(ht);
    assert(ht->savepoint_set != 0);

    int err= ht->savepoint_rollback(ht, thd,
                                    (uchar *)(sv+1)+ht->savepoint_offset);
    if (err)
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
      error= 1;
    }
    status_var_increment(thd->status_var.ha_savepoint_rollback_count);
    trans->no_2pc|= ht->prepare == 0;

    engine= engine->next();
  }

  /*
    Pass 2: engines that joined after the savepoint was set are rolled
    back entirely. They sit in front of sv->ha_list in the list.
  */
  engine= trans->ha_list;
  while (engine != sv->ha_list)
  {
    handlerton *ht= engine->ht();

    int err= ht->rollback(ht, thd, !thd->in_sub_stmt);
    if (err)
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
      error= 1;
    }
    status_var_increment(thd->status_var.ha_rollback_count);

    /* Fetch the successor first: reset() wipes the entry. */
    Ha_trx_info *following= engine->next();
    engine->reset(); /* keep it conveniently zero-filled */
    engine= following;
  }

  trans->ha_list= sv->ha_list;
  return error;
}
1514
1515
/**
1516
  @note
1517
  according to the sql standard (ISO/IEC 9075-2:2003)
1518
  section "4.33.4 SQL-statements and transaction states",
1519
  SAVEPOINT is *not* transaction-initiating SQL-statement
1520
*/
1521
int ha_savepoint(THD *thd, SAVEPOINT *sv)
1522
{
1523
  int error=0;
1524
  THD_TRANS *trans= (thd->in_sub_stmt ? &thd->transaction.stmt :
1525
                                        &thd->transaction.all);
1526
  Ha_trx_info *ha_info= trans->ha_list;
1527
  for (; ha_info; ha_info= ha_info->next())
1528
  {
1529
    int err;
1530
    handlerton *ht= ha_info->ht();
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1531
    assert(ht);
1 by brian
clean slate
1532
    if (! ht->savepoint_set)
1533
    {
1534
      my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
1535
      error=1;
1536
      break;
1537
    }
1538
    if ((err= ht->savepoint_set(ht, thd, (uchar *)(sv+1)+ht->savepoint_offset)))
1539
    { // cannot happen
1540
      my_error(ER_GET_ERRNO, MYF(0), err);
1541
      error=1;
1542
    }
1543
    status_var_increment(thd->status_var.ha_savepoint_count);
1544
  }
1545
  /*
1546
    Remember the list of registered storage engines. All new
1547
    engines are prepended to the beginning of the list.
1548
  */
1549
  sv->ha_list= trans->ha_list;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1550
  return(error);
1 by brian
clean slate
1551
}
1552
1553
/**
  Release a savepoint in every engine that was registered when it was
  set. Engines without a savepoint_release() method are skipped.

  @param thd  connection owning the transaction
  @param sv   savepoint to release

  @return 0 on success, 1 if any engine reported an error
*/
int ha_release_savepoint(THD *thd, SAVEPOINT *sv)
{
  int error= 0;

  for (Ha_trx_info *engine= sv->ha_list; engine; engine= engine->next())
  {
    handlerton *ht= engine->ht();
    /* Savepoint life time is enclosed into transaction life time. */
    assert(ht);

    if (ht->savepoint_release)
    {
      int err= ht->savepoint_release(ht, thd,
                                     (uchar *)(sv+1) + ht->savepoint_offset);
      if (err)
      { // cannot happen
        my_error(ER_GET_ERRNO, MYF(0), err);
        error= 1;
      }
    }
  }
  return error;
}
1575
1576
149 by Brian Aker
More bool conversion.
1577
/*
  plugin_foreach() callback: start a consistent snapshot in one enabled
  engine that provides the hook, and clear the bool that 'arg' points to
  so the caller knows such an engine exists. Always returns false.
*/
static bool snapshot_handlerton(THD *thd, plugin_ref plugin, void *arg)
{
  handlerton *hton= plugin_data(plugin, handlerton *);
  bool *warn_flag= (bool *) arg;

  if (hton->state != SHOW_OPTION_YES || !hton->start_consistent_snapshot)
    return false;

  hton->start_consistent_snapshot(hton, thd);
  *warn_flag= false;
  return false;
}
1588
1589
/**
  Ask every storage engine plugin to start a consistent snapshot.

  Runs snapshot_handlerton() over all storage engine plugins. If none of
  them cleared the flag, a warning is pushed to the session.

  @param thd  current session

  @return always 0
*/
int ha_start_consistent_snapshot(THD *thd)
{
  bool no_engine_found= true;

  plugin_foreach(thd, snapshot_handlerton, DRIZZLE_STORAGE_ENGINE_PLUGIN,
                 &no_engine_found);

  if (!no_engine_found)
    return 0;

  /*
    Same idea as when one wants to CREATE TABLE in one engine which does not
    exist:
  */
  push_warning(thd, DRIZZLE_ERROR::WARN_LEVEL_WARN, ER_UNKNOWN_ERROR,
               "This MySQL server does not support any "
               "consistent-read capable storage engine");
  return 0;
}
1605
1606
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1607
static bool flush_handlerton(THD *thd __attribute__((unused)),
149 by Brian Aker
More bool conversion.
1608
                             plugin_ref plugin,
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1609
                             void *arg __attribute__((unused)))
1 by brian
clean slate
1610
{
1611
  handlerton *hton= plugin_data(plugin, handlerton *);
1612
  if (hton->state == SHOW_OPTION_YES && hton->flush_logs && 
1613
      hton->flush_logs(hton))
56 by brian
Next pass of true/false update.
1614
    return true;
1615
  return false;
1 by brian
clean slate
1616
}
1617
1618
1619
/**
  Flush the logs of one storage engine, or of all of them.

  @param db_type  engine to flush, or NULL to visit every storage engine
                  plugin through flush_handlerton()

  @return true on failure: the given engine is not enabled, its
          flush_logs hook returned non-zero, or plugin_foreach()
          returned non-zero; false otherwise
*/
bool ha_flush_logs(handlerton *db_type)
{
  if (db_type != NULL)
  {
    /* A single, explicitly named engine. */
    if (db_type->state != SHOW_OPTION_YES)
      return true;
    if (!db_type->flush_logs)
      return false;
    return db_type->flush_logs(db_type) ? true : false;
  }

  /* No engine given: iterate over all storage engine plugins. */
  return plugin_foreach(NULL, flush_handlerton,
                        DRIZZLE_STORAGE_ENGINE_PLUGIN, 0) ? true : false;
}
1635
1636
static const char *check_lowercase_names(handler *file, const char *path,
1637
                                         char *tmp_path)
1638
{
1639
  if (lower_case_table_names != 2 || (file->ha_table_flags() & HA_FILE_BASED))
1640
    return path;
1641
1642
  /* Ensure that table handler get path in lower case */
1643
  if (tmp_path != path)
266.1.21 by Monty Taylor
Removed references to strmov and strnmov
1644
    stpcpy(tmp_path, path);
1 by brian
clean slate
1645
1646
  /*
1647
    we only should turn into lowercase database/table part
1648
    so start the process after homedirectory
1649
  */
1650
  my_casedn_str(files_charset_info, tmp_path + mysql_data_home_len);
1651
  return tmp_path;
1652
}
1653
1654
1655
/**
1656
  An interceptor to hijack the text of the error message without
1657
  setting an error in the thread. We need the text to present it
1658
  in the form of a warning to the user.
1659
*/
1660
1661
struct Ha_delete_table_error_handler: public Internal_error_handler
1662
{
1663
public:
1664
  virtual bool handle_error(uint sql_errno,
1665
                            const char *message,
261.4.1 by Felipe
- Renamed MYSQL_ERROR to DRIZZLE_ERROR.
1666
                            DRIZZLE_ERROR::enum_warning_level level,
1 by brian
clean slate
1667
                            THD *thd);
261.4.1 by Felipe
- Renamed MYSQL_ERROR to DRIZZLE_ERROR.
1668
  char buff[DRIZZLE_ERRMSG_SIZE];
1 by brian
clean slate
1669
};
1670
1671
1672
bool
1673
Ha_delete_table_error_handler::
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1674
handle_error(uint sql_errno  __attribute__((unused)),
1 by brian
clean slate
1675
             const char *message,
261.4.1 by Felipe
- Renamed MYSQL_ERROR to DRIZZLE_ERROR.
1676
             DRIZZLE_ERROR::enum_warning_level level __attribute__((unused)),
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
1677
             THD *thd __attribute__((unused)))
1 by brian
clean slate
1678
{
1679
  /* Grab the error message */
1680
  strmake(buff, message, sizeof(buff)-1);
56 by brian
Next pass of true/false update.
1681
  return true;
1 by brian
clean slate
1682
}
1683
1684
1685
/**
1686
  This should return ENOENT if the file doesn't exists.
1687
  The .frm file will be deleted only if we return 0 or ENOENT
1688
*/
1689
int ha_delete_table(THD *thd, handlerton *table_type, const char *path,
1690
                    const char *db, const char *alias, bool generate_warning)
1691
{
1692
  handler *file;
1693
  char tmp_path[FN_REFLEN];
1694
  int error;
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
1695
  Table dummy_table;
1 by brian
clean slate
1696
  TABLE_SHARE dummy_share;
1697
212.6.6 by Mats Kindahl
Removing redundant use of casts in drizzled/ for memcmp(), memcpy(), memset(), and memmove().
1698
  memset(&dummy_table, 0, sizeof(dummy_table));
1699
  memset(&dummy_share, 0, sizeof(dummy_share));
1 by brian
clean slate
1700
  dummy_table.s= &dummy_share;
1701
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
1702
  /* DB_TYPE_UNKNOWN is used in ALTER Table when renaming only .frm files */
1 by brian
clean slate
1703
  if (table_type == NULL ||
1704
      ! (file=get_new_handler((TABLE_SHARE*)0, thd->mem_root, table_type)))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1705
    return(ENOENT);
1 by brian
clean slate
1706
1707
  path= check_lowercase_names(file, path, tmp_path);
1708
  if ((error= file->ha_delete_table(path)) && generate_warning)
1709
  {
1710
    /*
1711
      Because file->print_error() use my_error() to generate the error message
1712
      we use an internal error handler to intercept it and store the text
1713
      in a temporary buffer. Later the message will be presented to user
1714
      as a warning.
1715
    */
1716
    Ha_delete_table_error_handler ha_delete_table_error_handler;
1717
1718
    /* Fill up strucutures that print_error may need */
1719
    dummy_share.path.str= (char*) path;
1720
    dummy_share.path.length= strlen(path);
1721
    dummy_share.db.str= (char*) db;
1722
    dummy_share.db.length= strlen(db);
1723
    dummy_share.table_name.str= (char*) alias;
1724
    dummy_share.table_name.length= strlen(alias);
1725
    dummy_table.alias= alias;
1726
1727
    file->change_table_ptr(&dummy_table, &dummy_share);
1728
1729
    thd->push_internal_handler(&ha_delete_table_error_handler);
1730
    file->print_error(error, 0);
1731
1732
    thd->pop_internal_handler();
1733
1734
    /*
1735
      XXX: should we convert *all* errors to warnings here?
1736
      What if the error is fatal?
1737
    */
261.4.1 by Felipe
- Renamed MYSQL_ERROR to DRIZZLE_ERROR.
1738
    push_warning(thd, DRIZZLE_ERROR::WARN_LEVEL_ERROR, error,
1 by brian
clean slate
1739
                ha_delete_table_error_handler.buff);
1740
  }
1741
  delete file;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1742
  return(error);
1 by brian
clean slate
1743
}
1744
1745
/****************************************************************************
1746
** General handler functions
1747
****************************************************************************/
1748
/**
  Create a second handler object opened on the same table.

  @param mem_root  memory root used for the new handler and its ref buffer

  @return the opened clone, or NULL if the handler could not be created,
          the ref buffer could not be allocated, or ha_open() failed
*/
handler *handler::clone(MEM_ROOT *mem_root)
{
  handler *new_handler= get_new_handler(table->s, mem_root, table->s->db_type());

  /* Must be checked before the object is touched below. */
  if (!new_handler)
    return NULL;

  /*
    Allocate handler->ref here because otherwise ha_open will allocate it
    on this->table->mem_root and we will not be able to reclaim that memory
    when the clone handler object is destroyed.
  */
  if (!(new_handler->ref= (uchar*) alloc_root(mem_root, ALIGN_SIZE(ref_length)*2)))
    return NULL;

  if (new_handler->ha_open(table,
                           table->s->normalized_path.str,
                           table->getDBStat(),
                           HA_OPEN_IGNORE_IF_LOCKED))
    return NULL;

  return new_handler;
}
1765
1766
1767
1768
/**
  Increment one status counter of the session that uses this table.

  @param offset  pointer-to-member selecting the counter inside SSV
*/
void handler::ha_statistic_increment(ulong SSV::*offset) const
{
  THD *session= table->in_use;
  status_var_increment(session->status_var.*offset);
}
1772
1773
/**
  Return what thd_ha_data() yields for this handler's engine (ht) in the
  given session.

  @param thd  session to look in
*/
void **handler::ha_data(THD *thd) const
{
  void **slot= thd_ha_data(thd, ht);
  return slot;
}
1777
1778
/**
  Return the session this handler works for.

  @return table->in_use when a table is attached and in use, current_thd
          otherwise
*/
THD *handler::ha_thd(void) const
{
  /* If the table names a session, it must be the current one. */
  assert(!table || !table->in_use || table->in_use == current_thd);

  if (table && table->in_use)
    return table->in_use;
  return current_thd;
}
1783
1784
/**
1785
  Open database-handler.
1786
1787
  Try O_RDONLY if cannot open as O_RDWR
1788
  Don't wait for locks if not HA_OPEN_WAIT_IF_LOCKED is set
1789
*/
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
1790
int handler::ha_open(Table *table_arg, const char *name, int mode,
1 by brian
clean slate
1791
                     int test_if_locked)
1792
{
1793
  int error;
1794
1795
  table= table_arg;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1796
  assert(table->s == table_share);
1797
  assert(alloc_root_inited(&table->mem_root));
1 by brian
clean slate
1798
1799
  if ((error=open(name,mode,test_if_locked)))
1800
  {
1801
    if ((error == EACCES || error == EROFS) && mode == O_RDWR &&
1802
	(table->db_stat & HA_TRY_READ_ONLY))
1803
    {
1804
      table->db_stat|=HA_READ_ONLY;
1805
      error=open(name,O_RDONLY,test_if_locked);
1806
    }
1807
  }
1808
  if (error)
1809
  {
1810
    my_errno= error;                            /* Safeguard */
1811
  }
1812
  else
1813
  {
1814
    if (table->s->db_options_in_use & HA_OPTION_READ_ONLY_DATA)
1815
      table->db_stat|=HA_READ_ONLY;
1816
    (void) extra(HA_EXTRA_NO_READCHECK);	// Not needed in SQL
1817
1818
    /* ref is already allocated for us if we're called from handler::clone() */
1819
    if (!ref && !(ref= (uchar*) alloc_root(&table->mem_root, 
1820
                                          ALIGN_SIZE(ref_length)*2)))
1821
    {
1822
      close();
1823
      error=HA_ERR_OUT_OF_MEM;
1824
    }
1825
    else
1826
      dup_ref=ref+ALIGN_SIZE(ref_length);
1827
    cached_table_flags= table_flags();
1828
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1829
  return(error);
1 by brian
clean slate
1830
}
1831
1832
/**
1833
  one has to use this method when to find
1834
  random position by record as the plain
1835
  position() call doesn't work for some
1836
  handlers for random position
1837
*/
1838
1839
int handler::rnd_pos_by_record(uchar *record)
1840
{
1841
  register int error;
1842
1843
  position(record);
1844
  if (inited && (error= ha_index_end()))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1845
    return(error);
56 by brian
Next pass of true/false update.
1846
  if ((error= ha_rnd_init(false)))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1847
    return(error);
1 by brian
clean slate
1848
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1849
  return(rnd_pos(record, ref));
1 by brian
clean slate
1850
}
1851
1852
/**
1853
  Read first row (only) from a table.
1854
1855
  This is never called for InnoDB tables, as these table types
1856
  has the HA_STATS_RECORDS_IS_EXACT set.
1857
*/
1858
int handler::read_first_row(uchar * buf, uint primary_key)
1859
{
1860
  register int error;
1861
1862
  ha_statistic_increment(&SSV::ha_read_first_count);
1863
1864
  /*
1865
    If there is very few deleted rows in the table, find the first row by
1866
    scanning the table.
1867
    TODO remove the test for HA_READ_ORDER
1868
  */
1869
  if (stats.deleted < 10 || primary_key >= MAX_KEY ||
1870
      !(index_flags(primary_key, 0, 0) & HA_READ_ORDER))
1871
  {
1872
    (void) ha_rnd_init(1);
1873
    while ((error= rnd_next(buf)) == HA_ERR_RECORD_DELETED) ;
1874
    (void) ha_rnd_end();
1875
  }
1876
  else
1877
  {
1878
    /* Find the first row through the primary key */
1879
    (void) ha_index_init(primary_key, 0);
1880
    error=index_first(buf);
1881
    (void) ha_index_end();
1882
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
1883
  return(error);
1 by brian
clean slate
1884
}
1885
1886
/**
1887
  Generate the next auto-increment number based on increment and offset.
1888
  computes the lowest number
1889
  - strictly greater than "nr"
1890
  - of the form: auto_increment_offset + N * auto_increment_increment
1891
1892
  In most cases increment= offset= 1, in which case we get:
1893
  @verbatim 1,2,3,4,5,... @endverbatim
1894
    If increment=10 and offset=5 and previous number is 1, we get:
1895
  @verbatim 1,5,15,25,35,... @endverbatim
1896
*/
1897
inline uint64_t
1898
compute_next_insert_id(uint64_t nr,struct system_variables *variables)
1899
{
1900
  if (variables->auto_increment_increment == 1)
1901
    return (nr+1); // optimization of the formula below
1902
  nr= (((nr+ variables->auto_increment_increment -
1903
         variables->auto_increment_offset)) /
1904
       (uint64_t) variables->auto_increment_increment);
1905
  return (nr* (uint64_t) variables->auto_increment_increment +
1906
          variables->auto_increment_offset);
1907
}
1908
1909
1910
void handler::adjust_next_insert_id_after_explicit_value(uint64_t nr)
1911
{
1912
  /*
1913
    If we have set THD::next_insert_id previously and plan to insert an
1914
    explicitely-specified value larger than this, we need to increase
1915
    THD::next_insert_id to be greater than the explicit value.
1916
  */
1917
  if ((next_insert_id > 0) && (nr >= next_insert_id))
1918
    set_next_insert_id(compute_next_insert_id(nr, &table->in_use->variables));
1919
}
1920
1921
1922
/**
1923
  Compute a previous insert id
1924
1925
  Computes the largest number X:
1926
  - smaller than or equal to "nr"
1927
  - of the form: auto_increment_offset + N * auto_increment_increment
1928
    where N>=0.
1929
1930
  @param nr            Number to "round down"
1931
  @param variables     variables struct containing auto_increment_increment and
1932
                       auto_increment_offset
1933
1934
  @return
1935
    The number X if it exists, "nr" otherwise.
1936
*/
1937
inline uint64_t
1938
prev_insert_id(uint64_t nr, struct system_variables *variables)
1939
{
1940
  if (unlikely(nr < variables->auto_increment_offset))
1941
  {
1942
    /*
1943
      There's nothing good we can do here. That is a pathological case, where
1944
      the offset is larger than the column's max possible value, i.e. not even
1945
      the first sequence value may be inserted. User will receive warning.
1946
    */
1947
    return nr;
1948
  }
1949
  if (variables->auto_increment_increment == 1)
1950
    return nr; // optimization of the formula below
1951
  nr= (((nr - variables->auto_increment_offset)) /
1952
       (uint64_t) variables->auto_increment_increment);
1953
  return (nr * (uint64_t) variables->auto_increment_increment +
1954
          variables->auto_increment_offset);
1955
}
1956
1957
1958
/**
1959
  Update the auto_increment field if necessary.
1960
1961
  Updates columns with type NEXT_NUMBER if:
1962
1963
  - If column value is set to NULL (in which case
1964
    auto_increment_field_not_null is 0)
1965
  - If column is set to 0 and (sql_mode & MODE_NO_AUTO_VALUE_ON_ZERO) is not
1966
    set. In the future we will only set NEXT_NUMBER fields if one sets them
1967
    to NULL (or they are not included in the insert list).
1968
1969
    In those cases, we check if the currently reserved interval still has
1970
    values we have not used. If yes, we pick the smallest one and use it.
1971
    Otherwise:
1972
1973
  - If a list of intervals has been provided to the statement via SET
1974
    INSERT_ID or via an Intvar_log_event (in a replication slave), we pick the
1975
    first unused interval from this list, consider it as reserved.
1976
1977
  - Otherwise we set the column for the first row to the value
1978
    next_insert_id(get_auto_increment(column)) which is usually
1979
    max-used-column-value+1.
1980
    We call get_auto_increment() for the first row in a multi-row
1981
    statement. get_auto_increment() will tell us the interval of values it
1982
    reserved for us.
1983
1984
  - In both cases, for the following rows we use those reserved values without
1985
    calling the handler again (we just progress in the interval, computing
1986
    each new value from the previous one). Until we have exhausted them, then
1987
    we either take the next provided interval or call get_auto_increment()
1988
    again to reserve a new interval.
1989
1990
  - In both cases, the reserved intervals are remembered in
1991
    thd->auto_inc_intervals_in_cur_stmt_for_binlog if statement-based
1992
    binlogging; the last reserved interval is remembered in
1993
    auto_inc_interval_for_cur_row.
1994
1995
    The idea is that generated auto_increment values are predictable and
1996
    independent of the column values in the table.  This is needed to be
1997
    able to replicate into a table that already has rows with a higher
1998
    auto-increment value than the one that is inserted.
1999
2000
    After we have already generated an auto-increment number and the user
2001
    inserts a column with a higher value than the last used one, we will
2002
    start counting from the inserted value.
2003
2004
    This function's "outputs" are: the table's auto_increment field is filled
2005
    with a value, thd->next_insert_id is filled with the value to use for the
2006
    next row, if a value was autogenerated for the current row it is stored in
2007
    thd->insert_id_for_cur_row, if get_auto_increment() was called
2008
    thd->auto_inc_interval_for_cur_row is modified, if that interval is not
2009
    present in thd->auto_inc_intervals_in_cur_stmt_for_binlog it is added to
2010
    this list.
2011
2012
  @todo
2013
    Replace all references to "next number" or NEXT_NUMBER to
2014
    "auto_increment", everywhere (see below: there is
2015
    table->auto_increment_field_not_null, and there also exists
2016
    table->next_number_field, it's not consistent).
2017
2018
  @retval
2019
    0	ok
2020
  @retval
2021
    HA_ERR_AUTOINC_READ_FAILED  get_auto_increment() was called and
2022
    returned ~(uint64_t) 0
2023
  @retval
2024
    HA_ERR_AUTOINC_ERANGE storing value in field caused strict mode
2025
    failure.
2026
*/
2027
2028
#define AUTO_INC_DEFAULT_NB_ROWS 1 // Some prefer 1024 here
2029
#define AUTO_INC_DEFAULT_NB_MAX_BITS 16
2030
#define AUTO_INC_DEFAULT_NB_MAX ((1 << AUTO_INC_DEFAULT_NB_MAX_BITS) - 1)
2031
2032
int handler::update_auto_increment()
2033
{
2034
  uint64_t nr, nb_reserved_values;
56 by brian
Next pass of true/false update.
2035
  bool append= false;
1 by brian
clean slate
2036
  THD *thd= table->in_use;
2037
  struct system_variables *variables= &thd->variables;
2038
2039
  /*
2040
    next_insert_id is a "cursor" into the reserved interval, it may go greater
2041
    than the interval, but not smaller.
2042
  */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2043
  assert(next_insert_id >= auto_inc_interval_for_cur_row.minimum());
1 by brian
clean slate
2044
359 by Brian Aker
More modes removed. 0 always becomes new number again
2045
  if ((nr= table->next_number_field->val_int()) != 0)
1 by brian
clean slate
2046
  {
2047
    /*
2048
      Update next_insert_id if we had already generated a value in this
2049
      statement (case of INSERT VALUES(null),(3763),(null):
2050
      the last NULL needs to insert 3764, not the value of the first NULL plus
2051
      1).
2052
    */
2053
    adjust_next_insert_id_after_explicit_value(nr);
2054
    insert_id_for_cur_row= 0; // didn't generate anything
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2055
    return(0);
1 by brian
clean slate
2056
  }
2057
2058
  if ((nr= next_insert_id) >= auto_inc_interval_for_cur_row.maximum())
2059
  {
2060
    /* next_insert_id is beyond what is reserved, so we reserve more. */
2061
    const Discrete_interval *forced=
2062
      thd->auto_inc_intervals_forced.get_next();
2063
    if (forced != NULL)
2064
    {
2065
      nr= forced->minimum();
2066
      nb_reserved_values= forced->values();
2067
    }
2068
    else
2069
    {
2070
      /*
2071
        handler::estimation_rows_to_insert was set by
2072
        handler::ha_start_bulk_insert(); if 0 it means "unknown".
2073
      */
2074
      uint nb_already_reserved_intervals=
2075
        thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements();
2076
      uint64_t nb_desired_values;
2077
      /*
2078
        If an estimation was given to the engine:
2079
        - use it.
2080
        - if we already reserved numbers, it means the estimation was
2081
        not accurate, then we'll reserve 2*AUTO_INC_DEFAULT_NB_ROWS the 2nd
2082
        time, twice that the 3rd time etc.
2083
        If no estimation was given, use those increasing defaults from the
2084
        start, starting from AUTO_INC_DEFAULT_NB_ROWS.
2085
        Don't go beyond a max to not reserve "way too much" (because
2086
        reservation means potentially losing unused values).
2087
      */
2088
      if (nb_already_reserved_intervals == 0 &&
2089
          (estimation_rows_to_insert > 0))
2090
        nb_desired_values= estimation_rows_to_insert;
2091
      else /* go with the increasing defaults */
2092
      {
2093
        /* avoid overflow in formula, with this if() */
2094
        if (nb_already_reserved_intervals <= AUTO_INC_DEFAULT_NB_MAX_BITS)
2095
        {
2096
          nb_desired_values= AUTO_INC_DEFAULT_NB_ROWS * 
2097
            (1 << nb_already_reserved_intervals);
2098
          set_if_smaller(nb_desired_values, AUTO_INC_DEFAULT_NB_MAX);
2099
        }
2100
        else
2101
          nb_desired_values= AUTO_INC_DEFAULT_NB_MAX;
2102
      }
2103
      /* This call ignores all its parameters but nr, currently */
2104
      get_auto_increment(variables->auto_increment_offset,
2105
                         variables->auto_increment_increment,
2106
                         nb_desired_values, &nr,
2107
                         &nb_reserved_values);
2108
      if (nr == ~(uint64_t) 0)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2109
        return(HA_ERR_AUTOINC_READ_FAILED);  // Mark failure
1 by brian
clean slate
2110
      
2111
      /*
2112
        That rounding below should not be needed when all engines actually
2113
        respect offset and increment in get_auto_increment(). But they don't
2114
        so we still do it. Wonder if for the not-first-in-index we should do
2115
        it. Hope that this rounding didn't push us out of the interval; even
2116
        if it did we cannot do anything about it (calling the engine again
2117
        will not help as we inserted no row).
2118
      */
2119
      nr= compute_next_insert_id(nr-1, variables);
2120
    }
2121
    
2122
    if (table->s->next_number_keypart == 0)
2123
    {
2124
      /* We must defer the appending until "nr" has been possibly truncated */
56 by brian
Next pass of true/false update.
2125
      append= true;
1 by brian
clean slate
2126
    }
2127
  }
2128
152 by Brian Aker
longlong replacement
2129
  if (unlikely(table->next_number_field->store((int64_t) nr, true)))
1 by brian
clean slate
2130
  {
2131
    /*
2132
      first test if the query was aborted due to strict mode constraints
2133
    */
2134
    if (thd->killed == THD::KILL_BAD_DATA)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2135
      return(HA_ERR_AUTOINC_ERANGE);
1 by brian
clean slate
2136
2137
    /*
2138
      field refused this value (overflow) and truncated it, use the result of
2139
      the truncation (which is going to be inserted); however we try to
2140
      decrease it to honour auto_increment_* variables.
2141
      That will shift the left bound of the reserved interval, we don't
2142
      bother shifting the right bound (anyway any other value from this
2143
      interval will cause a duplicate key).
2144
    */
2145
    nr= prev_insert_id(table->next_number_field->val_int(), variables);
152 by Brian Aker
longlong replacement
2146
    if (unlikely(table->next_number_field->store((int64_t) nr, true)))
1 by brian
clean slate
2147
      nr= table->next_number_field->val_int();
2148
  }
2149
  if (append)
2150
  {
2151
    auto_inc_interval_for_cur_row.replace(nr, nb_reserved_values,
2152
                                          variables->auto_increment_increment);
2153
    /* Row-based replication does not need to store intervals in binlog */
2154
    if (!thd->current_stmt_binlog_row_based)
2155
        thd->auto_inc_intervals_in_cur_stmt_for_binlog.append(auto_inc_interval_for_cur_row.minimum(),
2156
                                                              auto_inc_interval_for_cur_row.values(),
2157
                                                              variables->auto_increment_increment);
2158
  }
2159
2160
  /*
2161
    Record this autogenerated value. If the caller then
2162
    succeeds to insert this value, it will call
2163
    record_first_successful_insert_id_in_cur_stmt()
2164
    which will set first_successful_insert_id_in_cur_stmt if it's not
2165
    already set.
2166
  */
2167
  insert_id_for_cur_row= nr;
2168
  /*
2169
    Set next insert id to point to next auto-increment value to be able to
2170
    handle multi-row statements.
2171
  */
2172
  set_next_insert_id(compute_next_insert_id(nr, variables));
2173
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2174
  return(0);
1 by brian
clean slate
2175
}
2176
2177
2178
/**
2179
  MySQL signal that it changed the column bitmap
2180
2181
  This is for handlers that needs to setup their own column bitmaps.
2182
  Normally the handler should set up their own column bitmaps in
2183
  index_init() or rnd_init() and in any column_bitmaps_signal() call after
2184
  this.
2185
2186
  The handler is allowed to do changes to the bitmap after a index_init or
2187
  rnd_init() call is made as after this, MySQL will not use the bitmap
2188
  for any program logic checking.
2189
*/
2190
void handler::column_bitmaps_signal()
{
  /* The base class implementation does nothing. */
}
2194
2195
2196
/**
2197
  Reserves an interval of auto_increment values from the handler.
2198
2199
  offset and increment means that we want values to be of the form
2200
  offset + N * increment, where N>=0 is integer.
2201
  If the function sets *first_value to ~(uint64_t)0 it means an error.
163 by Brian Aker
Merge Monty's code.
2202
  If the function sets *nb_reserved_values to UINT64_MAX it means it has
1 by brian
clean slate
2203
  reserved to "positive infinite".
2204
2205
  @param offset
2206
  @param increment
2207
  @param nb_desired_values   how many values we want
2208
  @param first_value         (OUT) the first value reserved by the handler
2209
  @param nb_reserved_values  (OUT) how many values the handler reserved
2210
*/
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
2211
void handler::get_auto_increment(uint64_t offset __attribute__((unused)),
2212
                                 uint64_t increment __attribute__((unused)),
2213
                                 uint64_t nb_desired_values __attribute__((unused)),
1 by brian
clean slate
2214
                                 uint64_t *first_value,
2215
                                 uint64_t *nb_reserved_values)
2216
{
2217
  uint64_t nr;
2218
  int error;
2219
2220
  (void) extra(HA_EXTRA_KEYREAD);
2221
  table->mark_columns_used_by_index_no_reset(table->s->next_number_index,
2222
                                        table->read_set);
2223
  column_bitmaps_signal();
2224
  index_init(table->s->next_number_index, 1);
2225
  if (table->s->next_number_keypart == 0)
2226
  {						// Autoincrement at key-start
2227
    error=index_last(table->record[1]);
2228
    /*
2229
      MySQL implicitely assumes such method does locking (as MySQL decides to
2230
      use nr+increment without checking again with the handler, in
2231
      handler::update_auto_increment()), so reserves to infinite.
2232
    */
163 by Brian Aker
Merge Monty's code.
2233
    *nb_reserved_values= UINT64_MAX;
1 by brian
clean slate
2234
  }
2235
  else
2236
  {
2237
    uchar key[MAX_KEY_LENGTH];
2238
    key_copy(key, table->record[0],
2239
             table->key_info + table->s->next_number_index,
2240
             table->s->next_number_key_offset);
2241
    error= index_read_map(table->record[1], key,
2242
                          make_prev_keypart_map(table->s->next_number_keypart),
2243
                          HA_READ_PREFIX_LAST);
2244
    /*
2245
      MySQL needs to call us for next row: assume we are inserting ("a",null)
2246
      here, we return 3, and next this statement will want to insert
2247
      ("b",null): there is no reason why ("b",3+1) would be the good row to
2248
      insert: maybe it already exists, maybe 3+1 is too large...
2249
    */
2250
    *nb_reserved_values= 1;
2251
  }
2252
2253
  if (error)
2254
    nr=1;
2255
  else
2256
    nr= ((uint64_t) table->next_number_field->
2257
         val_int_offset(table->s->rec_buff_length)+1);
2258
  index_end();
2259
  (void) extra(HA_EXTRA_NO_KEYREAD);
2260
  *first_value= nr;
2261
}
2262
2263
2264
void handler::ha_release_auto_increment()
2265
{
2266
  release_auto_increment();
2267
  insert_id_for_cur_row= 0;
2268
  auto_inc_interval_for_cur_row.replace(0, 0, 0);
2269
  if (next_insert_id > 0)
2270
  {
2271
    next_insert_id= 0;
2272
    /*
2273
      this statement used forced auto_increment values if there were some,
2274
      wipe them away for other statements.
2275
    */
2276
    table->in_use->auto_inc_intervals_forced.empty();
2277
  }
2278
}
2279
2280
2281
/**
  Report a duplicate-key error, including the text of the duplicated key.

  @param key_nr  number of the key, or MAX_KEY if the key is unknown
  @param msg     format string passed to my_printf_error(); it receives
                 the key text and the key name as arguments
*/
void handler::print_keydup_error(uint key_nr, const char *msg)
{
  /* Write the duplicated key in the error message */
  char key_buff[MAX_KEY_LENGTH];
  String key_text(key_buff, sizeof(key_buff), system_charset_info);

  if (key_nr == MAX_KEY)
  {
    /* Key is unknown */
    key_text.copy("", 0, system_charset_info);
    my_printf_error(ER_DUP_ENTRY, msg, MYF(0), key_text.c_ptr(), "*UNKNOWN*");
    return;
  }

  /* Table is opened and defined at this point */
  key_unpack(&key_text, table, (uint) key_nr);

  /* Truncate the key text so that it fits, marking the cut with "...". */
  uint room= DRIZZLE_ERRMSG_SIZE-(uint) strlen(msg);
  if (key_text.length() >= room)
  {
    key_text.length(room-4);
    key_text.append(STRING_WITH_LEN("..."));
  }
  my_printf_error(ER_DUP_ENTRY, msg,
                  MYF(0), key_text.c_ptr(), table->key_info[key_nr].name);
}
2307
2308
2309
/**
2310
  Print error that we got from handler function.
2311
2312
  @note
2313
    In case of delete table it's only safe to use the following parts of
2314
    the 'table' structure:
2315
    - table->s->path
2316
    - table->alias
2317
*/
2318
void handler::print_error(int error, myf errflag)
2319
{
2320
  int textno=ER_GET_ERRNO;
2321
  switch (error) {
2322
  case EACCES:
2323
    textno=ER_OPEN_AS_READONLY;
2324
    break;
2325
  case EAGAIN:
2326
    textno=ER_FILE_USED;
2327
    break;
2328
  case ENOENT:
2329
    textno=ER_FILE_NOT_FOUND;
2330
    break;
2331
  case HA_ERR_KEY_NOT_FOUND:
2332
  case HA_ERR_NO_ACTIVE_RECORD:
2333
  case HA_ERR_END_OF_FILE:
2334
    textno=ER_KEY_NOT_FOUND;
2335
    break;
2336
  case HA_ERR_WRONG_MRG_TABLE_DEF:
2337
    textno=ER_WRONG_MRG_TABLE;
2338
    break;
2339
  case HA_ERR_FOUND_DUPP_KEY:
2340
  {
2341
    uint key_nr=get_dup_key(error);
2342
    if ((int) key_nr >= 0)
2343
    {
2344
      print_keydup_error(key_nr, ER(ER_DUP_ENTRY_WITH_KEY_NAME));
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2345
      return;
1 by brian
clean slate
2346
    }
2347
    textno=ER_DUP_KEY;
2348
    break;
2349
  }
2350
  case HA_ERR_FOREIGN_DUPLICATE_KEY:
2351
  {
2352
    uint key_nr= get_dup_key(error);
2353
    if ((int) key_nr >= 0)
2354
    {
2355
      uint max_length;
2356
      /* Write the key in the error message */
2357
      char key[MAX_KEY_LENGTH];
2358
      String str(key,sizeof(key),system_charset_info);
2359
      /* Table is opened and defined at this point */
2360
      key_unpack(&str,table,(uint) key_nr);
261.4.1 by Felipe
- Renamed MYSQL_ERROR to DRIZZLE_ERROR.
2361
      max_length= (DRIZZLE_ERRMSG_SIZE-
1 by brian
clean slate
2362
                   (uint) strlen(ER(ER_FOREIGN_DUPLICATE_KEY)));
2363
      if (str.length() >= max_length)
2364
      {
2365
        str.length(max_length-4);
2366
        str.append(STRING_WITH_LEN("..."));
2367
      }
2368
      my_error(ER_FOREIGN_DUPLICATE_KEY, MYF(0), table_share->table_name.str,
2369
        str.c_ptr(), key_nr+1);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2370
      return;
1 by brian
clean slate
2371
    }
2372
    textno= ER_DUP_KEY;
2373
    break;
2374
  }
2375
  case HA_ERR_FOUND_DUPP_UNIQUE:
2376
    textno=ER_DUP_UNIQUE;
2377
    break;
2378
  case HA_ERR_RECORD_CHANGED:
2379
    textno=ER_CHECKREAD;
2380
    break;
2381
  case HA_ERR_CRASHED:
2382
    textno=ER_NOT_KEYFILE;
2383
    break;
2384
  case HA_ERR_WRONG_IN_RECORD:
2385
    textno= ER_CRASHED_ON_USAGE;
2386
    break;
2387
  case HA_ERR_CRASHED_ON_USAGE:
2388
    textno=ER_CRASHED_ON_USAGE;
2389
    break;
2390
  case HA_ERR_NOT_A_TABLE:
2391
    textno= error;
2392
    break;
2393
  case HA_ERR_CRASHED_ON_REPAIR:
2394
    textno=ER_CRASHED_ON_REPAIR;
2395
    break;
2396
  case HA_ERR_OUT_OF_MEM:
2397
    textno=ER_OUT_OF_RESOURCES;
2398
    break;
2399
  case HA_ERR_WRONG_COMMAND:
2400
    textno=ER_ILLEGAL_HA;
2401
    break;
2402
  case HA_ERR_OLD_FILE:
2403
    textno=ER_OLD_KEYFILE;
2404
    break;
2405
  case HA_ERR_UNSUPPORTED:
2406
    textno=ER_UNSUPPORTED_EXTENSION;
2407
    break;
2408
  case HA_ERR_RECORD_FILE_FULL:
2409
  case HA_ERR_INDEX_FILE_FULL:
2410
    textno=ER_RECORD_FILE_FULL;
2411
    break;
2412
  case HA_ERR_LOCK_WAIT_TIMEOUT:
2413
    textno=ER_LOCK_WAIT_TIMEOUT;
2414
    break;
2415
  case HA_ERR_LOCK_TABLE_FULL:
2416
    textno=ER_LOCK_TABLE_FULL;
2417
    break;
2418
  case HA_ERR_LOCK_DEADLOCK:
2419
    textno=ER_LOCK_DEADLOCK;
2420
    break;
2421
  case HA_ERR_READ_ONLY_TRANSACTION:
2422
    textno=ER_READ_ONLY_TRANSACTION;
2423
    break;
2424
  case HA_ERR_CANNOT_ADD_FOREIGN:
2425
    textno=ER_CANNOT_ADD_FOREIGN;
2426
    break;
2427
  case HA_ERR_ROW_IS_REFERENCED:
2428
  {
2429
    String str;
2430
    get_error_message(error, &str);
2431
    my_error(ER_ROW_IS_REFERENCED_2, MYF(0), str.c_ptr_safe());
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2432
    return;
1 by brian
clean slate
2433
  }
2434
  case HA_ERR_NO_REFERENCED_ROW:
2435
  {
2436
    String str;
2437
    get_error_message(error, &str);
2438
    my_error(ER_NO_REFERENCED_ROW_2, MYF(0), str.c_ptr_safe());
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2439
    return;
1 by brian
clean slate
2440
  }
2441
  case HA_ERR_TABLE_DEF_CHANGED:
2442
    textno=ER_TABLE_DEF_CHANGED;
2443
    break;
2444
  case HA_ERR_NO_SUCH_TABLE:
2445
    my_error(ER_NO_SUCH_TABLE, MYF(0), table_share->db.str,
2446
             table_share->table_name.str);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2447
    return;
1 by brian
clean slate
2448
  case HA_ERR_RBR_LOGGING_FAILED:
2449
    textno= ER_BINLOG_ROW_LOGGING_FAILED;
2450
    break;
2451
  case HA_ERR_DROP_INDEX_FK:
2452
  {
2453
    const char *ptr= "???";
2454
    uint key_nr= get_dup_key(error);
2455
    if ((int) key_nr >= 0)
2456
      ptr= table->key_info[key_nr].name;
2457
    my_error(ER_DROP_INDEX_FK, MYF(0), ptr);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2458
    return;
1 by brian
clean slate
2459
  }
2460
  case HA_ERR_TABLE_NEEDS_UPGRADE:
2461
    textno=ER_TABLE_NEEDS_UPGRADE;
2462
    break;
2463
  case HA_ERR_TABLE_READONLY:
2464
    textno= ER_OPEN_AS_READONLY;
2465
    break;
2466
  case HA_ERR_AUTOINC_READ_FAILED:
2467
    textno= ER_AUTOINC_READ_FAILED;
2468
    break;
2469
  case HA_ERR_AUTOINC_ERANGE:
2470
    textno= ER_WARN_DATA_OUT_OF_RANGE;
2471
    break;
2472
  case HA_ERR_LOCK_OR_ACTIVE_TRANSACTION:
2473
    my_message(ER_LOCK_OR_ACTIVE_TRANSACTION,
2474
               ER(ER_LOCK_OR_ACTIVE_TRANSACTION), MYF(0));
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2475
    return;
1 by brian
clean slate
2476
    break;
2477
  default:
2478
    {
2479
      /* The error was "unknown" to this function.
2480
	 Ask handler if it has got a message for this error */
56 by brian
Next pass of true/false update.
2481
      bool temporary= false;
1 by brian
clean slate
2482
      String str;
2483
      temporary= get_error_message(error, &str);
2484
      if (!str.is_empty())
2485
      {
2486
	const char* engine= table_type();
2487
	if (temporary)
2488
	  my_error(ER_GET_TEMPORARY_ERRMSG, MYF(0), error, str.ptr(), engine);
2489
	else
2490
	  my_error(ER_GET_ERRMSG, MYF(0), error, str.ptr(), engine);
2491
      }
2492
      else
2493
	my_error(ER_GET_ERRNO,errflag,error);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2494
      return;
1 by brian
clean slate
2495
    }
2496
  }
2497
  my_error(textno, errflag, table_share->table_name.str, error);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2498
  return;
1 by brian
clean slate
2499
}
2500
2501
2502
/**
2503
  Return an error message specific to this handler.
2504
2505
  @param error  error code previously returned by handler
2506
  @param buf    pointer to String where to add error message
2507
2508
  @return
2509
    Returns true if this is a temporary error
2510
*/
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
2511
bool handler::get_error_message(int error __attribute__((unused)),
2512
                                String* buf __attribute__((unused)))
1 by brian
clean slate
2513
{
56 by brian
Next pass of true/false update.
2514
  return false;
1 by brian
clean slate
2515
}
2516
2517
2518
/**
  Check whether the table needs an upgrade.

  When the share carries no version (mysql_version is 0) every key part
  is inspected; a key part on a DRIZZLE_TYPE_BLOB field makes the
  function return HA_ADMIN_NEEDS_CHECK. Otherwise the decision is left
  to check_for_upgrade().

  @param check_opt  check options; flags is set to T_MEDIUM when a blob
                    key part is found and TT_FOR_UPGRADE was requested

  @return HA_ADMIN_NEEDS_CHECK, or the result of check_for_upgrade()
*/
int handler::ha_check_for_upgrade(HA_CHECK_OPT *check_opt)
{
  if (table->s->mysql_version)
    return check_for_upgrade(check_opt);

  /* check for blob-in-key error */
  KEY *cur_key= table->key_info;
  KEY *const keys_end= table->key_info + table->s->keys;
  while (cur_key < keys_end)
  {
    KEY_PART_INFO *part= cur_key->key_part;
    KEY_PART_INFO *const parts_end= part + cur_key->key_parts;
    while (part < parts_end)
    {
      /* Key parts without a field number are skipped */
      if (part->fieldnr)
      {
        Field *part_field= table->field[part->fieldnr-1];
        if (part_field->type() == DRIZZLE_TYPE_BLOB)
        {
          if (check_opt->sql_flags & TT_FOR_UPGRADE)
            check_opt->flags= T_MEDIUM;
          return HA_ADMIN_NEEDS_CHECK;
        }
      }
      part++;
    }
    cur_key++;
  }

  return check_for_upgrade(check_opt);
}
2548
2549
2550
/* Code left, but Drizzle has no legacy yet (while MySQL did) */
2551
int handler::check_old_types()
2552
{
2553
  return 0;
2554
}
2555
2556
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
2557
/**
  Stamp the table definition file with the current server version.

  Writes DRIZZLE_VERSION_ID into the 4 bytes at offset 51 of the file
  named by the share's normalized path plus reg_ext. Only after the
  write succeeded is the version of every open_cache entry for this
  table updated.

  @param table  table whose definition file should be updated

  @retval false  version was already current, or the file was updated
  @retval true   the file could not be opened or fully written
*/
static bool update_frm_version(Table *table)
{
  char path[FN_REFLEN];
  File file;
  bool result= true;                    /* Failure until the write is done */

  /*
    No need to update frm version in case table was created or checked
    by server with the same version. This also ensures that we do not
    update frm version for temporary tables as this code doesn't support
    temporary tables.
  */
  if (table->s->mysql_version == DRIZZLE_VERSION_ID)
    return false;

  strxmov(path, table->s->normalized_path.str, reg_ext, NullS);

  if ((file= my_open(path, O_RDWR|O_BINARY, MYF(MY_WME))) >= 0)
  {
    uchar version[4];
    char *key= table->s->table_cache_key.str;
    uint key_length= table->s->table_cache_key.length;
    Table *entry;
    HASH_SEARCH_STATE state;

    int4store(version, DRIZZLE_VERSION_ID);

    /*
      pwrite() returns the number of bytes written, or -1 on error.
      Anything other than the full 4 bytes is a failure and must not be
      reflected in the cached shares.
    */
    if (pwrite(file, version, sizeof(version), 51L) ==
        (ssize_t) sizeof(version))
    {
      for (entry=(Table*) hash_first(&open_cache,(uchar*) key,key_length, &state);
           entry;
           entry= (Table*) hash_next(&open_cache,(uchar*) key,key_length, &state))
        entry->s->mysql_version= DRIZZLE_VERSION_ID;
      result= false;
    }

    VOID(my_close(file,MYF(MY_WME)));
  }
  return(result);
}
2600
2601
2602
2603
/**
2604
  @return
2605
    key if error because of duplicated keys
2606
*/
2607
uint handler::get_dup_key(int error)
2608
{
2609
  table->file->errkey  = (uint) -1;
2610
  if (error == HA_ERR_FOUND_DUPP_KEY || error == HA_ERR_FOREIGN_DUPLICATE_KEY ||
2611
      error == HA_ERR_FOUND_DUPP_UNIQUE ||
2612
      error == HA_ERR_DROP_INDEX_FK)
2613
    info(HA_STATUS_ERRKEY | HA_STATUS_NO_LOCK);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2614
  return(table->file->errkey);
1 by brian
clean slate
2615
}
2616
2617
2618
/**
2619
  Delete all files with extension from bas_ext().
2620
2621
  @param name		Base name of table
2622
2623
  @note
2624
    We assume that the handler may return more extensions than
2625
    was actually used for the file.
2626
2627
  @retval
2628
    0   If we successfully deleted at least one file from base_ext and
2629
    didn't get any other errors than ENOENT
2630
  @retval
2631
    !0  Error
2632
*/
2633
int handler::delete_table(const char *name)
2634
{
2635
  int error= 0;
2636
  int enoent_or_zero= ENOENT;                   // Error if no file was deleted
2637
  char buff[FN_REFLEN];
2638
2639
  for (const char **ext=bas_ext(); *ext ; ext++)
2640
  {
2641
    fn_format(buff, name, "", *ext, MY_UNPACK_FILENAME|MY_APPEND_EXT);
2642
    if (my_delete_with_symlink(buff, MYF(0)))
2643
    {
2644
      if ((error= my_errno) != ENOENT)
2645
	break;
2646
    }
2647
    else
2648
      enoent_or_zero= 0;                        // No error for ENOENT
2649
    error= enoent_or_zero;
2650
  }
2651
  return error;
2652
}
2653
2654
2655
int handler::rename_table(const char * from, const char * to)
2656
{
2657
  int error= 0;
2658
  for (const char **ext= bas_ext(); *ext ; ext++)
2659
  {
2660
    if (rename_file_ext(from, to, *ext))
2661
    {
2662
      if ((error=my_errno) != ENOENT)
2663
	break;
2664
      error= 0;
2665
    }
2666
  }
2667
  return error;
2668
}
2669
2670
2671
void handler::drop_table(const char *name)
2672
{
2673
  close();
2674
  delete_table(name);
2675
}
2676
2677
2678
/**
2679
  Performs checks upon the table.
2680
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
2681
  @param thd                thread doing CHECK Table operation
1 by brian
clean slate
2682
  @param check_opt          options from the parser
2683
2684
  @retval
2685
    HA_ADMIN_OK               Successful upgrade
2686
  @retval
2687
    HA_ADMIN_NEEDS_UPGRADE    Table has structures requiring upgrade
2688
  @retval
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
2689
    HA_ADMIN_NEEDS_ALTER      Table has structures requiring ALTER Table
1 by brian
clean slate
2690
  @retval
2691
    HA_ADMIN_NOT_IMPLEMENTED
2692
*/
2693
int handler::ha_check(THD *thd, HA_CHECK_OPT *check_opt)
2694
{
2695
  int error;
2696
319.1.1 by Grant Limberg
renamed all instances of MYSQL_ to DRIZZLE_
2697
  if ((table->s->mysql_version >= DRIZZLE_VERSION_ID) &&
1 by brian
clean slate
2698
      (check_opt->sql_flags & TT_FOR_UPGRADE))
2699
    return 0;
2700
319.1.1 by Grant Limberg
renamed all instances of MYSQL_ to DRIZZLE_
2701
  if (table->s->mysql_version < DRIZZLE_VERSION_ID)
1 by brian
clean slate
2702
  {
2703
    if ((error= check_old_types()))
2704
      return error;
2705
    error= ha_check_for_upgrade(check_opt);
2706
    if (error && (error != HA_ADMIN_NEEDS_CHECK))
2707
      return error;
2708
    if (!error && (check_opt->sql_flags & TT_FOR_UPGRADE))
2709
      return 0;
2710
  }
2711
  if ((error= check(thd, check_opt)))
2712
    return error;
2713
  return update_frm_version(table);
2714
}
2715
2716
/**
2717
  A helper function to mark a transaction read-write,
2718
  if it is started.
2719
*/
2720
2721
inline
2722
void
2723
handler::mark_trx_read_write()
2724
{
2725
  Ha_trx_info *ha_info= &ha_thd()->ha_data[ht->slot].ha_info[0];
2726
  /*
2727
    When a storage engine method is called, the transaction must
2728
    have been started, unless it's a DDL call, for which the
2729
    storage engine starts the transaction internally, and commits
2730
    it internally, without registering in the ha_list.
2731
    Unfortunately here we can't know know for sure if the engine
2732
    has registered the transaction or not, so we must check.
2733
  */
2734
  if (ha_info->is_started())
2735
  {
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
2736
    assert(has_transactions());
1 by brian
clean slate
2737
    /*
2738
      table_share can be NULL in ha_delete_table(). See implementation
2739
      of standalone function ha_delete_table() in sql_base.cc.
2740
    */
2741
    if (table_share == NULL || table_share->tmp_table == NO_TMP_TABLE)
2742
      ha_info->set_trx_read_write();
2743
  }
2744
}
2745
2746
2747
/**
2748
  Repair table: public interface.
2749
2750
  @sa handler::repair()
2751
*/
2752
2753
int handler::ha_repair(THD* thd, HA_CHECK_OPT* check_opt)
2754
{
2755
  int result;
2756
2757
  mark_trx_read_write();
2758
2759
  if ((result= repair(thd, check_opt)))
2760
    return result;
2761
  return update_frm_version(table);
2762
}
2763
2764
2765
/**
2766
  Bulk update row: public interface.
2767
2768
  @sa handler::bulk_update_row()
2769
*/
2770
2771
int
2772
handler::ha_bulk_update_row(const uchar *old_data, uchar *new_data,
2773
                            uint *dup_key_found)
2774
{
2775
  mark_trx_read_write();
2776
2777
  return bulk_update_row(old_data, new_data, dup_key_found);
2778
}
2779
2780
2781
/**
2782
  Delete all rows: public interface.
2783
2784
  @sa handler::delete_all_rows()
2785
*/
2786
2787
int
2788
handler::ha_delete_all_rows()
2789
{
2790
  mark_trx_read_write();
2791
2792
  return delete_all_rows();
2793
}
2794
2795
2796
/**
2797
  Reset auto increment: public interface.
2798
2799
  @sa handler::reset_auto_increment()
2800
*/
2801
2802
int
2803
handler::ha_reset_auto_increment(uint64_t value)
2804
{
2805
  mark_trx_read_write();
2806
2807
  return reset_auto_increment(value);
2808
}
2809
2810
2811
/**
2812
  Optimize table: public interface.
2813
2814
  @sa handler::optimize()
2815
*/
2816
2817
int
2818
handler::ha_optimize(THD* thd, HA_CHECK_OPT* check_opt)
2819
{
2820
  mark_trx_read_write();
2821
2822
  return optimize(thd, check_opt);
2823
}
2824
2825
2826
/**
2827
  Analyze table: public interface.
2828
2829
  @sa handler::analyze()
2830
*/
2831
2832
int
2833
handler::ha_analyze(THD* thd, HA_CHECK_OPT* check_opt)
2834
{
2835
  mark_trx_read_write();
2836
2837
  return analyze(thd, check_opt);
2838
}
2839
2840
2841
/**
2842
  Check and repair table: public interface.
2843
2844
  @sa handler::check_and_repair()
2845
*/
2846
2847
bool
2848
handler::ha_check_and_repair(THD *thd)
2849
{
2850
  mark_trx_read_write();
2851
2852
  return check_and_repair(thd);
2853
}
2854
2855
2856
/**
2857
  Disable indexes: public interface.
2858
2859
  @sa handler::disable_indexes()
2860
*/
2861
2862
int
2863
handler::ha_disable_indexes(uint mode)
2864
{
2865
  mark_trx_read_write();
2866
2867
  return disable_indexes(mode);
2868
}
2869
2870
2871
/**
2872
  Enable indexes: public interface.
2873
2874
  @sa handler::enable_indexes()
2875
*/
2876
2877
int
2878
handler::ha_enable_indexes(uint mode)
2879
{
2880
  mark_trx_read_write();
2881
2882
  return enable_indexes(mode);
2883
}
2884
2885
2886
/**
2887
  Discard or import tablespace: public interface.
2888
2889
  @sa handler::discard_or_import_tablespace()
2890
*/
2891
2892
int
200 by Brian Aker
my_bool from handler and set_var
2893
handler::ha_discard_or_import_tablespace(bool discard)
1 by brian
clean slate
2894
{
2895
  mark_trx_read_write();
2896
2897
  return discard_or_import_tablespace(discard);
2898
}
2899
2900
2901
/**
2902
  Prepare for alter: public interface.
2903
2904
  Called to prepare an *online* ALTER.
2905
2906
  @sa handler::prepare_for_alter()
2907
*/
2908
2909
void
2910
handler::ha_prepare_for_alter()
2911
{
2912
  mark_trx_read_write();
2913
2914
  prepare_for_alter();
2915
}
2916
2917
2918
/**
2919
  Rename table: public interface.
2920
2921
  @sa handler::rename_table()
2922
*/
2923
2924
int
2925
handler::ha_rename_table(const char *from, const char *to)
2926
{
2927
  mark_trx_read_write();
2928
2929
  return rename_table(from, to);
2930
}
2931
2932
2933
/**
2934
  Delete table: public interface.
2935
2936
  @sa handler::delete_table()
2937
*/
2938
2939
int
2940
handler::ha_delete_table(const char *name)
2941
{
2942
  mark_trx_read_write();
2943
2944
  return delete_table(name);
2945
}
2946
2947
2948
/**
2949
  Drop table in the engine: public interface.
2950
2951
  @sa handler::drop_table()
2952
*/
2953
2954
void
2955
handler::ha_drop_table(const char *name)
2956
{
2957
  mark_trx_read_write();
2958
2959
  return drop_table(name);
2960
}
2961
2962
2963
/**
2964
  Create a table in the engine: public interface.
2965
2966
  @sa handler::create()
2967
*/
2968
2969
int
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
2970
handler::ha_create(const char *name, Table *form, HA_CREATE_INFO *info)
1 by brian
clean slate
2971
{
2972
  mark_trx_read_write();
2973
2974
  return create(name, form, info);
2975
}
2976
2977
2978
/**
2979
  Create handler files for CREATE TABLE: public interface.
2980
2981
  @sa handler::create_handler_files()
2982
*/
2983
2984
int
2985
handler::ha_create_handler_files(const char *name, const char *old_name,
2986
                        int action_flag, HA_CREATE_INFO *info)
2987
{
2988
  mark_trx_read_write();
2989
2990
  return create_handler_files(name, old_name, action_flag, info);
2991
}
2992
2993
2994
/**
2995
  Tell the storage engine that it is allowed to "disable transaction" in the
2996
  handler. It is a hint that ACID is not required - it is used in NDB for
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
2997
  ALTER Table, for example, when data are copied to temporary table.
1 by brian
clean slate
2998
  A storage engine may treat this hint any way it likes. NDB for example
2999
  starts to commit every now and then automatically.
3000
  This hint can be safely ignored.
3001
*/
3002
int ha_enable_transaction(THD *thd, bool on)
3003
{
3004
  int error=0;
3005
3006
  if ((thd->transaction.on= on))
3007
  {
3008
    /*
3009
      Now all storage engines should have transaction handling enabled.
3010
      But some may have it enabled all the time - "disabling" transactions
3011
      is an optimization hint that storage engine is free to ignore.
3012
      So, let's commit an open transaction (if any) now.
3013
    */
3014
    if (!(error= ha_commit_trans(thd, 0)))
3015
      error= end_trans(thd, COMMIT);
3016
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3017
  return(error);
1 by brian
clean slate
3018
}
3019
3020
/**
  Read the next index row and verify it still matches the given key.

  @param buf     buffer that receives the row
  @param key     key value the row must match
  @param keylen  length of 'key'

  @return the non-zero result of index_next(), HA_ERR_END_OF_FILE when
          the row read does not match 'key' (table->status is then set
          to STATUS_NOT_FOUND), or 0
*/
int handler::index_next_same(uchar *buf, const uchar *key, uint keylen)
{
  const int next_rc= index_next(buf);
  if (next_rc)
    return next_rc;

  int error= 0;
  const my_ptrdiff_t shift= buf - table->record[0];
  uchar *orig_record_0= NULL;
  KEY *index_def= NULL;
  KEY_PART_INFO *part;
  KEY_PART_INFO *parts_end= NULL;

  /*
    key_cmp_if_same() compares table->record[0] against 'key'.
    In parts it uses table->record[0] directly, in parts it uses
    field objects with their local pointers into table->record[0].
    If 'buf' is distinct from table->record[0], we need to move
    all record references. This is table->record[0] itself and
    the field pointers of the fields used in this key.
  */
  if (shift)
  {
    orig_record_0= table->record[0];
    table->record[0]= buf;
    index_def= table->key_info + active_index;
    part= index_def->key_part;
    parts_end= part + index_def->key_parts;
    while (part < parts_end)
    {
      assert(part->field);
      part->field->move_field_offset(shift);
      part++;
    }
  }

  if (key_cmp_if_same(table, key, active_index, keylen))
  {
    table->status= STATUS_NOT_FOUND;
    error= HA_ERR_END_OF_FILE;
  }

  /* Move back if necessary. */
  if (shift)
  {
    table->record[0]= orig_record_0;
    part= index_def->key_part;
    while (part < parts_end)
    {
      part->field->move_field_offset(-shift);
      part++;
    }
  }

  return error;
}
3069
3070
3071
/****************************************************************************
** Some general functions that aren't in the handler class
****************************************************************************/
3074
3075
/**
3076
  Initiates table-file and calls appropriate database-creator.
3077
3078
  @retval
3079
   0  ok
3080
  @retval
3081
   1  error
3082
*/
3083
int ha_create_table(THD *thd, const char *path,
3084
                    const char *db, const char *table_name,
3085
                    HA_CREATE_INFO *create_info,
3086
		    bool update_create_info)
3087
{
3088
  int error= 1;
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
3089
  Table table;
1 by brian
clean slate
3090
  char name_buff[FN_REFLEN];
3091
  const char *name;
3092
  TABLE_SHARE share;
3093
  
3094
  init_tmp_table_share(thd, &share, db, 0, table_name, path);
3095
  if (open_table_def(thd, &share, 0) ||
3096
      open_table_from_share(thd, &share, "", 0, (uint) READ_ALL, 0, &table,
3097
                            OTM_CREATE))
3098
    goto err;
3099
3100
  if (update_create_info)
327.1.2 by Brian Aker
Commiting next pass of Table class cleanup.
3101
    table.updateCreateInfo(create_info);
1 by brian
clean slate
3102
3103
  name= check_lowercase_names(table.file, share.path.str, name_buff);
3104
3105
  error= table.file->ha_create(name, &table, create_info);
3106
  VOID(closefrm(&table, 0));
3107
  if (error)
3108
  {
3109
    strxmov(name_buff, db, ".", table_name, NullS);
3110
    my_error(ER_CANT_CREATE_TABLE, MYF(ME_BELL+ME_WAITTANG), name_buff, error);
3111
  }
3112
err:
3113
  free_table_share(&share);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3114
  return(error != 0);
1 by brian
clean slate
3115
}
3116
3117
/**
3118
  Try to discover table from engine.
3119
3120
  @note
3121
    If found, write the frm file to disk.
3122
3123
  @retval
3124
  -1    Table did not exists
3125
  @retval
3126
   0    Table created ok
3127
  @retval
3128
   > 0  Error, table existed but could not be created
3129
*/
3130
int ha_create_table_from_engine(THD* thd, const char *db, const char *name)
3131
{
3132
  int error;
3133
  uchar *frmblob;
3134
  size_t frmlen;
3135
  char path[FN_REFLEN];
3136
  HA_CREATE_INFO create_info;
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
3137
  Table table;
1 by brian
clean slate
3138
  TABLE_SHARE share;
3139
212.6.6 by Mats Kindahl
Removing redundant use of casts in drizzled/ for memcmp(), memcpy(), memset(), and memmove().
3140
  memset(&create_info, 0, sizeof(create_info));
1 by brian
clean slate
3141
  if ((error= ha_discover(thd, db, name, &frmblob, &frmlen)))
3142
  {
3143
    /* Table could not be discovered and thus not created */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3144
    return(error);
1 by brian
clean slate
3145
  }
3146
3147
  /*
3148
    Table exists in handler and could be discovered
3149
    frmblob and frmlen are set, write the frm to disk
3150
  */
3151
3152
  build_table_filename(path, FN_REFLEN-1, db, name, "", 0);
3153
  // Save the frm file
3154
  error= writefrm(path, frmblob, frmlen);
3155
  my_free(frmblob, MYF(0));
3156
  if (error)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3157
    return(2);
1 by brian
clean slate
3158
3159
  init_tmp_table_share(thd, &share, db, 0, name, path);
3160
  if (open_table_def(thd, &share, 0))
3161
  {
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3162
    return(3);
1 by brian
clean slate
3163
  }
3164
  if (open_table_from_share(thd, &share, "" ,0, 0, 0, &table, OTM_OPEN))
3165
  {
3166
    free_table_share(&share);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3167
    return(3);
1 by brian
clean slate
3168
  }
3169
327.1.2 by Brian Aker
Commiting next pass of Table class cleanup.
3170
  table.updateCreateInfo(&create_info);
1 by brian
clean slate
3171
  create_info.table_options|= HA_OPTION_CREATE_FROM_ENGINE;
3172
3173
  check_lowercase_names(table.file, path, path);
3174
  error=table.file->ha_create(path, &table, &create_info);
3175
  VOID(closefrm(&table, 1));
3176
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3177
  return(error != 0);
1 by brian
clean slate
3178
}
3179
3180
void st_ha_check_opt::init()
3181
{
3182
  flags= sql_flags= 0;
3183
  sort_buffer_size = current_thd->variables.myisam_sort_buff_size;
3184
}
3185
3186
3187
/*****************************************************************************
  Key cache handling.

  This code is only relevant for ISAM/MyISAM tables

  key_cache->cache may be 0 only in the case where a key cache is not
  initialized or when we were not able to init the key cache in a previous
  call to ha_init_key_cache() (probably out of memory)
*****************************************************************************/
3196
3197
/**
3198
  Init a key cache if it has not been initied before.
3199
*/
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
3200
int ha_init_key_cache(const char *name __attribute__((unused)),
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3201
                      KEY_CACHE *key_cache)
1 by brian
clean slate
3202
{
3203
  if (!key_cache->key_cache_inited)
3204
  {
3205
    pthread_mutex_lock(&LOCK_global_system_variables);
61 by Brian Aker
Conversion of handler type.
3206
    uint32_t tmp_buff_size= (uint32_t) key_cache->param_buff_size;
1 by brian
clean slate
3207
    uint tmp_block_size= (uint) key_cache->param_block_size;
3208
    uint division_limit= key_cache->param_division_limit;
3209
    uint age_threshold=  key_cache->param_age_threshold;
3210
    pthread_mutex_unlock(&LOCK_global_system_variables);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3211
    return(!init_key_cache(key_cache,
1 by brian
clean slate
3212
				tmp_block_size,
3213
				tmp_buff_size,
3214
				division_limit, age_threshold));
3215
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3216
  return(0);
1 by brian
clean slate
3217
}
3218
3219
3220
/**
3221
  Resize key cache.
3222
*/
3223
int ha_resize_key_cache(KEY_CACHE *key_cache)
3224
{
3225
  if (key_cache->key_cache_inited)
3226
  {
3227
    pthread_mutex_lock(&LOCK_global_system_variables);
3228
    long tmp_buff_size= (long) key_cache->param_buff_size;
3229
    long tmp_block_size= (long) key_cache->param_block_size;
3230
    uint division_limit= key_cache->param_division_limit;
3231
    uint age_threshold=  key_cache->param_age_threshold;
3232
    pthread_mutex_unlock(&LOCK_global_system_variables);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3233
    return(!resize_key_cache(key_cache, tmp_block_size,
1 by brian
clean slate
3234
				  tmp_buff_size,
3235
				  division_limit, age_threshold));
3236
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3237
  return(0);
1 by brian
clean slate
3238
}
3239
3240
3241
/**
3242
  Change parameters for key cache (like size)
3243
*/
3244
int ha_change_key_cache_param(KEY_CACHE *key_cache)
3245
{
3246
  if (key_cache->key_cache_inited)
3247
  {
3248
    pthread_mutex_lock(&LOCK_global_system_variables);
3249
    uint division_limit= key_cache->param_division_limit;
3250
    uint age_threshold=  key_cache->param_age_threshold;
3251
    pthread_mutex_unlock(&LOCK_global_system_variables);
3252
    change_key_cache_param(key_cache, division_limit, age_threshold);
3253
  }
3254
  return 0;
3255
}
3256
3257
/**
3258
  Free memory allocated by a key cache.
3259
*/
3260
int ha_end_key_cache(KEY_CACHE *key_cache)
3261
{
3262
  end_key_cache(key_cache, 1);		// Can never fail
3263
  return 0;
3264
}
3265
3266
/**
3267
  Move all tables from one key cache to another one.
3268
*/
3269
int ha_change_key_cache(KEY_CACHE *old_key_cache,
3270
			KEY_CACHE *new_key_cache)
3271
{
3272
  mi_change_key_cache(old_key_cache, new_key_cache);
3273
  return 0;
3274
}
3275
3276
3277
/**
3278
  Try to discover one table from handler(s).
3279
3280
  @retval
3281
    -1   Table does not exist
3282
  @retval
3283
    0   OK. In this case *frmblob and *frmlen are set
3284
  @retval
3285
    >0   error.  frmblob and frmlen may not be set
3286
*/
3287
struct st_discover_args
3288
{
3289
  const char *db;
3290
  const char *name;
3291
  uchar **frmblob; 
3292
  size_t *frmlen;
3293
};
3294
149 by Brian Aker
More bool conversion.
3295
static bool discover_handlerton(THD *thd, plugin_ref plugin,
3296
                                void *arg)
1 by brian
clean slate
3297
{
3298
  st_discover_args *vargs= (st_discover_args *)arg;
3299
  handlerton *hton= plugin_data(plugin, handlerton *);
3300
  if (hton->state == SHOW_OPTION_YES && hton->discover &&
3301
      (!(hton->discover(hton, thd, vargs->db, vargs->name, 
3302
                        vargs->frmblob, 
3303
                        vargs->frmlen))))
56 by brian
Next pass of true/false update.
3304
    return true;
1 by brian
clean slate
3305
56 by brian
Next pass of true/false update.
3306
  return false;
1 by brian
clean slate
3307
}
3308
3309
int ha_discover(THD *thd, const char *db, const char *name,
3310
		uchar **frmblob, size_t *frmlen)
3311
{
3312
  int error= -1; // Table does not exist in any handler
3313
  st_discover_args args= {db, name, frmblob, frmlen};
3314
3315
  if (is_prefix(name,tmp_file_prefix)) /* skip temporary tables */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3316
    return(error);
1 by brian
clean slate
3317
3318
  if (plugin_foreach(thd, discover_handlerton,
319.1.1 by Grant Limberg
renamed all instances of MYSQL_ to DRIZZLE_
3319
                 DRIZZLE_STORAGE_ENGINE_PLUGIN, &args))
1 by brian
clean slate
3320
    error= 0;
3321
3322
  if (!error)
3323
    status_var_increment(thd->status_var.ha_discover_count);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3324
  return(error);
1 by brian
clean slate
3325
}
3326
3327
3328
/**
3329
  Call this function in order to give the handler the possibility
3330
  to ask engine if there are any new tables that should be written to disk
3331
  or any dropped tables that need to be removed from disk
3332
*/
3333
struct st_find_files_args
3334
{
3335
  const char *db;
3336
  const char *path;
3337
  const char *wild;
3338
  bool dir;
3339
  List<LEX_STRING> *files;
3340
};
3341
3342
/**
3343
  Ask handler if the table exists in engine.
3344
  @retval
3345
    HA_ERR_NO_SUCH_TABLE     Table does not exist
3346
  @retval
3347
    HA_ERR_TABLE_EXIST       Table exists
3348
  @retval
3349
    \#                  Error code
3350
*/
3351
/**
  Argument bundle handed through plugin_foreach() to
  table_exists_in_engine_handlerton().
*/
struct st_table_exists_in_engine_args
{
  const char *db;     /* database name, forwarded to the engine hook */
  const char *name;   /* table name, forwarded to the engine hook */
  int err;            /* result code written by the callback */
};
3357
149 by Brian Aker
More bool conversion.
3358
static bool table_exists_in_engine_handlerton(THD *thd, plugin_ref plugin,
3359
                                              void *arg)
1 by brian
clean slate
3360
{
3361
  st_table_exists_in_engine_args *vargs= (st_table_exists_in_engine_args *)arg;
3362
  handlerton *hton= plugin_data(plugin, handlerton *);
3363
3364
  int err= HA_ERR_NO_SUCH_TABLE;
3365
3366
  if (hton->state == SHOW_OPTION_YES && hton->table_exists_in_engine)
3367
    err = hton->table_exists_in_engine(hton, thd, vargs->db, vargs->name);
3368
3369
  vargs->err = err;
3370
  if (vargs->err == HA_ERR_TABLE_EXIST)
56 by brian
Next pass of true/false update.
3371
    return true;
1 by brian
clean slate
3372
56 by brian
Next pass of true/false update.
3373
  return false;
1 by brian
clean slate
3374
}
3375
3376
/**
  Ask the storage engines whether a table exists.

  Runs table_exists_in_engine_handlerton() over the storage engine plugins
  via plugin_foreach() and returns the result code that the callback left
  behind.

  @param thd   Current thread
  @param db    Database name
  @param name  Table name

  @return
    The err value recorded by the callback; HA_ERR_NO_SUCH_TABLE if the
    callback never overwrote the initial value.
*/
int ha_table_exists_in_engine(THD* thd, const char* db, const char* name)
{
  st_table_exists_in_engine_args lookup= {db, name, HA_ERR_NO_SUCH_TABLE};

  plugin_foreach(thd, table_exists_in_engine_handlerton,
                 DRIZZLE_STORAGE_ENGINE_PLUGIN, &lookup);
  return lookup.err;
}
3383
3384
/**
3385
  Calculate cost of 'index only' scan for given index and number of records
3386
3387
  @param keynr    Index number
3388
  @param records  Estimated number of records to be retrieved
3389
3390
  @note
3391
    It is assumed that we will read trough the whole key range and that all
3392
    key blocks are half full (normally things are much better). It is also
3393
    assumed that each time we read the next key from the index, the handler
3394
    performs a random seek, thus the cost is proportional to the number of
3395
    blocks read.
3396
3397
  @todo
3398
    Consider joining this function and handler::read_time() into one
3399
    handler::read_time(keynr, records, ranges, bool index_only) function.
3400
3401
  @return
3402
    Estimated cost of 'index only' scan
3403
*/
3404
3405
double handler::index_only_read_time(uint keynr, double records)
3406
{
3407
  double read_time;
3408
  uint keys_per_block= (stats.block_size/2/
3409
			(table->key_info[keynr].key_length + ref_length) + 1);
3410
  read_time=((double) (records + keys_per_block-1) /
3411
             (double) keys_per_block);
3412
  return read_time;
3413
}
3414
3415
3416
/****************************************************************************
3417
 * Default MRR implementation (MRR to non-MRR converter)
3418
 ***************************************************************************/
3419
3420
/**
3421
  Get cost and other information about MRR scan over a known list of ranges
3422
3423
  Calculate estimated cost and other information about an MRR scan for given
3424
  sequence of ranges.
3425
3426
  @param keyno           Index number
3427
  @param seq             Range sequence to be traversed
3428
  @param seq_init_param  First parameter for seq->init()
3429
  @param n_ranges_arg    Number of ranges in the sequence, or 0 if the caller
3430
                         can't efficiently determine it
3431
  @param bufsz    INOUT  IN:  Size of the buffer available for use
3432
                         OUT: Size of the buffer that is expected to be actually
3433
                              used, or 0 if buffer is not needed.
3434
  @param flags    INOUT  A combination of HA_MRR_* flags
3435
  @param cost     OUT    Estimated cost of MRR access
3436
3437
  @note
3438
    This method (or an overriding one in a derived class) must check for
3439
    thd->killed and return HA_POS_ERROR if it is not zero. This is required
3440
    for a user to be able to interrupt the calculation by killing the
3441
    connection/query.
3442
3443
  @retval
3444
    HA_POS_ERROR  Error or the engine is unable to perform the requested
3445
                  scan. Values of OUT parameters are undefined.
3446
  @retval
3447
    other         OK, *cost contains cost of the scan, *bufsz and *flags
3448
                  contain scan parameters.
3449
*/
3450
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3451
ha_rows
1 by brian
clean slate
3452
handler::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3453
                                     void *seq_init_param,
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
3454
                                     uint n_ranges_arg __attribute__((unused)),
1 by brian
clean slate
3455
                                     uint *bufsz, uint *flags, COST_VECT *cost)
3456
{
3457
  KEY_MULTI_RANGE range;
3458
  range_seq_t seq_it;
3459
  ha_rows rows, total_rows= 0;
3460
  uint n_ranges=0;
3461
  THD *thd= current_thd;
3462
  
3463
  /* Default MRR implementation doesn't need buffer */
3464
  *bufsz= 0;
3465
3466
  seq_it= seq->init(seq_init_param, n_ranges, *flags);
3467
  while (!seq->next(seq_it, &range))
3468
  {
3469
    if (unlikely(thd->killed != 0))
3470
      return HA_POS_ERROR;
3471
    
3472
    n_ranges++;
3473
    key_range *min_endp, *max_endp;
3474
    {
3475
      min_endp= range.start_key.length? &range.start_key : NULL;
3476
      max_endp= range.end_key.length? &range.end_key : NULL;
3477
    }
3478
    if ((range.range_flag & UNIQUE_RANGE) && !(range.range_flag & NULL_RANGE))
3479
      rows= 1; /* there can be at most one row */
3480
    else
3481
    {
3482
      if (HA_POS_ERROR == (rows= this->records_in_range(keyno, min_endp, 
3483
                                                        max_endp)))
3484
      {
3485
        /* Can't scan one range => can't do MRR scan at all */
3486
        total_rows= HA_POS_ERROR;
3487
        break;
3488
      }
3489
    }
3490
    total_rows += rows;
3491
  }
3492
  
3493
  if (total_rows != HA_POS_ERROR)
3494
  {
3495
    /* The following calculation is the same as in multi_range_read_info(): */
3496
    *flags |= HA_MRR_USE_DEFAULT_IMPL;
3497
    cost->zero();
3498
    cost->avg_io_cost= 1; /* assume random seeks */
3499
    if ((*flags & HA_MRR_INDEX_ONLY) && total_rows > 2)
3500
      cost->io_count= index_only_read_time(keyno, (uint)total_rows);
3501
    else
3502
      cost->io_count= read_time(keyno, n_ranges, total_rows);
3503
    cost->cpu_cost= (double) total_rows / TIME_FOR_COMPARE + 0.01;
3504
  }
3505
  return total_rows;
3506
}
3507
3508
3509
/**
3510
  Get cost and other information about MRR scan over some sequence of ranges
3511
3512
  Calculate estimated cost and other information about an MRR scan for some
3513
  sequence of ranges.
3514
3515
  The ranges themselves will be known only at execution phase. When this
3516
  function is called we only know number of ranges and a (rough) E(#records)
3517
  within those ranges.
3518
3519
  Currently this function is only called for "n-keypart singlepoint" ranges,
3520
  i.e. each range is "keypart1=someconst1 AND ... AND keypartN=someconstN"
3521
3522
  The flags parameter is a combination of those flags: HA_MRR_SORTED,
3523
  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION, HA_MRR_LIMITS.
3524
3525
  @param keyno           Index number
3526
  @param n_ranges        Estimated number of ranges (i.e. intervals) in the
3527
                         range sequence.
3528
  @param n_rows          Estimated total number of records contained within all
3529
                         of the ranges
3530
  @param bufsz    INOUT  IN:  Size of the buffer available for use
3531
                         OUT: Size of the buffer that will be actually used, or
3532
                              0 if buffer is not needed.
3533
  @param flags    INOUT  A combination of HA_MRR_* flags
3534
  @param cost     OUT    Estimated cost of MRR access
3535
3536
  @retval
3537
    0     OK, *cost contains cost of the scan, *bufsz and *flags contain scan
3538
          parameters.
3539
  @retval
3540
    other Error or can't perform the requested scan
3541
*/
3542
3543
int handler::multi_range_read_info(uint keyno, uint n_ranges, uint n_rows,
3544
                                   uint *bufsz, uint *flags, COST_VECT *cost)
3545
{
3546
  *bufsz= 0; /* Default implementation doesn't need a buffer */
3547
3548
  *flags |= HA_MRR_USE_DEFAULT_IMPL;
3549
3550
  cost->zero();
3551
  cost->avg_io_cost= 1; /* assume random seeks */
3552
3553
  /* Produce the same cost as non-MRR code does */
3554
  if (*flags & HA_MRR_INDEX_ONLY)
3555
    cost->io_count= index_only_read_time(keyno, n_rows);
3556
  else
3557
    cost->io_count= read_time(keyno, n_ranges, n_rows);
3558
  return 0;
3559
}
3560
3561
3562
/**
3563
  Initialize the MRR scan
3564
3565
  Initialize the MRR scan. This function may do heavyweight scan 
3566
  initialization like row prefetching/sorting/etc (NOTE: but better not do
3567
  it here as we may not need it, e.g. if we never satisfy WHERE clause on
3568
  previous tables. For many implementations it would be natural to do such
3569
  initializations in the first multi_read_range_next() call)
3570
3571
  mode is a combination of the following flags: HA_MRR_SORTED,
3572
  HA_MRR_INDEX_ONLY, HA_MRR_NO_ASSOCIATION 
3573
3574
  @param seq             Range sequence to be traversed
3575
  @param seq_init_param  First parameter for seq->init()
3576
  @param n_ranges        Number of ranges in the sequence
3577
  @param mode            Flags, see the description section for the details
3578
  @param buf             INOUT: memory buffer to be used
3579
3580
  @note
3581
    One must have called index_init() before calling this function. Several
3582
    multi_range_read_init() calls may be made in course of one query.
3583
3584
    Until WL#2623 is done (see its text, section 3.2), the following will 
3585
    also hold:
3586
    The caller will guarantee that if "seq->init == mrr_ranges_array_init"
3587
    then seq_init_param is an array of n_ranges KEY_MULTI_RANGE structures.
3588
    This property will only be used by NDB handler until WL#2623 is done.
3589
     
3590
    Buffer memory management is done according to the following scenario:
3591
    The caller allocates the buffer and provides it to the callee by filling
3592
    the members of HANDLER_BUFFER structure.
3593
    The callee consumes all or some fraction of the provided buffer space, and
3594
    sets the HANDLER_BUFFER members accordingly.
3595
    The callee may use the buffer memory until the next multi_range_read_init()
3596
    call is made, all records have been read, or until index_end() call is
3597
    made, whichever comes first.
3598
3599
  @retval 0  OK
3600
  @retval 1  Error
3601
*/
3602
3603
int
3604
handler::multi_range_read_init(RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
77.1.15 by Monty Taylor
Bunch of warning cleanups.
3605
                               uint n_ranges, uint mode,
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
3606
                               HANDLER_BUFFER *buf __attribute__((unused)))
1 by brian
clean slate
3607
{
3608
  mrr_iter= seq_funcs->init(seq_init_param, n_ranges, mode);
3609
  mrr_funcs= *seq_funcs;
3610
  mrr_is_output_sorted= test(mode & HA_MRR_SORTED);
56 by brian
Next pass of true/false update.
3611
  mrr_have_range= false;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3612
  return(0);
1 by brian
clean slate
3613
}
3614
3615
3616
/**
3617
  Get next record in MRR scan
3618
3619
  Default MRR implementation: read the next record
3620
3621
  @param range_info  OUT  Undefined if HA_MRR_NO_ASSOCIATION flag is in effect
3622
                          Otherwise, the opaque value associated with the range
3623
                          that contains the returned record.
3624
3625
  @retval 0      OK
3626
  @retval other  Error code
3627
*/
3628
3629
int handler::multi_range_read_next(char **range_info)
3630
{
3631
  int result= 0;
236.2.2 by rbradfor
Using correct coding standards for variable initialization
3632
  int range_res= 0;
1 by brian
clean slate
3633
3634
  if (!mrr_have_range)
3635
  {
56 by brian
Next pass of true/false update.
3636
    mrr_have_range= true;
1 by brian
clean slate
3637
    goto start;
3638
  }
3639
3640
  do
3641
  {
3642
    /* Save a call if there can be only one row in range. */
3643
    if (mrr_cur_range.range_flag != (UNIQUE_RANGE | EQ_RANGE))
3644
    {
3645
      result= read_range_next();
3646
      /* On success or non-EOF errors jump to the end. */
3647
      if (result != HA_ERR_END_OF_FILE)
3648
        break;
3649
    }
3650
    else
3651
    {
3652
      if (was_semi_consistent_read())
3653
        goto scan_it_again;
3654
      /*
3655
        We need to set this for the last range only, but checking this
3656
        condition is more expensive than just setting the result code.
3657
      */
3658
      result= HA_ERR_END_OF_FILE;
3659
    }
3660
3661
start:
3662
    /* Try the next range(s) until one matches a record. */
3663
    while (!(range_res= mrr_funcs.next(mrr_iter, &mrr_cur_range)))
3664
    {
3665
scan_it_again:
3666
      result= read_range_first(mrr_cur_range.start_key.keypart_map ?
3667
                                 &mrr_cur_range.start_key : 0,
3668
                               mrr_cur_range.end_key.keypart_map ?
3669
                                 &mrr_cur_range.end_key : 0,
3670
                               test(mrr_cur_range.range_flag & EQ_RANGE),
3671
                               mrr_is_output_sorted);
3672
      if (result != HA_ERR_END_OF_FILE)
3673
        break;
3674
    }
3675
  }
3676
  while ((result == HA_ERR_END_OF_FILE) && !range_res);
3677
3678
  *range_info= mrr_cur_range.ptr;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3679
  return(result);
1 by brian
clean slate
3680
}
3681
3682
3683
/* **************************************************************************
3684
 * DS-MRR implementation 
3685
 ***************************************************************************/
3686
3687
/**
3688
  DS-MRR: Initialize and start MRR scan
3689
3690
  Initialize and start the MRR scan. Depending on the mode parameter, this
3691
  may use default or DS-MRR implementation.
3692
3693
  @param h               Table handler to be used
3694
  @param key             Index to be used
3695
  @param seq_funcs       Interval sequence enumeration functions
3696
  @param seq_init_param  Interval sequence enumeration parameter
3697
  @param n_ranges        Number of ranges in the sequence.
3698
  @param mode            HA_MRR_* modes to use
3699
  @param buf             INOUT Buffer to use
3700
3701
  @retval 0     Ok, Scan started.
3702
  @retval other Error
3703
*/
3704
3705
int DsMrr_impl::dsmrr_init(handler *h, KEY *key,
3706
                           RANGE_SEQ_IF *seq_funcs, void *seq_init_param,
3707
                           uint n_ranges, uint mode, HANDLER_BUFFER *buf)
3708
{
3709
  uint elem_size;
3710
  uint keyno;
3711
  Item *pushed_cond= NULL;
3712
  handler *new_h2;
3713
  keyno= h->active_index;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3714
  assert(h2 == NULL);
1 by brian
clean slate
3715
  if (mode & HA_MRR_USE_DEFAULT_IMPL || mode & HA_MRR_SORTED)
3716
  {
56 by brian
Next pass of true/false update.
3717
    use_default_impl= true;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3718
    return(h->handler::multi_range_read_init(seq_funcs, seq_init_param,
1 by brian
clean slate
3719
                                                  n_ranges, mode, buf));
3720
  }
3721
  rowids_buf= buf->buffer;
3722
  //psergey-todo: don't add key_length as it is not needed anymore
3723
  rowids_buf += key->key_length + h->ref_length;
3724
3725
  is_mrr_assoc= !test(mode & HA_MRR_NO_ASSOCIATION);
3726
  rowids_buf_end= buf->buffer_end;
3727
  
3728
  elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
3729
  rowids_buf_last= rowids_buf + 
3730
                      ((rowids_buf_end - rowids_buf)/ elem_size)*
3731
                      elem_size;
3732
  rowids_buf_end= rowids_buf_last;
3733
3734
  /* Create a separate handler object to do rndpos() calls. */
3735
  THD *thd= current_thd;
3736
  if (!(new_h2= h->clone(thd->mem_root)) || 
3737
      new_h2->ha_external_lock(thd, F_RDLCK))
3738
  {
3739
    delete new_h2;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3740
    return(1);
1 by brian
clean slate
3741
  }
3742
3743
  if (keyno == h->pushed_idx_cond_keyno)
3744
    pushed_cond= h->pushed_idx_cond;
3745
  if (h->ha_index_end())
3746
  {
3747
    new_h2= h2;
3748
    goto error;
3749
  }
3750
3751
  h2= new_h2;
3752
  table->prepare_for_position();
3753
  new_h2->extra(HA_EXTRA_KEYREAD);
3754
56 by brian
Next pass of true/false update.
3755
  if (h2->ha_index_init(keyno, false) || 
1 by brian
clean slate
3756
      h2->handler::multi_range_read_init(seq_funcs, seq_init_param, n_ranges,
3757
                                         mode, buf))
3758
    goto error;
56 by brian
Next pass of true/false update.
3759
  use_default_impl= false;
1 by brian
clean slate
3760
  
3761
  if (pushed_cond)
3762
    h2->idx_cond_push(keyno, pushed_cond);
3763
  if (dsmrr_fill_buffer(new_h2))
3764
    goto error;
3765
3766
  /*
3767
    If the above call has scanned through all intervals in *seq, then
3768
    adjust *buf to indicate that the remaining buffer space will not be used.
3769
  */
3770
  if (dsmrr_eof) 
3771
    buf->end_of_used_area= rowids_buf_last;
3772
56 by brian
Next pass of true/false update.
3773
  if (h->ha_rnd_init(false))
1 by brian
clean slate
3774
    goto error;
3775
  
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3776
  return(0);
1 by brian
clean slate
3777
error:
3778
  h2->ha_index_or_rnd_end();
3779
  h2->ha_external_lock(thd, F_UNLCK);
3780
  h2->close();
3781
  delete h2;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3782
  return(1);
1 by brian
clean slate
3783
}
3784
3785
3786
void DsMrr_impl::dsmrr_close()
3787
{
3788
  if (h2)
3789
  {
3790
    h2->ha_external_lock(current_thd, F_UNLCK);
3791
    h2->close();
3792
    delete h2;
3793
    h2= NULL;
3794
  }
56 by brian
Next pass of true/false update.
3795
  use_default_impl= true;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3796
  return;
1 by brian
clean slate
3797
}
3798
3799
3800
static int rowid_cmp(void *h, uchar *a, uchar *b)
3801
{
3802
  return ((handler*)h)->cmp_ref(a, b);
3803
}
3804
3805
3806
/**
3807
  DS-MRR: Fill the buffer with rowids and sort it by rowid
3808
3809
  {This is an internal function of DiskSweep MRR implementation}
3810
  Scan the MRR ranges and collect ROWIDs (or {ROWID, range_id} pairs) into 
3811
  buffer. When the buffer is full or scan is completed, sort the buffer by 
3812
  rowid and return.
3813
  
3814
  The function assumes that rowids buffer is empty when it is invoked. 
3815
  
3816
  @param h  Table handler
3817
3818
  @retval 0      OK, the next portion of rowids is in the buffer,
3819
                 properly ordered
3820
  @retval other  Error
3821
*/
3822
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
3823
int DsMrr_impl::dsmrr_fill_buffer(handler *unused __attribute__((unused)))
1 by brian
clean slate
3824
{
3825
  char *range_info;
236.2.1 by rbradfor
Mac OS/X with darwin ports corrected uninitialized variable warnings
3826
  int res = 0;
1 by brian
clean slate
3827
3828
  rowids_buf_cur= rowids_buf;
3829
  while ((rowids_buf_cur < rowids_buf_end) && 
3830
         !(res= h2->handler::multi_range_read_next(&range_info)))
3831
  {
3832
    /* Put rowid, or {rowid, range_id} pair into the buffer */
3833
    h2->position(table->record[0]);
3834
    memcpy(rowids_buf_cur, h2->ref, h2->ref_length);
3835
    rowids_buf_cur += h->ref_length;
3836
3837
    if (is_mrr_assoc)
3838
    {
3839
      memcpy(rowids_buf_cur, &range_info, sizeof(void*));
3840
      rowids_buf_cur += sizeof(void*);
3841
    }
3842
  }
3843
3844
  if (res && res != HA_ERR_END_OF_FILE)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3845
    return(res); 
1 by brian
clean slate
3846
  dsmrr_eof= test(res == HA_ERR_END_OF_FILE);
3847
3848
  /* Sort the buffer contents by rowid */
3849
  uint elem_size= h->ref_length + (int)is_mrr_assoc * sizeof(void*);
3850
  uint n_rowids= (rowids_buf_cur - rowids_buf) / elem_size;
3851
  
3852
  my_qsort2(rowids_buf, n_rowids, elem_size, (qsort2_cmp)rowid_cmp,
3853
            (void*)h);
3854
  rowids_buf_last= rowids_buf_cur;
3855
  rowids_buf_cur=  rowids_buf;
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3856
  return(0);
1 by brian
clean slate
3857
}
3858
3859
3860
/**
3861
  DS-MRR implementation: multi_range_read_next() function
3862
*/
3863
3864
int DsMrr_impl::dsmrr_next(handler *h, char **range_info)
3865
{
3866
  int res;
3867
  
3868
  if (use_default_impl)
3869
    return h->handler::multi_range_read_next(range_info);
3870
    
3871
  if (rowids_buf_cur == rowids_buf_last)
3872
  {
3873
    if (dsmrr_eof)
3874
    {
3875
      res= HA_ERR_END_OF_FILE;
3876
      goto end;
3877
    }
3878
    res= dsmrr_fill_buffer(h);
3879
    if (res)
3880
      goto end;
3881
  }
3882
  
3883
  /* Return EOF if there are no rowids in the buffer after re-fill attempt */
3884
  if (rowids_buf_cur == rowids_buf_last)
3885
  {
3886
    res= HA_ERR_END_OF_FILE;
3887
    goto end;
3888
  }
3889
3890
  res= h->rnd_pos(table->record[0], rowids_buf_cur);
3891
  rowids_buf_cur += h->ref_length;
3892
  if (is_mrr_assoc)
3893
  {
3894
    memcpy(range_info, rowids_buf_cur, sizeof(void*));
3895
    rowids_buf_cur += sizeof(void*);
3896
  }
3897
3898
end:
3899
  if (res)
3900
    dsmrr_close();
3901
  return res;
3902
}
3903
3904
3905
/**
3906
  DS-MRR implementation: multi_range_read_info() function
3907
*/
3908
int DsMrr_impl::dsmrr_info(uint keyno, uint n_ranges, uint rows, uint *bufsz,
3909
                           uint *flags, COST_VECT *cost)
3910
{  
3911
  int res;
3912
  uint def_flags= *flags;
3913
  uint def_bufsz= *bufsz;
3914
3915
  /* Get cost/flags/mem_usage of default MRR implementation */
3916
  res= h->handler::multi_range_read_info(keyno, n_ranges, rows, &def_bufsz,
3917
                                         &def_flags, cost);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
3918
  assert(!res);
1 by brian
clean slate
3919
3920
  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) || 
3921
      choose_mrr_impl(keyno, rows, &def_flags, &def_bufsz, cost))
3922
  {
3923
    /* Default implementation is choosen */
3924
    *flags= def_flags;
3925
    *bufsz= def_bufsz;
3926
  }
3927
  return 0;
3928
}
3929
3930
3931
/**
3932
  DS-MRR Implementation: multi_range_read_info_const() function
3933
*/
3934
3935
ha_rows DsMrr_impl::dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq,
3936
                                 void *seq_init_param, uint n_ranges, 
3937
                                 uint *bufsz, uint *flags, COST_VECT *cost)
3938
{
3939
  ha_rows rows;
3940
  uint def_flags= *flags;
3941
  uint def_bufsz= *bufsz;
3942
  /* Get cost/flags/mem_usage of default MRR implementation */
3943
  rows= h->handler::multi_range_read_info_const(keyno, seq, seq_init_param,
3944
                                                n_ranges, &def_bufsz, 
3945
                                                &def_flags, cost);
3946
  if (rows == HA_POS_ERROR)
3947
  {
3948
    /* Default implementation can't perform MRR scan => we can't either */
3949
    return rows;
3950
  }
3951
3952
  /*
3953
    If HA_MRR_USE_DEFAULT_IMPL has been passed to us, that is an order to
3954
    use the default MRR implementation (we need it for UPDATE/DELETE).
3955
    Otherwise, make a choice based on cost and @@optimizer_use_mrr.
3956
  */
3957
  if ((*flags & HA_MRR_USE_DEFAULT_IMPL) ||
3958
      choose_mrr_impl(keyno, rows, flags, bufsz, cost))
3959
  {
3960
    *flags= def_flags;
3961
    *bufsz= def_bufsz;
3962
  }
3963
  else
3964
  {
3965
    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;
3966
  }
3967
  return rows;
3968
}
3969
3970
3971
/**
3972
  Check if key has partially-covered columns
3973
3974
  We can't use DS-MRR to perform range scans when the ranges are over
3975
  partially-covered keys, because we'll not have full key part values
3976
  (we'll have their prefixes from the index) and will not be able to check
3977
  if we've reached the end the range.
3978
3979
  @param keyno  Key to check
3980
3981
  @todo
3982
    Allow use of DS-MRR in cases where the index has partially-covered
3983
    components but they are not used for scanning.
3984
56 by brian
Next pass of true/false update.
3985
  @retval true   Yes
3986
  @retval false  No
1 by brian
clean slate
3987
*/
3988
3989
bool DsMrr_impl::key_uses_partial_cols(uint keyno)
3990
{
3991
  KEY_PART_INFO *kp= table->key_info[keyno].key_part;
3992
  KEY_PART_INFO *kp_end= kp + table->key_info[keyno].key_parts;
3993
  for (; kp != kp_end; kp++)
3994
  {
3995
    if (!kp->field->part_of_key.is_set(keyno))
56 by brian
Next pass of true/false update.
3996
      return true;
1 by brian
clean slate
3997
  }
56 by brian
Next pass of true/false update.
3998
  return false;
1 by brian
clean slate
3999
}
4000
4001
4002
/**
4003
  DS-MRR Internals: Choose between Default MRR implementation and DS-MRR
4004
4005
  Make the choice between using Default MRR implementation and DS-MRR.
4006
  This function contains common functionality factored out of dsmrr_info()
4007
  and dsmrr_info_const(). The function assumes that the default MRR
4008
  implementation's applicability requirements are satisfied.
4009
4010
  @param keyno       Index number
4011
  @param rows        E(full rows to be retrieved)
4012
  @param flags  IN   MRR flags provided by the MRR user
4013
                OUT  If DS-MRR is choosen, flags of DS-MRR implementation
4014
                     else the value is not modified
4015
  @param bufsz  IN   If DS-MRR is choosen, buffer use of DS-MRR implementation
4016
                     else the value is not modified
4017
  @param cost   IN   Cost of default MRR implementation
4018
                OUT  If DS-MRR is choosen, cost of DS-MRR scan
4019
                     else the value is not modified
4020
56 by brian
Next pass of true/false update.
4021
  @retval true   Default MRR implementation should be used
4022
  @retval false  DS-MRR implementation should be used
1 by brian
clean slate
4023
*/
4024
4025
bool DsMrr_impl::choose_mrr_impl(uint keyno, ha_rows rows, uint *flags,
4026
                                 uint *bufsz, COST_VECT *cost)
4027
{
4028
  COST_VECT dsmrr_cost;
4029
  bool res;
4030
  THD *thd= current_thd;
4031
  if ((thd->variables.optimizer_use_mrr == 2) || 
4032
      (*flags & HA_MRR_INDEX_ONLY) || (*flags & HA_MRR_SORTED) ||
4033
      (keyno == table->s->primary_key && 
4034
       h->primary_key_is_clustered()) || 
4035
       key_uses_partial_cols(keyno))
4036
  {
4037
    /* Use the default implementation */
4038
    *flags |= HA_MRR_USE_DEFAULT_IMPL;
56 by brian
Next pass of true/false update.
4039
    return true;
1 by brian
clean slate
4040
  }
4041
  
4042
  uint add_len= table->key_info[keyno].key_length + h->ref_length; 
4043
  *bufsz -= add_len;
4044
  if (get_disk_sweep_mrr_cost(keyno, rows, *flags, bufsz, &dsmrr_cost))
56 by brian
Next pass of true/false update.
4045
    return true;
1 by brian
clean slate
4046
  *bufsz += add_len;
4047
  
4048
  bool force_dsmrr;
4049
  /* 
4050
    If @@optimizer_use_mrr==force, then set cost of DS-MRR to be minimum of
4051
    DS-MRR and Default implementations cost. This allows one to force use of
4052
    DS-MRR whenever it is applicable without affecting other cost-based
4053
    choices.
4054
  */
4055
  if ((force_dsmrr= (thd->variables.optimizer_use_mrr == 1)) &&
4056
      dsmrr_cost.total_cost() > cost->total_cost())
4057
    dsmrr_cost= *cost;
4058
4059
  if (force_dsmrr || dsmrr_cost.total_cost() <= cost->total_cost())
4060
  {
4061
    *flags &= ~HA_MRR_USE_DEFAULT_IMPL;  /* Use the DS-MRR implementation */
4062
    *flags &= ~HA_MRR_SORTED;          /* We will return unordered output */
4063
    *cost= dsmrr_cost;
56 by brian
Next pass of true/false update.
4064
    res= false;
1 by brian
clean slate
4065
  }
4066
  else
4067
  {
4068
    /* Use the default MRR implementation */
56 by brian
Next pass of true/false update.
4069
    res= true;
1 by brian
clean slate
4070
  }
4071
  return res;
4072
}
4073
4074
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
4075
/*
  Forward declaration: the helper is defined further down in this file but is
  already needed by DsMrr_impl::get_disk_sweep_mrr_cost().
*/
static void get_sort_and_sweep_cost(Table *table, ha_rows nrows, COST_VECT *cost);
1 by brian
clean slate
4076
4077
4078
/**
4079
  Get cost of DS-MRR scan
4080
4081
  @param keynr              Index to be used
4082
  @param rows               E(Number of rows to be scanned)
4083
  @param flags              Scan parameters (HA_MRR_* flags)
4084
  @param buffer_size INOUT  Buffer size
4085
  @param cost        OUT    The cost
4086
56 by brian
Next pass of true/false update.
4087
  @retval false  OK
4088
  @retval true   Error, DS-MRR cannot be used (the buffer is too small
1 by brian
clean slate
4089
                 for even 1 rowid)
4090
*/
4091
4092
bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
4093
                                         uint *buffer_size, COST_VECT *cost)
4094
{
61 by Brian Aker
Conversion of handler type.
4095
  uint32_t max_buff_entries, elem_size;
1 by brian
clean slate
4096
  ha_rows rows_in_full_step, rows_in_last_step;
4097
  uint n_full_steps;
4098
  double index_read_cost;
4099
4100
  elem_size= h->ref_length + sizeof(void*) * (!test(flags & HA_MRR_NO_ASSOCIATION));
4101
  max_buff_entries = *buffer_size / elem_size;
4102
4103
  if (!max_buff_entries)
56 by brian
Next pass of true/false update.
4104
    return true; /* Buffer has not enough space for even 1 rowid */
1 by brian
clean slate
4105
4106
  /* Number of iterations we'll make with full buffer */
4107
  n_full_steps= (uint)floor(rows2double(rows) / max_buff_entries);
4108
  
4109
  /* 
4110
    Get numbers of rows we'll be processing in 
4111
     - non-last sweep, with full buffer 
4112
     - last iteration, with non-full buffer
4113
  */
4114
  rows_in_full_step= max_buff_entries;
4115
  rows_in_last_step= rows % max_buff_entries;
4116
  
4117
  /* Adjust buffer size if we expect to use only part of the buffer */
4118
  if (n_full_steps)
4119
  {
4120
    get_sort_and_sweep_cost(table, rows, cost);
4121
    cost->multiply(n_full_steps);
4122
  }
4123
  else
4124
  {
4125
    cost->zero();
287.3.8 by Monty Taylor
Oy. Replaced max and min macros with std::max and std::min so that we get
4126
    *buffer_size= max((ulong)*buffer_size, 
1 by brian
clean slate
4127
                      (size_t)(1.2*rows_in_last_step) * elem_size + 
4128
                      h->ref_length + table->key_info[keynr].key_length);
4129
  }
4130
  
4131
  COST_VECT last_step_cost;
4132
  get_sort_and_sweep_cost(table, rows_in_last_step, &last_step_cost);
4133
  cost->add(&last_step_cost);
4134
 
4135
  if (n_full_steps != 0)
4136
    cost->mem_cost= *buffer_size;
4137
  else
4138
    cost->mem_cost= (double)rows_in_last_step * elem_size;
4139
  
4140
  /* Total cost of all index accesses */
4141
  index_read_cost= h->index_only_read_time(keynr, (double)rows);
4142
  cost->add_io(index_read_cost, 1 /* Random seeks */);
56 by brian
Next pass of true/false update.
4143
  return false;
1 by brian
clean slate
4144
}
4145
4146
4147
/* 
4148
  Get cost of one sort-and-sweep step
4149
4150
  SYNOPSIS
4151
    get_sort_and_sweep_cost()
4152
      table       Table being accessed
4153
      nrows       Number of rows to be sorted and retrieved
4154
      cost   OUT  The cost
4155
4156
  DESCRIPTION
4157
    Get cost of these operations:
4158
     - sort an array of #nrows ROWIDs using qsort
4159
     - read #nrows records from table in a sweep.
4160
*/
4161
4162
static 
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
4163
void get_sort_and_sweep_cost(Table *table, ha_rows nrows, COST_VECT *cost)
1 by brian
clean slate
4164
{
4165
  if (nrows)
4166
  {
56 by brian
Next pass of true/false update.
4167
    get_sweep_read_cost(table, nrows, false, cost);
1 by brian
clean slate
4168
    /* Add cost of qsort call: n * log2(n) * cost(rowid_comparison) */
4169
    double cmp_op= rows2double(nrows) * (1.0 / TIME_FOR_COMPARE_ROWID);
4170
    if (cmp_op < 3)
4171
      cmp_op= 3;
4172
    cost->cpu_cost += cmp_op * log2(cmp_op);
4173
  }
4174
  else
4175
    cost->zero();
4176
}
4177
4178
4179
/**
4180
  Get cost of reading nrows table records in a "disk sweep"
4181
4182
  A disk sweep read is a sequence of handler->rnd_pos(rowid) calls that made
4183
  for an ordered sequence of rowids.
4184
4185
  We assume hard disk IO. The read is performed as follows:
4186
4187
   1. The disk head is moved to the needed cylinder
4188
   2. The controller waits for the plate to rotate
4189
   3. The data is transferred
4190
4191
  Time to do #3 is insignificant compared to #2+#1.
4192
4193
  Time to move the disk head is proportional to head travel distance.
4194
4195
  Time to wait for the plate to rotate depends on whether the disk head
4196
  was moved or not. 
4197
4198
  If disk head wasn't moved, the wait time is proportional to distance
4199
  between the previous block and the block we're reading.
4200
4201
  If the head was moved, we don't know how much we'll need to wait for the
4202
  plate to rotate. We assume the wait time to be a variate with a mean of
4203
  0.5 of full rotation time.
4204
4205
  Our cost units are "random disk seeks". The cost of random disk seek is
4206
  actually not a constant, it depends one range of cylinders we're going
4207
  to access. We make it constant by introducing a fuzzy concept of "typical 
4208
  datafile length" (it's fuzzy as it's hard to tell whether it should
4209
  include index file, temp.tables etc). Then random seek cost is:
4210
4211
    1 = half_rotation_cost + move_cost * 1/3 * typical_data_file_length
4212
4213
  We define half_rotation_cost as DISK_SEEK_BASE_COST=0.9.
4214
4215
  @param table             Table to be accessed
4216
  @param nrows             Number of rows to retrieve
56 by brian
Next pass of true/false update.
4217
  @param interrupted       true <=> Assume that the disk sweep will be
4218
                           interrupted by other disk IO. false - otherwise.
1 by brian
clean slate
4219
  @param cost         OUT  The cost.
4220
*/
4221
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
4222
void get_sweep_read_cost(Table *table, ha_rows nrows, bool interrupted, 
1 by brian
clean slate
4223
                         COST_VECT *cost)
4224
{
4225
  cost->zero();
4226
  if (table->file->primary_key_is_clustered())
4227
  {
4228
    cost->io_count= table->file->read_time(table->s->primary_key,
4229
                                           (uint) nrows, nrows);
4230
  }
4231
  else
4232
  {
4233
    double n_blocks=
151 by Brian Aker
Ulonglong to uint64_t
4234
      ceil(uint64_t2double(table->file->stats.data_file_length) / IO_SIZE);
1 by brian
clean slate
4235
    double busy_blocks=
4236
      n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, rows2double(nrows)));
4237
    if (busy_blocks < 1.0)
4238
      busy_blocks= 1.0;
4239
4240
    cost->io_count= busy_blocks;
4241
4242
    if (!interrupted)
4243
    {
4244
      /* Assume reading is done in one 'sweep' */
4245
      cost->avg_io_cost= (DISK_SEEK_BASE_COST +
4246
                          DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
4247
    }
4248
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4249
  return;
1 by brian
clean slate
4250
}
4251
4252
4253
/* **************************************************************************
4254
 * DS-MRR implementation ends
4255
 ***************************************************************************/
4256
4257
/**
4258
  Read first row between two ranges.
4259
4260
  @param start_key		Start key. Is 0 if no min range
4261
  @param end_key		End key.  Is 0 if no max range
4262
  @param eq_range_arg	        Set to 1 if start_key == end_key
4263
  @param sorted		Set to 1 if result should be sorted per key
4264
4265
  @note
4266
    Record is read into table->record[0]
4267
4268
  @retval
4269
    0			Found row
4270
  @retval
4271
    HA_ERR_END_OF_FILE	No rows in range
4272
  @retval
4273
    \#			Error code
4274
*/
4275
int handler::read_range_first(const key_range *start_key,
4276
			      const key_range *end_key,
4277
			      bool eq_range_arg,
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
4278
                              bool sorted  __attribute__((unused)))
1 by brian
clean slate
4279
{
4280
  int result;
4281
4282
  eq_range= eq_range_arg;
4283
  end_range= 0;
4284
  if (end_key)
4285
  {
4286
    end_range= &save_end_range;
4287
    save_end_range= *end_key;
4288
    key_compare_result_on_equal= ((end_key->flag == HA_READ_BEFORE_KEY) ? 1 :
4289
				  (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
4290
  }
4291
  range_key_part= table->key_info[active_index].key_part;
4292
4293
  if (!start_key)			// Read first record
4294
    result= index_first(table->record[0]);
4295
  else
4296
    result= index_read_map(table->record[0],
4297
                           start_key->key,
4298
                           start_key->keypart_map,
4299
                           start_key->flag);
4300
  if (result)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4301
    return((result == HA_ERR_KEY_NOT_FOUND) 
1 by brian
clean slate
4302
		? HA_ERR_END_OF_FILE
4303
		: result);
4304
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4305
  return (compare_key(end_range) <= 0 ? 0 : HA_ERR_END_OF_FILE);
1 by brian
clean slate
4306
}
4307
4308
4309
/**
4310
  Read next row between two endpoints.
4311
4312
  @note
4313
    Record is read into table->record[0]
4314
4315
  @retval
4316
    0			Found row
4317
  @retval
4318
    HA_ERR_END_OF_FILE	No rows in range
4319
  @retval
4320
    \#			Error code
4321
*/
4322
int handler::read_range_next()
4323
{
4324
  int result;
4325
4326
  if (eq_range)
4327
  {
4328
    /* We trust that index_next_same always gives a row in range */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4329
    return(index_next_same(table->record[0],
1 by brian
clean slate
4330
                                end_range->key,
4331
                                end_range->length));
4332
  }
4333
  result= index_next(table->record[0]);
4334
  if (result)
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4335
    return(result);
4336
  return(compare_key(end_range) <= 0 ? 0 : HA_ERR_END_OF_FILE);
1 by brian
clean slate
4337
}
4338
4339
4340
/**
4341
  Compare if found key (in row) is over max-value.
4342
4343
  @param range		range to compare to row. May be 0 for no range
4344
4345
  @seealso
4346
    key.cc::key_cmp()
4347
4348
  @return
4349
    The return value is SIGN(key_in_row - range_key):
4350
4351
    - 0   : Key is equal to range or 'range' == 0 (no range)
4352
    - -1  : Key is less than range
4353
    - 1   : Key is larger than range
4354
*/
4355
int handler::compare_key(key_range *range)
4356
{
4357
  int cmp;
4358
  if (!range || in_range_check_pushed_down)
4359
    return 0;					// No max range
4360
  cmp= key_cmp(range_key_part, range->key, range->length);
4361
  if (!cmp)
4362
    cmp= key_compare_result_on_equal;
4363
  return cmp;
4364
}
4365
4366
4367
/*
4368
  Same as compare_key() but doesn't check have in_range_check_pushed_down.
4369
  This is used by index condition pushdown implementation.
4370
*/
4371
4372
int handler::compare_key2(key_range *range)
4373
{
4374
  int cmp;
4375
  if (!range)
4376
    return 0;					// no max range
4377
  cmp= key_cmp(range_key_part, range->key, range->length);
4378
  if (!cmp)
4379
    cmp= key_compare_result_on_equal;
4380
  return cmp;
4381
}
4382
4383
int handler::index_read_idx_map(uchar * buf, uint index, const uchar * key,
4384
                                key_part_map keypart_map,
4385
                                enum ha_rkey_function find_flag)
4386
{
4387
  int error, error1;
4388
  error= index_init(index, 0);
4389
  if (!error)
4390
  {
4391
    error= index_read_map(buf, key, keypart_map, find_flag);
4392
    error1= index_end();
4393
  }
4394
  return error ?  error : error1;
4395
}
4396
4397
4398
/**
4399
  Returns a list of all known extensions.
4400
4401
    No mutexes, worst case race is a minor surplus memory allocation
4402
    We have to recreate the extension map if mysqld is restarted (for example
4403
    within libmysqld)
4404
4405
  @retval
4406
    pointer		pointer to TYPELIB structure
4407
*/
212.1.3 by Monty Taylor
Renamed __attribute__((__unused__)) to __attribute__((unused)).
4408
static bool exts_handlerton(THD *unused __attribute__((unused)),
149 by Brian Aker
More bool conversion.
4409
                            plugin_ref plugin,
4410
                            void *arg)
1 by brian
clean slate
4411
{
4412
  List<char> *found_exts= (List<char> *) arg;
4413
  handlerton *hton= plugin_data(plugin, handlerton *);
4414
  handler *file;
4415
  if (hton->state == SHOW_OPTION_YES && hton->create &&
4416
      (file= hton->create(hton, (TABLE_SHARE*) 0, current_thd->mem_root)))
4417
  {
4418
    List_iterator_fast<char> it(*found_exts);
4419
    const char **ext, *old_ext;
4420
4421
    for (ext= file->bas_ext(); *ext; ext++)
4422
    {
4423
      while ((old_ext= it++))
4424
      {
4425
        if (!strcmp(old_ext, *ext))
4426
	  break;
4427
      }
4428
      if (!old_ext)
4429
        found_exts->push_back((char *) *ext);
4430
4431
      it.rewind();
4432
    }
4433
    delete file;
4434
  }
56 by brian
Next pass of true/false update.
4435
  return false;
1 by brian
clean slate
4436
}
4437
4438
/*
  Return the TYPELIB of file extensions collected from the storage engines.

  The list is (re)built when it has no type_names yet or when
  mysys_usage_id no longer matches the id it was built under. It is stored
  in memory from my_once_alloc() and terminated by a 0 entry.
*/
TYPELIB *ha_known_exts(void)
{
  /* Still valid: hand out the cached list */
  if (known_extensions.type_names && mysys_usage_id == known_extensions_id)
    return &known_extensions;

  List<char> found_exts;

  known_extensions_id= mysys_usage_id;

  /* Let every storage engine plugin add its extensions to found_exts */
  plugin_foreach(NULL, exts_handlerton,
                 DRIZZLE_STORAGE_ENGINE_PLUGIN, &found_exts);

  /* One slot per extension plus the terminating 0 */
  const char **names= (const char **) my_once_alloc(sizeof(char *)*
                                                    (found_exts.elements+1),
                                                    MYF(MY_WME | MY_FAE));

  assert(names != 0);
  known_extensions.count= found_exts.elements;
  known_extensions.type_names= names;

  List_iterator_fast<char> it(found_exts);
  const char *one_ext;
  const char **slot= names;
  while ((one_ext= it++))
  {
    *slot= one_ext;
    slot++;
  }
  *slot= 0;

  return &known_extensions;
}
4465
4466
4467
/*
  Send one (type, file, status) row through thd->protocol.

  All three strings are stored with system_charset_info. Returns true if
  protocol->write() reports a failure, false otherwise.
*/
static bool stat_print(THD *thd, const char *type, uint type_len,
                       const char *file, uint file_len,
                       const char *status, uint status_len)
{
  Protocol *const out= thd->protocol;

  out->prepare_for_resend();
  out->store(type, type_len, system_charset_info);
  out->store(file, file_len, system_charset_info);
  out->store(status, status_len, system_charset_info);

  return out->write() ? true : false;
}
4480
4481
/*
  Send the status of one storage engine to the client.

  Sends the "Type" / "Name" / "Status" column headers, then lets the
  engine's show_status hook (if it has one) emit rows through stat_print().
  On success the result set is finished with my_eof().

  Returns true if sending the headers failed or the hook reported an error,
  false otherwise.
*/
bool ha_show_status(THD *thd, handlerton *db_type, enum ha_stat_type stat)
{
  Protocol *protocol= thd->protocol;
  List<Item> field_list;

  field_list.push_back(new Item_empty_string("Type",10));
  field_list.push_back(new Item_empty_string("Name",FN_REFLEN));
  field_list.push_back(new Item_empty_string("Status",10));

  if (protocol->send_fields(&field_list,
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
    return true;

  /* An engine without a show_status hook simply contributes no rows */
  bool failed= false;
  if (db_type->show_status)
    failed= db_type->show_status(db_type, thd, stat_print, stat) ? true : false;

  if (failed)
    return true;

  my_eof(thd);
  return false;
}
4502
4503
4504
/**
4505
  Check if the conditions for row-based binlogging is correct for the table.
4506
4507
  A row in the given table should be replicated if:
4508
  - Row-based replication is enabled in the current thread
4509
  - The binlog is enabled
4510
  - It is not a temporary table
4511
  - The binary log is open
4512
  - The database the table resides in shall be binlogged (binlog_*_db rules)
4513
  - table is not mysql.event
4514
*/
4515
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
4516
static bool check_table_binlog_row_based(THD *thd, Table *table)
1 by brian
clean slate
4517
{
4518
  if (table->s->cached_row_logging_check == -1)
4519
  {
4520
    int const check(table->s->tmp_table == NO_TMP_TABLE &&
4521
                    binlog_filter->db_ok(table->s->db.str));
4522
    table->s->cached_row_logging_check= check;
4523
  }
4524
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4525
  assert(table->s->cached_row_logging_check == 0 ||
1 by brian
clean slate
4526
              table->s->cached_row_logging_check == 1);
4527
4528
  return (thd->current_stmt_binlog_row_based &&
4529
          table->s->cached_row_logging_check &&
4530
          (thd->options & OPTION_BIN_LOG) &&
4531
          mysql_bin_log.is_open());
4532
}
4533
4534
4535
/**
4536
   Write table maps for all (manually or automatically) locked tables
4537
   to the binary log.
4538
4539
   This function will generate and write table maps for all tables
4540
   that are locked by the thread 'thd'.  Either manually locked
4541
   (stored in THD::locked_tables) and automatically locked (stored
4542
   in THD::lock) are considered.
4543
4544
   @param thd     Pointer to THD structure
4545
4546
   @retval 0   All OK
4547
   @retval 1   Failed to write all table maps
4548
4549
   @sa
4550
       THD::lock
4551
       THD::locked_tables
4552
*/
4553
4554
static int write_locked_table_maps(THD *thd)
4555
{
4556
  if (thd->get_binlog_table_maps() == 0)
4557
  {
319.1.1 by Grant Limberg
renamed all instances of MYSQL_ to DRIZZLE_
4558
    DRIZZLE_LOCK *locks[3];
1 by brian
clean slate
4559
    locks[0]= thd->extra_lock;
4560
    locks[1]= thd->lock;
4561
    locks[2]= thd->locked_tables;
4562
    for (uint i= 0 ; i < sizeof(locks)/sizeof(*locks) ; ++i )
4563
    {
319.1.1 by Grant Limberg
renamed all instances of MYSQL_ to DRIZZLE_
4564
      DRIZZLE_LOCK const *const lock= locks[i];
1 by brian
clean slate
4565
      if (lock == NULL)
4566
        continue;
4567
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
4568
      Table **const end_ptr= lock->table + lock->table_count;
4569
      for (Table **table_ptr= lock->table ; 
1 by brian
clean slate
4570
           table_ptr != end_ptr ;
4571
           ++table_ptr)
4572
      {
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
4573
        Table *const table= *table_ptr;
1 by brian
clean slate
4574
        if (table->current_lock == F_WRLCK &&
4575
            check_table_binlog_row_based(thd, table))
4576
        {
4577
          int const has_trans= table->file->has_transactions();
4578
          int const error= thd->binlog_write_table_map(table, has_trans);
4579
          /*
4580
            If an error occurs, it is the responsibility of the caller to
4581
            roll back the transaction.
4582
          */
4583
          if (unlikely(error))
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4584
            return(1);
1 by brian
clean slate
4585
        }
4586
      }
4587
    }
4588
  }
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4589
  return(0);
1 by brian
clean slate
4590
}
4591
4592
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
4593
/*
  Type of the row-logging function handed to binlog_log_row(). It is called
  as (thd, table, has_trans, before_record, after_record); a true return
  value is treated by binlog_log_row() as a logging failure.
*/
typedef bool Log_func(THD*, Table*, bool, const uchar*, const uchar*);
1 by brian
clean slate
4594
327.1.5 by Brian Aker
Refactor around classes. TABLE_LIST has been factored out of table.h
4595
static int binlog_log_row(Table* table,
1 by brian
clean slate
4596
                          const uchar *before_record,
4597
                          const uchar *after_record,
4598
                          Log_func *log_func)
4599
{
4600
  if (table->no_replicate)
4601
    return 0;
4602
  bool error= 0;
4603
  THD *const thd= table->in_use;
4604
4605
  if (check_table_binlog_row_based(thd, table))
4606
  {
4607
    /*
4608
      If there are no table maps written to the binary log, this is
4609
      the first row handled in this statement. In that case, we need
4610
      to write table maps for all locked tables to the binary log.
4611
    */
4612
    if (likely(!(error= write_locked_table_maps(thd))))
4613
    {
4614
      bool const has_trans= table->file->has_transactions();
4615
      error= (*log_func)(thd, table, has_trans, before_record, after_record);
4616
    }
4617
  }
4618
  return error ? HA_ERR_RBR_LOGGING_FAILED : 0;
4619
}
4620
4621
/*
  Wrapper around external_lock() that refreshes cached_table_flags.

  Returns whatever external_lock() returns. The cached table flags are only
  updated when that result is 0.
*/
int handler::ha_external_lock(THD *thd, int lock_type)
{
  /*
    Whether this is lock or unlock, this should be true, and is to verify that
    if get_auto_increment() was called (thus may have reserved intervals or
    taken a table lock), ha_release_auto_increment() was too.
  */
  assert(next_insert_id == 0);

  DRIZZLE_EXTERNAL_LOCK(lock_type);

  const int error= external_lock(thd, lock_type);
  if (error)
    return error;

  /*
    We cache the table flags if the locking succeeded. Otherwise, we
    keep them as they were when they were fetched in ha_open().
  */
  cached_table_flags= table_flags();
  return 0;
}
4641
4642
4643
/**
4644
  Check handler usage and reset state of file to after 'open'
4645
*/
4646
int handler::ha_reset()
4647
{
4648
  /* Check that we have called all proper deallocation functions */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4649
  assert((uchar*) table->def_read_set.bitmap +
1 by brian
clean slate
4650
              table->s->column_bitmap_size ==
4651
              (uchar*) table->def_write_set.bitmap);
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4652
  assert(bitmap_is_set_all(&table->s->all_set));
4653
  assert(table->key_read == 0);
1 by brian
clean slate
4654
  /* ensure that ha_index_end / ha_rnd_end has been called */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4655
  assert(inited == NONE);
1 by brian
clean slate
4656
  /* Free cache used by filesort */
4657
  free_io_cache(table);
4658
  /* reset the bitmaps to point to defaults */
4659
  table->default_column_bitmaps();
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4660
  return(reset());
1 by brian
clean slate
4661
}
4662
4663
4664
/*
  Insert a row: write_row() followed by row-based binary logging.

  Returns 0 on success, otherwise the error from write_row() or from
  binlog_log_row().

  NOTE(review): DRIZZLE_INSERT_ROW_END() is only reached on success; both
  error returns leave after DRIZZLE_INSERT_ROW_START() without it. Confirm
  against the probe definitions whether that is intended.
*/
int handler::ha_write_row(uchar *buf)
{
  DRIZZLE_INSERT_ROW_START();

  mark_trx_read_write();

  int error= write_row(buf);
  if (unlikely(error))
    return error;

  /* A plain insert has no before-image, only the new row */
  error= binlog_log_row(table, 0, buf,
                        Write_rows_log_event::binlog_row_logging_function);
  if (unlikely(error))
    return error; /* purecov: inspected */

  DRIZZLE_INSERT_ROW_END();
  return 0;
}
4679
4680
4681
int handler::ha_update_row(const uchar *old_data, uchar *new_data)
4682
{
4683
  int error;
4684
  Log_func *log_func= Update_rows_log_event::binlog_row_logging_function;
4685
4686
  /*
4687
    Some storage engines require that the new record is in record[0]
4688
    (and the old record is in record[1]).
4689
   */
51.1.77 by Jay Pipes
Standardized TRUE/FALSE, removed/replaced DBUG symbols
4690
  assert(new_data == table->record[0]);
1 by brian
clean slate
4691
4692
  mark_trx_read_write();
4693
4694
  if (unlikely(error= update_row(old_data, new_data)))
4695
    return error;
4696
  if (unlikely(error= binlog_log_row(table, old_data, new_data, log_func)))
4697
    return error;
4698
  return 0;
4699
}
4700
4701
int handler::ha_delete_row(const uchar *buf)
4702
{
4703
  int error;
4704
  Log_func *log_func= Delete_rows_log_event::binlog_row_logging_function;
4705
4706
  mark_trx_read_write();
4707
4708
  if (unlikely(error= delete_row(buf)))
4709
    return error;
4710
  if (unlikely(error= binlog_log_row(table, buf, 0, log_func)))
4711
    return error;
4712
  return 0;
4713
}
4714
4715
4716
4717
/**
4718
  @details
4719
  use_hidden_primary_key() is called in case of an update/delete when
4720
  (table_flags() and HA_PRIMARY_KEY_REQUIRED_FOR_DELETE) is defined
4721
  but we don't have a primary key
4722
*/
4723
void handler::use_hidden_primary_key()
4724
{
4725
  /* fallback to use all columns in the table to identify row */
4726
  table->use_all_columns();
4727
}