~drizzle-trunk/drizzle/development

1208.3.3 by brian
Adding missingfile.
1
/* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
2
 *  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
3
 *
4
 *  Copyright (C) 2008 Sun Microsystems
5
 *
6
 *  This program is free software; you can redistribute it and/or modify
7
 *  it under the terms of the GNU General Public License as published by
8
 *  the Free Software Foundation; version 2 of the License.
9
 *
10
 *  This program is distributed in the hope that it will be useful,
11
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 *  GNU General Public License for more details.
14
 *
15
 *  You should have received a copy of the GNU General Public License
16
 *  along with this program; if not, write to the Free Software
17
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18
 */
19
20
/**
21
  @file Cursor.cc
22
23
  Handler-calling-functions
24
*/
25
1241.9.36 by Monty Taylor
ZOMG. I deleted drizzled/server_includes.h.
26
#include "config.h"
1241.9.57 by Monty Taylor
Oy. Bigger change than I normally like - but this stuff is all intertwined.
27
#include "drizzled/my_hash.h"
1208.3.3 by brian
Adding missingfile.
28
#include "drizzled/error.h"
29
#include "drizzled/gettext.h"
30
#include "drizzled/probes.h"
31
#include "drizzled/sql_parse.h"
32
#include "drizzled/cost_vect.h"
33
#include "drizzled/session.h"
34
#include "drizzled/sql_base.h"
35
#include "drizzled/replication_services.h"
36
#include "drizzled/lock.h"
37
#include "drizzled/item/int.h"
38
#include "drizzled/item/empty_string.h"
39
#include "drizzled/unireg.h" // for mysql_frm_type
40
#include "drizzled/field/timestamp.h"
41
#include "drizzled/message/table.pb.h"
42
#include "drizzled/plugin/client.h"
1241.9.64 by Monty Taylor
Moved remaining non-public portions of mysys and mystrings to drizzled/internal.
43
#include "drizzled/internal/my_sys.h"
1208.3.3 by brian
Adding missingfile.
44
45
using namespace std;
46
using namespace drizzled;
47
48
/* Default key creation parameters: undefined algorithm, remaining members zero/NULL. */
KEY_CREATE_INFO default_key_create_info= { HA_KEY_ALG_UNDEF, 0, {NULL,0} };

/* Number of entries in storage_engines[]. */
uint32_t total_ha= 0;

/* Number of storage engines (from storage_engines[]) that support 2pc. */
uint32_t total_ha_2pc= 0;

/* Size of the savepoint storage area; grown in ha_init(). */
uint32_t savepoint_alloc_size= 0;

/*
  Printable row-type names (presumably indexed by the row-type enum value --
  confirm against its declaration). The trailing "?" entries are placeholders.
*/
const char *ha_row_type[] = {
  "",
  "FIXED",
  "DYNAMIC",
  "COMPRESSED",
  "REDUNDANT",
  "COMPACT",
  "PAGE",
  "?",
  "?",
  "?"
};
60
61
62
63
/**
64
  Register Cursor error messages for use with my_error().
65
66
  @retval
67
    0           OK
68
  @retval
69
    !=0         Error
70
*/
71
72
int ha_init_errors(void)
73
{
74
#define SETMSG(nr, msg) errmsgs[(nr) - HA_ERR_FIRST]= (msg)
75
  const char    **errmsgs;
76
77
  /* Allocate a pointer array for the error message strings. */
78
  /* Zerofill it to avoid uninitialized gaps. */
79
  if (! (errmsgs= (const char**) malloc(HA_ERR_ERRORS * sizeof(char*))))
80
    return 1;
81
  memset(errmsgs, 0, HA_ERR_ERRORS * sizeof(char *));
82
83
  /* Set the dedicated error messages. */
84
  SETMSG(HA_ERR_KEY_NOT_FOUND,          ER(ER_KEY_NOT_FOUND));
85
  SETMSG(HA_ERR_FOUND_DUPP_KEY,         ER(ER_DUP_KEY));
86
  SETMSG(HA_ERR_RECORD_CHANGED,         "Update wich is recoverable");
87
  SETMSG(HA_ERR_WRONG_INDEX,            "Wrong index given to function");
88
  SETMSG(HA_ERR_CRASHED,                ER(ER_NOT_KEYFILE));
89
  SETMSG(HA_ERR_WRONG_IN_RECORD,        ER(ER_CRASHED_ON_USAGE));
90
  SETMSG(HA_ERR_OUT_OF_MEM,             "Table Cursor out of memory");
91
  SETMSG(HA_ERR_NOT_A_TABLE,            "Incorrect file format '%.64s'");
92
  SETMSG(HA_ERR_WRONG_COMMAND,          "Command not supported");
93
  SETMSG(HA_ERR_OLD_FILE,               ER(ER_OLD_KEYFILE));
94
  SETMSG(HA_ERR_NO_ACTIVE_RECORD,       "No record read in update");
95
  SETMSG(HA_ERR_RECORD_DELETED,         "Intern record deleted");
96
  SETMSG(HA_ERR_RECORD_FILE_FULL,       ER(ER_RECORD_FILE_FULL));
97
  SETMSG(HA_ERR_INDEX_FILE_FULL,        "No more room in index file '%.64s'");
98
  SETMSG(HA_ERR_END_OF_FILE,            "End in next/prev/first/last");
99
  SETMSG(HA_ERR_UNSUPPORTED,            ER(ER_ILLEGAL_HA));
100
  SETMSG(HA_ERR_TO_BIG_ROW,             "Too big row");
101
  SETMSG(HA_WRONG_CREATE_OPTION,        "Wrong create option");
102
  SETMSG(HA_ERR_FOUND_DUPP_UNIQUE,      ER(ER_DUP_UNIQUE));
103
  SETMSG(HA_ERR_UNKNOWN_CHARSET,        "Can't open charset");
104
  SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF,    ER(ER_WRONG_MRG_TABLE));
105
  SETMSG(HA_ERR_CRASHED_ON_REPAIR,      ER(ER_CRASHED_ON_REPAIR));
106
  SETMSG(HA_ERR_CRASHED_ON_USAGE,       ER(ER_CRASHED_ON_USAGE));
107
  SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT,      ER(ER_LOCK_WAIT_TIMEOUT));
108
  SETMSG(HA_ERR_LOCK_TABLE_FULL,        ER(ER_LOCK_TABLE_FULL));
109
  SETMSG(HA_ERR_READ_ONLY_TRANSACTION,  ER(ER_READ_ONLY_TRANSACTION));
110
  SETMSG(HA_ERR_LOCK_DEADLOCK,          ER(ER_LOCK_DEADLOCK));
111
  SETMSG(HA_ERR_CANNOT_ADD_FOREIGN,     ER(ER_CANNOT_ADD_FOREIGN));
112
  SETMSG(HA_ERR_NO_REFERENCED_ROW,      ER(ER_NO_REFERENCED_ROW_2));
113
  SETMSG(HA_ERR_ROW_IS_REFERENCED,      ER(ER_ROW_IS_REFERENCED_2));
114
  SETMSG(HA_ERR_NO_SAVEPOINT,           "No savepoint with that name");
115
  SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE,  "Non unique key block size");
116
  SETMSG(HA_ERR_NO_SUCH_TABLE,          "No such table: '%.64s'");
117
  SETMSG(HA_ERR_TABLE_EXIST,            ER(ER_TABLE_EXISTS_ERROR));
118
  SETMSG(HA_ERR_NO_CONNECTION,          "Could not connect to storage engine");
119
  SETMSG(HA_ERR_TABLE_DEF_CHANGED,      ER(ER_TABLE_DEF_CHANGED));
120
  SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY,  "FK constraint would lead to duplicate key");
121
  SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE,    ER(ER_TABLE_NEEDS_UPGRADE));
122
  SETMSG(HA_ERR_TABLE_READONLY,         ER(ER_OPEN_AS_READONLY));
123
  SETMSG(HA_ERR_AUTOINC_READ_FAILED,    ER(ER_AUTOINC_READ_FAILED));
124
  SETMSG(HA_ERR_AUTOINC_ERANGE,         ER(ER_WARN_DATA_OUT_OF_RANGE));
125
126
  /* Register the error messages for use with my_error(). */
127
  return my_error_register(errmsgs, HA_ERR_FIRST, HA_ERR_LAST);
128
}
129
130
131
/**
132
  Unregister Cursor error messages.
133
134
  @retval
135
    0           OK
136
  @retval
137
    !=0         Error
138
*/
139
static int ha_finish_errors(void)
140
{
141
  const char    **errmsgs;
142
143
  /* Allocate a pointer array for the error message strings. */
144
  if (! (errmsgs= my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST)))
145
    return 1;
146
  free((unsigned char*) errmsgs);
147
  return 0;
148
}
149
150
int ha_init()
151
{
152
  int error= 0;
153
154
  assert(total_ha < MAX_HA);
155
  /*
156
    Check if there is a transaction-capable storage engine besides the
157
    binary log (which is considered a transaction-capable storage engine in
158
    counting total_ha)
159
  */
160
  savepoint_alloc_size+= sizeof(SAVEPOINT);
161
  return error;
162
}
163
164
int ha_end()
165
{
166
  int error= 0;
167
168
  /*
169
    This should be eventualy based  on the graceful shutdown flag.
170
    So if flag is equal to HA_PANIC_CLOSE, the deallocate
171
    the errors.
172
  */
173
  if (ha_finish_errors())
174
    error= 1;
175
176
  return error;
177
}
178
179
180
181
/* ========================================================================
182
 ======================= TRANSACTIONS ===================================*/
183
184
/**
185
  Transaction handling in the server
186
  ==================================
187
188
  In each client connection, MySQL maintains two transactional
189
  states:
190
  - a statement transaction,
191
  - a standard, also called normal transaction.
192
193
  Historical note
194
  ---------------
195
  "Statement transaction" is a non-standard term that comes
196
  from the times when MySQL supported BerkeleyDB storage engine.
197
198
  First of all, it should be said that in BerkeleyDB auto-commit
199
  mode auto-commits operations that are atomic to the storage
200
  engine itself, such as a write of a record, and are too
201
  high-granular to be atomic from the application perspective
202
  (MySQL). One SQL statement could involve many BerkeleyDB
203
  auto-committed operations and thus BerkeleyDB auto-commit was of
204
  little use to MySQL.
205
206
  Secondly, instead of SQL standard savepoints, BerkeleyDB
207
  provided the concept of "nested transactions". In a nutshell,
208
  transactions could be arbitrarily nested, but when the parent
209
  transaction was committed or aborted, all its child (nested)
210
  transactions were committed or aborted as well.
211
  Commit of a nested transaction, in turn, made its changes
212
  visible, but not durable: it destroyed the nested transaction,
213
  all its changes would become available to the parent and
214
  currently active nested transactions of this parent.
215
216
  So the mechanism of nested transactions was employed to
217
  provide "all or nothing" guarantee of SQL statements
218
  required by the standard.
219
  A nested transaction would be created at start of each SQL
220
  statement, and destroyed (committed or aborted) at statement
221
  end. Such nested transaction was internally referred to as
222
  a "statement transaction" and gave birth to the term.
223
224
  <Historical note ends>
225
226
  Since then a statement transaction is started for each statement
227
  that accesses transactional tables or uses the binary log.  If
228
  the statement succeeds, the statement transaction is committed.
229
  If the statement fails, the transaction is rolled back. Commits
230
  of statement transactions are not durable -- each such
231
  transaction is nested in the normal transaction, and if the
232
  normal transaction is rolled back, the effects of all enclosed
233
  statement transactions are undone as well.  Technically,
234
  a statement transaction can be viewed as a savepoint which is
235
  maintained automatically in order to make effects of one
236
  statement atomic.
237
238
  The normal transaction is started by the user and is ended
239
  usually upon a user request as well. The normal transaction
240
  encloses transactions of all statements issued between
241
  its beginning and its end.
242
  In autocommit mode, the normal transaction is equivalent
243
  to the statement transaction.
244
245
  Since MySQL supports PSEA (pluggable storage engine
246
  architecture), more than one transactional engine can be
247
  active at a time. Hence transactions, from the server
248
  point of view, are always distributed. In particular,
249
  transactional state is maintained independently for each
250
  engine. In order to commit a transaction the two phase
251
  commit protocol is employed.
252
253
  Not all statements are executed in context of a transaction.
254
  Administrative and status information statements do not modify
255
  engine data, and thus do not start a statement transaction and
256
  also have no effect on the normal transaction. Examples of such
257
  statements are SHOW STATUS and RESET SLAVE.
258
259
  Similarly DDL statements are not transactional,
260
  and therefore a transaction is [almost] never started for a DDL
261
  statement. The difference between a DDL statement and a purely
262
  administrative statement though is that a DDL statement always
263
  commits the current transaction before proceeding, if there is
264
  any.
265
266
  At last, SQL statements that work with non-transactional
267
  engines also have no effect on the transaction state of the
268
  connection. Even though they are written to the binary log,
269
  and the binary log is, overall, transactional, the writes
270
  are done in "write-through" mode, directly to the binlog
271
  file, followed with a OS cache sync, in other words,
272
  bypassing the binlog undo log (translog).
273
  They do not commit the current normal transaction.
274
  A failure of a statement that uses non-transactional tables
275
  would cause a rollback of the statement transaction, but
276
  in case no transactional tables are used,
277
  no statement transaction is started.
278
279
  Data layout
280
  -----------
281
282
  The server stores its transaction-related data in
283
  session->transaction. This structure has two members of type
284
  Session_TRANS. These members correspond to the statement and
285
  normal transactions respectively:
286
287
  - session->transaction.stmt contains a list of engines
288
  that are participating in the given statement
289
  - session->transaction.all contains a list of engines that
290
  have participated in any of the statement transactions started
291
  within the context of the normal transaction.
292
  Each element of the list contains a pointer to the storage
293
  engine, engine-specific transactional data, and engine-specific
294
  transaction flags.
295
296
  In autocommit mode session->transaction.all is empty.
297
  Instead, data of session->transaction.stmt is
298
  used to commit/rollback the normal transaction.
299
300
  The list of registered engines has a few important properties:
301
  - no engine is registered in the list twice
302
  - engines are present in the list in reverse temporal order --
303
  new participants are always added to the beginning of the list.
304
305
  Transaction life cycle
306
  ----------------------
307
308
  When a new connection is established, session->transaction
309
  members are initialized to an empty state.
310
  If a statement uses any tables, all affected engines
311
  are registered in the statement engine list. In
312
  non-autocommit mode, the same engines are registered in
313
  the normal transaction list.
314
  At the end of the statement, the server issues a commit
315
  or a roll back for all engines in the statement list.
316
  At this point transaction flags of an engine, if any, are
317
  propagated from the statement list to the list of the normal
318
  transaction.
319
  When commit/rollback is finished, the statement list is
320
  cleared. It will be filled in again by the next statement,
321
  and emptied again at the next statement's end.
322
323
  The normal transaction is committed in a similar way
324
  (by going over all engines in session->transaction.all list)
325
  but at different times:
326
  - when a COMMIT SQL statement is issued by the user
327
  - implicitly, by the server, at the beginning of a DDL statement
328
  or SET AUTOCOMMIT={0|1} statement.
329
330
  The normal transaction can be rolled back as well:
331
  - if the user has requested so, by issuing ROLLBACK SQL
332
  statement
333
  - if one of the storage engines requested a rollback
334
  by setting session->transaction_rollback_request. This may
335
  happen in case, e.g., when the transaction in the engine was
336
  chosen as a victim of the internal deadlock resolution algorithm
337
  and rolled back internally. When such a situation happens, there
338
  is little the server can do and the only option is to rollback
339
  transactions in all other participating engines.  In this case
340
  the rollback is accompanied by an error sent to the user.
341
342
  As follows from the use cases above, the normal transaction
343
  is never committed when there is an outstanding statement
344
  transaction. In most cases there is no conflict, since
345
  commits of the normal transaction are issued by a stand-alone
346
  administrative or DDL statement, thus no outstanding statement
347
  transaction of the previous statement exists. Besides,
348
  all statements that manipulate the normal transaction
349
  are prohibited in stored functions and triggers, therefore
350
  no conflicting situation can occur in a sub-statement either.
351
  The remaining rare cases when the server explicitly has
352
  to commit the statement transaction prior to committing the normal
353
  one cover error-handling scenarios (see for example
354
  ?).
355
356
  When committing a statement or a normal transaction, the server
357
  either uses the two-phase commit protocol, or issues a commit
358
  in each engine independently. The two-phase commit protocol
359
  is used only if:
360
  - all participating engines support two-phase commit (provide
361
    plugin::StorageEngine::prepare PSEA API call) and
362
  - transactions in at least two engines modify data (i.e. are
363
  not read-only).
364
365
  Note that the two phase commit is used for
366
  statement transactions, even though they are not durable anyway.
367
  This is done to ensure logical consistency of data in a multiple-
368
  engine transaction.
369
  For example, imagine that some day MySQL supports unique
370
  constraint checks deferred till the end of statement. In such
371
  case a commit in one of the engines may yield ER_DUP_KEY,
372
  and MySQL should be able to gracefully abort statement
373
  transactions of other participants.
374
375
  After the normal transaction has been committed,
376
  session->transaction.all list is cleared.
377
378
  When a connection is closed, the current normal transaction, if
379
  any, is rolled back.
380
381
  Roles and responsibilities
382
  --------------------------
383
384
  The server has no way to know that an engine participates in
385
  the statement and a transaction has been started
386
  in it unless the engine says so. Thus, in order to be
387
  a part of a transaction, the engine must "register" itself.
388
  This is done by invoking trans_register_ha() server call.
389
  Normally the engine registers itself whenever Cursor::external_lock()
390
  is called. trans_register_ha() can be invoked many times: if
391
  an engine is already registered, the call does nothing.
392
  In case autocommit is not set, the engine must register itself
393
  twice -- both in the statement list and in the normal transaction
394
  list.
395
  In which list to register is a parameter of trans_register_ha().
396
397
  Note, that although the registration interface in itself is
398
  fairly clear, the current usage practice often leads to undesired
399
  effects. E.g. since a call to trans_register_ha() in most engines
400
  is embedded into implementation of Cursor::external_lock(), some
401
  DDL statements start a transaction (at least from the server
402
  point of view) even though they are not expected to. E.g.
403
  CREATE TABLE does not start a transaction, since
404
  Cursor::external_lock() is never called during CREATE TABLE. But
405
  CREATE TABLE ... SELECT does, since Cursor::external_lock() is
406
  called for the table that is being selected from. This has no
407
  practical effects currently, but must be kept in mind
408
  nevertheless.
409
410
  Once an engine is registered, the server will do the rest
411
  of the work.
412
413
  During statement execution, whenever any of data-modifying
414
  PSEA API methods is used, e.g. Cursor::write_row() or
415
  Cursor::update_row(), the read-write flag is raised in the
416
  statement transaction for the involved engine.
417
  Currently all PSEA calls are "traced", and the data can not be
418
  changed in a way other than issuing a PSEA call. Important:
419
  unless this invariant is preserved the server will not know that
420
  a transaction in a given engine is read-write and will not
421
  involve the two-phase commit protocol!
422
423
  At the end of a statement, server call
424
  ha_autocommit_or_rollback() is invoked. This call in turn
425
  invokes plugin::StorageEngine::prepare() for every involved engine.
426
  Prepare is followed by a call to plugin::StorageEngine::commit_one_phase()
427
  If a one-phase commit will suffice, plugin::StorageEngine::prepare() is not
428
  invoked and the server only calls plugin::StorageEngine::commit_one_phase().
429
  At statement commit, the statement-related read-write engine
430
  flag is propagated to the corresponding flag in the normal
431
  transaction.  When the commit is complete, the list of registered
432
  engines is cleared.
433
434
  Rollback is handled in a similar fashion.
435
436
  Additional notes on DDL and the normal transaction.
437
  ---------------------------------------------------
438
439
  DDLs and operations with non-transactional engines
440
  do not "register" in session->transaction lists, and thus do not
441
  modify the transaction state. Besides, each DDL in
442
  MySQL is prefixed with an implicit normal transaction commit
443
  (a call to Session::endActiveTransaction()), and thus leaves nothing
444
  to modify.
445
  However, as it has been pointed out with CREATE TABLE .. SELECT,
446
  some DDL statements can start a *new* transaction.
447
448
  Behaviour of the server in this case is currently badly
449
  defined.
450
  DDL statements use a form of "semantic" logging
451
  to maintain atomicity: if CREATE TABLE .. SELECT failed,
452
  the newly created table is deleted.
453
  In addition, some DDL statements issue interim transaction
454
  commits: e.g. ALTER Table issues a commit after data is copied
455
  from the original table to the internal temporary table. Other
456
  statements, e.g. CREATE TABLE ... SELECT do not always commit
457
  after themselves.
458
  And finally there is a group of DDL statements such as
459
  RENAME/DROP Table that doesn't start a new transaction
460
  and doesn't commit.
461
462
  This diversity makes it hard to say what will happen if
463
  by chance a stored function is invoked during a DDL --
464
  whether any modifications it makes will be committed or not
465
  is not clear. Fortunately, SQL grammar of few DDLs allows
466
  invocation of a stored function.
467
468
  A consistent behaviour is perhaps to always commit the normal
469
  transaction after all DDLs, just like the statement transaction
470
  is always committed at the end of all statements.
471
*/
472
473
/**
474
  Register a storage engine for a transaction.
475
476
  Every storage engine MUST call this function when it starts
477
  a transaction or a statement (that is it must be called both for the
478
  "beginning of transaction" and "beginning of statement").
479
  Only storage engines registered for the transaction/statement
480
  will know when to commit/rollback it.
481
482
  @note
483
    trans_register_ha is idempotent - storage engine may register many
484
    times per transaction.
485
486
*/
487
void trans_register_ha(Session *session, bool all, plugin::StorageEngine *engine)
488
{
489
  Session_TRANS *trans;
490
  Ha_trx_info *ha_info;
491
492
  if (all)
493
  {
494
    trans= &session->transaction.all;
495
    session->server_status|= SERVER_STATUS_IN_TRANS;
496
  }
497
  else
498
    trans= &session->transaction.stmt;
499
1240.9.6 by Monty Taylor
Removed some casts- also removed a few c-interface functions and made them actual methods on session. Also made the ha_data private. (fancy that)
500
  ha_info= session->getEngineInfo(engine, all ? 1 : 0);
1208.3.3 by brian
Adding missingfile.
501
502
  if (ha_info->is_started())
503
    return; /* already registered, return */
504
505
  ha_info->register_ha(trans, engine);
506
507
  trans->no_2pc|= not engine->has_2pc();
508
  if (session->transaction.xid_state.xid.is_null())
509
    session->transaction.xid_state.xid.set(session->query_id);
510
}
511
512
/**
513
  Check if we can skip the two-phase commit.
514
515
  A helper function to evaluate if two-phase commit is mandatory.
516
  As a side effect, propagates the read-only/read-write flags
517
  of the statement transaction to its enclosing normal transaction.
518
519
  @retval true   we must run a two-phase commit. Returned
520
                 if we have at least two engines with read-write changes.
521
  @retval false  Don't need two-phase commit. Even if we have two
522
                 transactional engines, we can run two independent
523
                 commits if changes in one of the engines are read-only.
524
*/
525
526
static
527
bool
528
ha_check_and_coalesce_trx_read_only(Session *session, Ha_trx_info *ha_list,
529
                                    bool all)
530
{
531
  /* The number of storage engines that have actual changes. */
532
  unsigned rw_ha_count= 0;
533
  Ha_trx_info *ha_info;
534
535
  for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
536
  {
537
    if (ha_info->is_trx_read_write())
538
      ++rw_ha_count;
539
540
    if (! all)
541
    {
1240.9.6 by Monty Taylor
Removed some casts- also removed a few c-interface functions and made them actual methods on session. Also made the ha_data private. (fancy that)
542
      Ha_trx_info *ha_info_all= session->getEngineInfo(ha_info->engine(), 1);
1208.3.3 by brian
Adding missingfile.
543
      assert(ha_info != ha_info_all);
544
      /*
545
        Merge read-only/read-write information about statement
546
        transaction to its enclosing normal transaction. Do this
547
        only if in a real transaction -- that is, if we know
548
        that ha_info_all is registered in session->transaction.all.
549
        Since otherwise we only clutter the normal transaction flags.
550
      */
551
      if (ha_info_all->is_started()) /* false if autocommit. */
552
        ha_info_all->coalesce_trx_with(ha_info);
553
    }
554
    else if (rw_ha_count > 1)
555
    {
556
      /*
557
        It is a normal transaction, so we don't need to merge read/write
558
        information up, and the need for two-phase commit has been
559
        already established. Break the loop prematurely.
560
      */
561
      break;
562
    }
563
  }
564
  return rw_ha_count > 1;
565
}
566
567
568
/**
569
  @retval
570
    0   ok
571
  @retval
572
    1   transaction was rolled back
573
  @retval
574
    2   error during commit, data may be inconsistent
575
576
  @todo
577
    Since we don't support nested statement transactions in 5.0,
578
    we can't commit or rollback stmt transactions while we are inside
579
    stored functions or triggers. So we simply do nothing now.
580
    TODO: This should be fixed in later ( >= 5.1) releases.
581
*/
582
int ha_commit_trans(Session *session, bool all)
583
{
584
  int error= 0, cookie= 0;
585
  /*
586
    'all' means that this is either an explicit commit issued by
587
    user, or an implicit commit issued by a DDL.
588
  */
589
  Session_TRANS *trans= all ? &session->transaction.all : &session->transaction.stmt;
590
  bool is_real_trans= all || session->transaction.all.ha_list == 0;
591
  Ha_trx_info *ha_info= trans->ha_list;
592
593
  /*
594
    We must not commit the normal transaction if a statement
595
    transaction is pending. Otherwise statement transaction
596
    flags will not get propagated to its normal transaction's
597
    counterpart.
598
  */
599
  assert(session->transaction.stmt.ha_list == NULL ||
600
              trans == &session->transaction.stmt);
601
602
  if (ha_info)
603
  {
604
    bool must_2pc;
605
606
    if (is_real_trans && wait_if_global_read_lock(session, 0, 0))
607
    {
608
      ha_rollback_trans(session, all);
609
      return 1;
610
    }
611
612
    must_2pc= ha_check_and_coalesce_trx_read_only(session, ha_info, all);
613
614
    if (!trans->no_2pc && must_2pc)
615
    {
616
      for (; ha_info && !error; ha_info= ha_info->next())
617
      {
618
        int err;
619
        plugin::StorageEngine *engine= ha_info->engine();
620
        /*
621
          Do not call two-phase commit if this particular
622
          transaction is read-only. This allows for simpler
623
          implementation in engines that are always read-only.
624
        */
625
        if (! ha_info->is_trx_read_write())
626
          continue;
627
        /*
628
          Sic: we know that prepare() is not NULL since otherwise
629
          trans->no_2pc would have been set.
630
        */
631
        if ((err= engine->prepare(session, all)))
632
        {
633
          my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
634
          error= 1;
635
        }
636
        status_var_increment(session->status_var.ha_prepare_count);
637
      }
638
      if (error)
639
      {
640
        ha_rollback_trans(session, all);
641
        error= 1;
642
        goto end;
643
      }
644
    }
645
    error=ha_commit_one_phase(session, all) ? (cookie ? 2 : 1) : 0;
646
end:
647
    if (is_real_trans)
648
      start_waiting_global_read_lock(session);
649
  }
650
  return error;
651
}
652
653
/**
654
  @note
655
  This function does not care about global read lock. A caller should.
656
*/
657
int ha_commit_one_phase(Session *session, bool all)
658
{
659
  int error=0;
660
  Session_TRANS *trans=all ? &session->transaction.all : &session->transaction.stmt;
661
  bool is_real_trans=all || session->transaction.all.ha_list == 0;
662
  Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;
663
  if (ha_info)
664
  {
665
    for (; ha_info; ha_info= ha_info_next)
666
    {
667
      int err;
668
      plugin::StorageEngine *engine= ha_info->engine();
669
      if ((err= engine->commit(session, all)))
670
      {
671
        my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);
672
        error=1;
673
      }
674
      status_var_increment(session->status_var.ha_commit_count);
675
      ha_info_next= ha_info->next();
676
      ha_info->reset(); /* keep it conveniently zero-filled */
677
    }
678
    trans->ha_list= 0;
679
    trans->no_2pc=0;
680
    if (is_real_trans)
681
      session->transaction.xid_state.xid.null();
682
    if (all)
683
    {
684
      session->variables.tx_isolation=session->session_tx_isolation;
685
      session->transaction.cleanup();
686
    }
687
  }
688
  if (error == 0)
689
  {
690
    if (is_real_trans)
691
    {
692
      /* 
693
        * We commit the normal transaction by finalizing the transaction message
694
        * and propogating the message to all registered replicators.
695
        */
696
      ReplicationServices &replication_services= ReplicationServices::singleton();
1143.4.7 by Jay Pipes
Removes unused ReplicationServices::startNormalTransaction() and switches from while to for loop in evaluateActivePlugins().
697
      replication_services.commitTransaction(session);
1208.3.3 by brian
Adding missingfile.
698
    }
699
  }
700
  return error;
701
}
702
703
704
/**
  Roll back every engine registered in the selected transaction list and
  clear the list.

  @param session  session whose transaction is rolled back
  @param all      true for the normal transaction, false for the statement
                  transaction

  @retval 0  ok
  @retval 1  at least one engine reported an error (already reported through
             my_error())
*/
int ha_rollback_trans(Session *session, bool all)
{
  Session_TRANS *trans= all ? &session->transaction.all : &session->transaction.stmt;
  /* Must be evaluated before the engine lists are reset below. */
  const bool is_real_trans= all || session->transaction.all.ha_list == 0;
  int error= 0;

  /*
    We must not rollback the normal transaction if a statement
    transaction is pending.
  */
  assert(session->transaction.stmt.ha_list == NULL ||
              trans == &session->transaction.stmt);

  if (trans->ha_list)
  {
    Ha_trx_info *current= trans->ha_list;
    while (current)
    {
      const int err= current->engine()->rollback(session, all);
      if (err)
      { // cannot happen
        my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
        error= 1;
      }
      status_var_increment(session->status_var.ha_rollback_count);

      /* Fetch the successor first: reset() wipes the entry. */
      Ha_trx_info *successor= current->next();
      current->reset(); /* keep it conveniently zero-filled */
      current= successor;
    }

    trans->ha_list= 0;
    trans->no_2pc= 0;

    /*
      We need to signal the ROLLBACK to ReplicationServices here
      BEFORE we set the transaction ID to NULL.  This is because
      if a bulk segment was sent to replicators, we need to send
      a rollback statement with the corresponding transaction ID
      to rollback.
    */
    ReplicationServices::singleton().rollbackTransaction(session);

    if (is_real_trans)
      session->transaction.xid_state.xid.null();

    if (all)
    {
      session->variables.tx_isolation= session->session_tx_isolation;
      session->transaction.cleanup();
    }
  }

  if (all)
    session->transaction_rollback_request= false;

  /*
    If a non-transactional table was updated, the rollback cannot be
    complete: warn about it, unless the connection is being killed.
  */
  const bool incomplete_rollback=
    is_real_trans &&
    session->transaction.all.modified_non_trans_table &&
    session->killed != Session::KILL_CONNECTION;

  if (incomplete_rollback)
    push_warning(session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
                 ER_WARNING_NOT_COMPLETE_ROLLBACK,
                 ER(ER_WARNING_NOT_COMPLETE_ROLLBACK));

  return error;
}
772
773
/**
774
  This is used to commit or rollback a single statement depending on
775
  the value of error.
776
777
  @note
778
    Note that if the autocommit is on, then the following call inside
779
    InnoDB will commit or rollback the whole transaction (= the statement). The
780
    autocommit mechanism built into InnoDB is based on counting locks, but if
781
    the user has used LOCK TABLES then that mechanism does not know to do the
782
    commit.
783
*/
784
int ha_autocommit_or_rollback(Session *session, int error)
785
{
786
  if (session->transaction.stmt.ha_list)
787
  {
788
    if (!error)
789
    {
790
      if (ha_commit_trans(session, 0))
791
        error= 1;
792
    }
793
    else
794
    {
795
      (void) ha_rollback_trans(session, 0);
796
      if (session->transaction_rollback_request)
797
        (void) ha_rollback(session);
798
    }
799
800
    session->variables.tx_isolation=session->session_tx_isolation;
801
  }
802
803
  return error;
804
}
805
806
/**
807
  return the list of XID's to a client, the same way SHOW commands do.
808
809
  @note
810
    I didn't find in XA specs that an RM cannot return the same XID twice,
811
    so mysql_xa_recover does not filter XID's to ensure uniqueness.
812
    It can be easily fixed later, if necessary.
813
*/
814
bool mysql_xa_recover(Session *session)
815
{
816
  List<Item> field_list;
817
  int i= 0;
818
  XID_STATE *xs;
819
820
  field_list.push_back(new Item_int("formatID", 0, MY_INT32_NUM_DECIMAL_DIGITS));
821
  field_list.push_back(new Item_int("gtrid_length", 0, MY_INT32_NUM_DECIMAL_DIGITS));
822
  field_list.push_back(new Item_int("bqual_length", 0, MY_INT32_NUM_DECIMAL_DIGITS));
823
  field_list.push_back(new Item_empty_string("data",XIDDATASIZE));
824
825
  if (session->client->sendFields(&field_list))
826
    return 1;
827
828
  pthread_mutex_lock(&LOCK_xid_cache);
829
  while ((xs= (XID_STATE*)hash_element(&xid_cache, i++)))
830
  {
831
    if (xs->xa_state==XA_PREPARED)
832
    {
833
      session->client->store((int64_t)xs->xid.formatID);
834
      session->client->store((int64_t)xs->xid.gtrid_length);
835
      session->client->store((int64_t)xs->xid.bqual_length);
836
      session->client->store(xs->xid.data,
837
                             xs->xid.gtrid_length+xs->xid.bqual_length);
838
      if (session->client->flush())
839
      {
840
        pthread_mutex_unlock(&LOCK_xid_cache);
841
        return 1;
842
      }
843
    }
844
  }
845
846
  pthread_mutex_unlock(&LOCK_xid_cache);
847
  session->my_eof();
848
  return 0;
849
}
850
851
852
/**
  Roll the session's transaction back to savepoint @p sv.

  Engines on the savepoint's ha_list get savepoint_rollback(); engines at
  the head of the transaction's ha_list, up to the savepoint's list, get a
  full rollback() and their Ha_trx_info is reset.  The transaction's
  ha_list is then set to the savepoint's ha_list.

  @param session  session owning the transaction
  @param sv       savepoint to roll back to; the memory directly after
                  the SAVEPOINT object is what gets handed to the engines

  @return 0 if every engine call returned 0, otherwise 1 (an error has
          been raised through my_error() for each failing call).
*/
int ha_rollback_to_savepoint(Session *session, SAVEPOINT *sv)
{
  Session_TRANS *trans= &session->transaction.all;
  void *sv_payload= static_cast<void *>(sv + 1);
  int error= 0;

  /* Recomputed below from the engines that remain in the transaction. */
  trans->no_2pc= 0;

  /*
    Step 1: engines that were already part of the transaction when the
    savepoint was set are rolled back to the savepoint.
  */
  Ha_trx_info *info= sv->ha_list;
  while (info)
  {
    plugin::StorageEngine *engine= info->engine();
    assert(engine);

    int err= engine->savepoint_rollback(session, sv_payload);
    if (err)
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
      error= 1;
    }
    status_var_increment(session->status_var.ha_savepoint_rollback_count);
    trans->no_2pc|= not engine->has_2pc();

    info= info->next();
  }

  /*
    Step 2: engines that were not part of the transaction when the
    savepoint was set are rolled back completely.
  */
  info= trans->ha_list;
  while (info != sv->ha_list)
  {
    plugin::StorageEngine *engine= info->engine();

    int err= engine->rollback(session, true);
    if (err)
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
      error= 1;
    }
    status_var_increment(session->status_var.ha_rollback_count);

    /* Fetch the successor first: reset() is called on the current node. */
    Ha_trx_info *following= info->next();
    info->reset(); /* keep it conveniently zero-filled */
    info= following;
  }

  trans->ha_list= sv->ha_list;
  return error;
}
898
899
/**
900
  @note
901
  according to the sql standard (ISO/IEC 9075-2:2003)
902
  section "4.33.4 SQL-statements and transaction states",
903
  SAVEPOINT is *not* transaction-initiating SQL-statement
904
*/
905
int ha_savepoint(Session *session, SAVEPOINT *sv)
906
{
907
  int error= 0;
908
  Session_TRANS *trans= &session->transaction.all;
909
  Ha_trx_info *ha_info= trans->ha_list;
910
  for (; ha_info; ha_info= ha_info->next())
911
  {
912
    int err;
913
    plugin::StorageEngine *engine= ha_info->engine();
914
    assert(engine);
915
#ifdef NOT_IMPLEMENTED /*- TODO (examine this againt the original code base) */
916
    if (! engine->savepoint_set)
917
    {
918
      my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");
919
      error= 1;
920
      break;
921
    } 
922
#endif
923
    if ((err= engine->savepoint_set(session, (void *)(sv+1))))
924
    { // cannot happen
925
      my_error(ER_GET_ERRNO, MYF(0), err);
926
      error= 1;
927
    }
928
    status_var_increment(session->status_var.ha_savepoint_count);
929
  }
930
  /*
931
    Remember the list of registered storage engines. All new
932
    engines are prepended to the beginning of the list.
933
  */
934
  sv->ha_list= trans->ha_list;
935
  return error;
936
}
937
938
/**
  Release savepoint @p sv in every engine on the savepoint's ha_list.

  @param session  session owning the transaction
  @param sv       savepoint to release; the memory directly after the
                  SAVEPOINT object is what gets handed to the engines

  @return 0 if every savepoint_release() call returned 0, otherwise 1.
*/
int ha_release_savepoint(Session *session, SAVEPOINT *sv)
{
  void *sv_payload= static_cast<void *>(sv + 1);
  int error= 0;

  for (Ha_trx_info *info= sv->ha_list; info; info= info->next())
  {
    plugin::StorageEngine *engine= info->engine();
    /* Savepoint life time is enclosed into transaction life time. */
    assert(engine);

    int err= engine->savepoint_release(session, sv_payload);
    if (err)
    { // cannot happen
      my_error(ER_GET_ERRNO, MYF(0), err);
      error= 1;
    }
  }

  return error;
}