~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to drizzled/ha_commands.cc

Committer: Monty Taylor
Date: 2009-11-10 20:56:43 UTC
mto: (1220.1.1 staging) (1192.7.1 pandora-build) (1238.2.1 build) (1240.1.5 build) (1228.1.6 new-plugin-dynamic-load) (1237.10.1 new-plugin-dynamic-load)
mto: This revision was merged to the branch mainline in revision 1221.
Revision ID: mordred@inaugust.com-20091110205643-dncfuoga5ayth5e6

Fixed bashism.

files added:
config/pandora-plugin.am

plugin/information_engine/information_share.cc

plugin/information_engine/information_share.h

files removed:
drizzled/ha_commands.cc

plugin/information_engine/information_cursor.cc

plugin/information_engine/information_cursor.h

plugin/mysql_protocol

plugin/mysql_protocol/errmsg.cc

plugin/mysql_protocol/errmsg.h

plugin/mysql_protocol/net_serv.cc

plugin/mysql_protocol/net_serv.h

plugin/mysql_protocol/oldlibdrizzle.cc

plugin/mysql_protocol/oldlibdrizzle.h

plugin/mysql_protocol/options.h

plugin/mysql_protocol/pack.cc

plugin/mysql_protocol/pack.h

plugin/mysql_protocol/plugin.ini

plugin/mysql_protocol/vio.cc

plugin/mysql_protocol/vio.h

plugin/mysql_protocol/viosocket.cc

tests/suite/transaction_log/r/blob.result

tests/suite/transaction_log/r/create_select.result

tests/suite/transaction_log/r/rand.result

tests/suite/transaction_log/t/blob-master.opt

tests/suite/transaction_log/t/blob.inc

tests/suite/transaction_log/t/blob.test

tests/suite/transaction_log/t/create_select-master.opt

tests/suite/transaction_log/t/create_select.inc

tests/suite/transaction_log/t/create_select.test

tests/suite/transaction_log/t/rand-master.opt

tests/suite/transaction_log/t/rand.test

files renamed:
plugin/drizzle_protocol/ => plugin/oldlibdrizzle/

files modified:
.bzrignore

Makefile.am

config/pre_hook.sh

config/register_plugins.py

drizzled/Makefile.am

drizzled/common.h

drizzled/create_field.cc

drizzled/cursor.cc

drizzled/cursor.h

drizzled/drizzled.cc

drizzled/enum.h

drizzled/field.cc

drizzled/field/decimal.cc

drizzled/field/decimal.h

drizzled/field_conv.cc

drizzled/filesort.cc

drizzled/function/func.cc

drizzled/function/math/decimal_typecast.h

drizzled/item.cc

drizzled/item/decimal.h

drizzled/item/subselect.cc

drizzled/item/sum.cc

drizzled/item/sum.h

drizzled/item/type_holder.cc

drizzled/join.cc

drizzled/join_cache.cc

drizzled/key.cc

drizzled/lock.cc

drizzled/message/statement_transform.cc

drizzled/message/table.proto

drizzled/message/transaction_reader.cc

drizzled/message/transaction_writer.cc

drizzled/opt_range.cc

drizzled/opt_range.h

drizzled/opt_sum.cc

drizzled/plugin/loader.cc

drizzled/plugin/storage_engine.cc

drizzled/plugin/storage_engine.h

drizzled/records.cc

drizzled/replication_services.cc

drizzled/server_includes.h

drizzled/session.cc

drizzled/session.h

drizzled/set_var.cc

drizzled/show.cc

drizzled/sql_base.cc

drizzled/sql_delete.cc

drizzled/sql_derived.cc

drizzled/sql_insert.cc

drizzled/sql_load.cc

drizzled/sql_select.cc

drizzled/sql_table.cc

drizzled/sql_union.cc

drizzled/sql_update.cc

drizzled/sql_yacc.yy

drizzled/statement/alter_table.cc

drizzled/statement/truncate.cc

drizzled/structs.h

drizzled/table.cc

drizzled/table.h

drizzled/table_proto_write.cc

extra/run_cpplint.sh

m4/pandora_plugins.m4

m4/pandora_with_gettext.m4

m4/po.m4

plugin/archive/ha_archive.cc

plugin/archive/ha_archive.h

plugin/blackhole/ha_blackhole.cc

plugin/blackhole/ha_blackhole.h

plugin/csv/ha_tina.cc

plugin/csv/ha_tina.h

plugin/heap/ha_heap.cc

plugin/heap/ha_heap.h

plugin/info_schema/info_schema_methods.cc

plugin/information_engine/information_engine.cc

plugin/information_engine/information_engine.h

plugin/information_engine/plugin.ini

plugin/innobase/handler/ha_innodb.cc

plugin/innobase/handler/ha_innodb.h

plugin/innobase/lock/lock0lock.c

plugin/innobase/row/row0mysql.c

plugin/logging_gearman/logging_gearman.cc

plugin/logging_query/logging_query.cc

plugin/myisam/ha_myisam.cc

plugin/myisam/ha_myisam.h

plugin/oldlibdrizzle/errmsg.h

plugin/oldlibdrizzle/net_serv.h

plugin/oldlibdrizzle/oldlibdrizzle.cc

plugin/oldlibdrizzle/oldlibdrizzle.h

plugin/oldlibdrizzle/options.h

plugin/oldlibdrizzle/pack.h

plugin/oldlibdrizzle/plugin.ini

plugin/oldlibdrizzle/vio.h

support-files/smf/drizzle.in

tests/Makefile.am

tests/include/mix2.inc

tests/lib/mtr_cases.pl

tests/r/alias.result

tests/r/alter_table.result

tests/r/binary.result

tests/r/cast.result

tests/r/csv.result

tests/r/ctype_utf8.result

tests/r/delete.result

tests/r/endspace.result

tests/r/func_test.result

tests/r/group_by.result

tests/r/innodb.result

tests/r/innodb_autoinc_lock_mode_zero.result

tests/r/key.result

tests/r/mix2_myisam.result

tests/r/select.result

tests/r/subselect2.result

tests/r/subselect_innodb.result

tests/r/type_enum.result

tests/r/variables.result

tests/suite/transaction_log/t/rand.inc

tests/t/alias.test

tests/t/alter_table.test

tests/t/binary.test

tests/t/cast.test

tests/t/connect.test

tests/t/ctype_utf8.test

tests/t/delete.test

tests/t/endspace.test

tests/t/func_test.test

tests/t/group_by.test

tests/t/information_schema.test

tests/t/innodb.test

tests/t/innodb_autoinc_lock_mode_zero.test

tests/t/key.test

tests/t/select.test

tests/t/show_check.test

tests/t/subselect2.test

tests/t/subselect_innodb.test

tests/t/type_enum.test

tests/t/variables.test

tests/test-run.pl

Show diffs side-by-side

added added

removed removed

drizzled/ha_commands.cc

/* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-

* vim:expandtab:shiftwidth=2:tabstop=2:smarttab:

* This program is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License as published by

* the Free Software Foundation; version 2 of the License.

* This program is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

* GNU General Public License for more details.

* You should have received a copy of the GNU General Public License

* along with this program; if not, write to the Free Software

* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

*/

/**

@file Cursor.cc

Handler-calling-functions

*/

#include "drizzled/server_includes.h"

#include "mysys/hash.h"

#include "drizzled/error.h"

#include "drizzled/gettext.h"

#include "drizzled/probes.h"

#include "drizzled/sql_parse.h"

#include "drizzled/cost_vect.h"

#include "drizzled/session.h"

#include "drizzled/sql_base.h"

#include "drizzled/replication_services.h"

#include "drizzled/lock.h"

#include "drizzled/item/int.h"

#include "drizzled/item/empty_string.h"

#include "drizzled/unireg.h" // for mysql_frm_type

#include "drizzled/field/timestamp.h"

#include "drizzled/message/table.pb.h"

#include "drizzled/plugin/client.h"

using namespace std;

using namespace drizzled;

/* Default key-creation settings: undefined algorithm, remaining fields zero/NULL. */
KEY_CREATE_INFO default_key_create_info= { HA_KEY_ALG_UNDEF, 0, {NULL,0} };

/* number of entries in storage_engines[] */

uint32_t total_ha= 0;

/* number of storage engines (from storage_engines[]) that support 2pc */

uint32_t total_ha_2pc= 0;

/* size of savepoint storage area (see ha_init) */

uint32_t savepoint_alloc_size= 0;

/*
  Printable row-format names; "?" marks slots without a name.
  NOTE(review): presumably indexed by the row-type enum -- confirm against
  the enum declaration.
*/
const char *ha_row_type[] = {

"", "FIXED", "DYNAMIC", "COMPRESSED", "REDUNDANT", "COMPACT", "PAGE", "?","?","?"

};

/* Transaction isolation level names; the list is NULL-terminated. */
const char *tx_isolation_names[] =

{ "READ-UNCOMMITTED", "READ-COMMITTED", "REPEATABLE-READ", "SERIALIZABLE",

NULL};

/* TYPELIB over tx_isolation_names; the count excludes the trailing NULL. */
TYPELIB tx_isolation_typelib= {array_elements(tx_isolation_names)-1,"",

tx_isolation_names, NULL};

/**

@retval

0 OK

@retval

!=0 Error

int ha_init_errors(void)

{

#define SETMSG(nr, msg) errmsgs[(nr) - HA_ERR_FIRST]= (msg)

const char **errmsgs;

/* Allocate a pointer array for the error message strings. */

/* Zerofill it to avoid uninitialized gaps. */

if (! (errmsgs= (const char**) malloc(HA_ERR_ERRORS * sizeof(char*))))

return 1;

memset(errmsgs, 0, HA_ERR_ERRORS * sizeof(char *));

/* Set the dedicated error messages. */

SETMSG(HA_ERR_KEY_NOT_FOUND, ER(ER_KEY_NOT_FOUND));

SETMSG(HA_ERR_FOUND_DUPP_KEY, ER(ER_DUP_KEY));

SETMSG(HA_ERR_RECORD_CHANGED, "Update wich is recoverable");

SETMSG(HA_ERR_WRONG_INDEX, "Wrong index given to function");

SETMSG(HA_ERR_CRASHED, ER(ER_NOT_KEYFILE));

SETMSG(HA_ERR_WRONG_IN_RECORD, ER(ER_CRASHED_ON_USAGE));

SETMSG(HA_ERR_OUT_OF_MEM, "Table Cursor out of memory");

SETMSG(HA_ERR_NOT_A_TABLE, "Incorrect file format '%.64s'");

SETMSG(HA_ERR_WRONG_COMMAND, "Command not supported");

SETMSG(HA_ERR_OLD_FILE, ER(ER_OLD_KEYFILE));

SETMSG(HA_ERR_NO_ACTIVE_RECORD, "No record read in update");

100

SETMSG(HA_ERR_RECORD_DELETED, "Intern record deleted");

101

SETMSG(HA_ERR_RECORD_FILE_FULL, ER(ER_RECORD_FILE_FULL));

102

SETMSG(HA_ERR_INDEX_FILE_FULL, "No more room in index file '%.64s'");

103

SETMSG(HA_ERR_END_OF_FILE, "End in next/prev/first/last");

104

SETMSG(HA_ERR_UNSUPPORTED, ER(ER_ILLEGAL_HA));

105

SETMSG(HA_ERR_TO_BIG_ROW, "Too big row");

106

SETMSG(HA_WRONG_CREATE_OPTION, "Wrong create option");

107

SETMSG(HA_ERR_FOUND_DUPP_UNIQUE, ER(ER_DUP_UNIQUE));

108

SETMSG(HA_ERR_UNKNOWN_CHARSET, "Can't open charset");

109

SETMSG(HA_ERR_WRONG_MRG_TABLE_DEF, ER(ER_WRONG_MRG_TABLE));

110

SETMSG(HA_ERR_CRASHED_ON_REPAIR, ER(ER_CRASHED_ON_REPAIR));

111

SETMSG(HA_ERR_CRASHED_ON_USAGE, ER(ER_CRASHED_ON_USAGE));

112

SETMSG(HA_ERR_LOCK_WAIT_TIMEOUT, ER(ER_LOCK_WAIT_TIMEOUT));

113

SETMSG(HA_ERR_LOCK_TABLE_FULL, ER(ER_LOCK_TABLE_FULL));

114

SETMSG(HA_ERR_READ_ONLY_TRANSACTION, ER(ER_READ_ONLY_TRANSACTION));

115

SETMSG(HA_ERR_LOCK_DEADLOCK, ER(ER_LOCK_DEADLOCK));

116

SETMSG(HA_ERR_CANNOT_ADD_FOREIGN, ER(ER_CANNOT_ADD_FOREIGN));

117

SETMSG(HA_ERR_NO_REFERENCED_ROW, ER(ER_NO_REFERENCED_ROW_2));

118

SETMSG(HA_ERR_ROW_IS_REFERENCED, ER(ER_ROW_IS_REFERENCED_2));

119

SETMSG(HA_ERR_NO_SAVEPOINT, "No savepoint with that name");

120

SETMSG(HA_ERR_NON_UNIQUE_BLOCK_SIZE, "Non unique key block size");

121

SETMSG(HA_ERR_NO_SUCH_TABLE, "No such table: '%.64s'");

122

SETMSG(HA_ERR_TABLE_EXIST, ER(ER_TABLE_EXISTS_ERROR));

123

SETMSG(HA_ERR_NO_CONNECTION, "Could not connect to storage engine");

124

SETMSG(HA_ERR_TABLE_DEF_CHANGED, ER(ER_TABLE_DEF_CHANGED));

125

SETMSG(HA_ERR_FOREIGN_DUPLICATE_KEY, "FK constraint would lead to duplicate key");

126

SETMSG(HA_ERR_TABLE_NEEDS_UPGRADE, ER(ER_TABLE_NEEDS_UPGRADE));

127

SETMSG(HA_ERR_TABLE_READONLY, ER(ER_OPEN_AS_READONLY));

128

SETMSG(HA_ERR_AUTOINC_READ_FAILED, ER(ER_AUTOINC_READ_FAILED));

129

SETMSG(HA_ERR_AUTOINC_ERANGE, ER(ER_WARN_DATA_OUT_OF_RANGE));

130

131

/* Register the error messages for use with my_error(). */

132

return my_error_register(errmsgs, HA_ERR_FIRST, HA_ERR_LAST);

133

}

134

135

136

/**

137

Unregister Cursor error messages.

138

139

@retval

140

0 OK

141

@retval

142

!=0 Error

143

144

static int ha_finish_errors(void)

145

{

146

const char **errmsgs;

147

148

/* Allocate a pointer array for the error message strings. */

149

if (! (errmsgs= my_error_unregister(HA_ERR_FIRST, HA_ERR_LAST)))

150

return 1;

151

free((unsigned char*) errmsgs);

152

return 0;

153

}

154

155

int ha_init()

156

{

157

int error= 0;

158

159

assert(total_ha < MAX_HA);

160

161

Check if there is a transaction-capable storage engine besides the

162

binary log (which is considered a transaction-capable storage engine in

163

counting total_ha)

164

165

savepoint_alloc_size+= sizeof(SAVEPOINT);

166

return error;

167

}

168

169

int ha_end()

170

{

171

int error= 0;

172

173

174

This should be eventualy based on the graceful shutdown flag.

175

So if flag is equal to HA_PANIC_CLOSE, the deallocate

176

the errors.

177

178

if (ha_finish_errors())

179

error= 1;

180

181

return error;

182

}

183

184

185

186

/* ========================================================================

187

======================= TRANSACTIONS ===================================*/

188

189

/**

190

Transaction handling in the server

191

==================================

192

193

In each client connection, MySQL maintains two transactional

194

states:

195

- a statement transaction,

196

- a standard, also called normal transaction.

197

198

Historical note

199

---------------

200

"Statement transaction" is a non-standard term that comes

201

from the times when MySQL supported BerkeleyDB storage engine.

202

203

First of all, it should be said that in BerkeleyDB auto-commit

204

mode auto-commits operations that are atomic to the storage

205

engine itself, such as a write of a record, and are too

206

high-granular to be atomic from the application perspective

207

(MySQL). One SQL statement could involve many BerkeleyDB

208

auto-committed operations and thus BerkeleyDB auto-commit was of

209

little use to MySQL.

210

211

Secondly, instead of SQL standard savepoints, BerkeleyDB

212

provided the concept of "nested transactions". In a nutshell,

213

transactions could be arbitrarily nested, but when the parent

214

transaction was committed or aborted, all its child (nested)

215

transactions were committed or aborted as well.

216

Commit of a nested transaction, in turn, made its changes

217

visible, but not durable: it destroyed the nested transaction,

218

all its changes would become available to the parent and

219

currently active nested transactions of this parent.

220

221

So the mechanism of nested transactions was employed to

222

provide "all or nothing" guarantee of SQL statements

223

required by the standard.

224

A nested transaction would be created at start of each SQL

225

statement, and destroyed (committed or aborted) at statement

226

end. Such nested transaction was internally referred to as

227

a "statement transaction" and gave birth to the term.

228

229

230

231

Since then a statement transaction is started for each statement

232

that accesses transactional tables or uses the binary log. If

233

the statement succeeds, the statement transaction is committed.

234

If the statement fails, the transaction is rolled back. Commits

235

of statement transactions are not durable -- each such

236

transaction is nested in the normal transaction, and if the

237

normal transaction is rolled back, the effects of all enclosed

238

statement transactions are undone as well. Technically,

239

a statement transaction can be viewed as a savepoint which is

240

maintained automatically in order to make effects of one

241

statement atomic.

242

243

The normal transaction is started by the user and is ended

244

usually upon a user request as well. The normal transaction

245

encloses transactions of all statements issued between

246

its beginning and its end.

247

In autocommit mode, the normal transaction is equivalent

248

to the statement transaction.

249

250

Since MySQL supports PSEA (pluggable storage engine

251

architecture), more than one transactional engine can be

252

active at a time. Hence transactions, from the server

253

point of view, are always distributed. In particular,

254

transactional state is maintained independently for each

255

engine. In order to commit a transaction the two phase

256

commit protocol is employed.

257

258

Not all statements are executed in context of a transaction.

259

Administrative and status information statements do not modify

260

engine data, and thus do not start a statement transaction and

261

also have no effect on the normal transaction. Examples of such

262

statements are SHOW STATUS and RESET SLAVE.

263

264

Similarly DDL statements are not transactional,

265

and therefore a transaction is [almost] never started for a DDL

266

statement. The difference between a DDL statement and a purely

267

administrative statement though is that a DDL statement always

268

commits the current transaction before proceeding, if there is

269

any.

270

271

At last, SQL statements that work with non-transactional

272

engines also have no effect on the transaction state of the

273

connection. Even though they are written to the binary log,

274

and the binary log is, overall, transactional, the writes

275

are done in "write-through" mode, directly to the binlog

276

file, followed by an OS cache sync, in other words,

277

bypassing the binlog undo log (translog).

278

They do not commit the current normal transaction.

279

A failure of a statement that uses non-transactional tables

280

would cause a rollback of the statement transaction, but

281

in case no transactional tables are used,

282

no statement transaction is started.

283

284

Data layout

285

-----------

286

287

The server stores its transaction-related data in

288

session->transaction. This structure has two members of type

289

Session_TRANS. These members correspond to the statement and

290

normal transactions respectively:

291

292

- session->transaction.stmt contains a list of engines

293

that are participating in the given statement

294

- session->transaction.all contains a list of engines that

295

have participated in any of the statement transactions started

296

within the context of the normal transaction.

297

Each element of the list contains a pointer to the storage

298

engine, engine-specific transactional data, and engine-specific

299

transaction flags.

300

301

In autocommit mode session->transaction.all is empty.

302

Instead, data of session->transaction.stmt is

303

used to commit/rollback the normal transaction.

304

305

The list of registered engines has a few important properties:

306

- no engine is registered in the list twice

307

- engines are present in the list in reverse temporal order --

308

new participants are always added to the beginning of the list.

309

310

Transaction life cycle

311

----------------------

312

313

When a new connection is established, session->transaction

314

members are initialized to an empty state.

315

If a statement uses any tables, all affected engines

316

are registered in the statement engine list. In

317

non-autocommit mode, the same engines are registered in

318

the normal transaction list.

319

At the end of the statement, the server issues a commit

320

or a roll back for all engines in the statement list.

321

At this point transaction flags of an engine, if any, are

322

propagated from the statement list to the list of the normal

323

transaction.

324

When commit/rollback is finished, the statement list is

325

cleared. It will be filled in again by the next statement,

326

and emptied again at the next statement's end.

327

328

The normal transaction is committed in a similar way

329

(by going over all engines in session->transaction.all list)

330

but at different times:

331

- upon COMMIT SQL statement is issued by the user

332

- implicitly, by the server, at the beginning of a DDL statement

333

or SET AUTOCOMMIT={0|1} statement.

334

335

The normal transaction can be rolled back as well:

336

- if the user has requested so, by issuing ROLLBACK SQL

337

statement

338

- if one of the storage engines requested a rollback

339

by setting session->transaction_rollback_request. This may

340

happen in case, e.g., when the transaction in the engine was

341

chosen as a victim of the internal deadlock resolution algorithm

342

and rolled back internally. When such a situation happens, there

343

is little the server can do and the only option is to rollback

344

transactions in all other participating engines. In this case

345

the rollback is accompanied by an error sent to the user.

346

347

As follows from the use cases above, the normal transaction

348

is never committed when there is an outstanding statement

349

transaction. In most cases there is no conflict, since

350

commits of the normal transaction are issued by a stand-alone

351

administrative or DDL statement, thus no outstanding statement

352

transaction of the previous statement exists. Besides,

353

all statements that manipulate with the normal transaction

354

are prohibited in stored functions and triggers, therefore

355

no conflicting situation can occur in a sub-statement either.

356

The remaining rare cases when the server explicitly has

357

to commit the statement transaction prior to committing the normal

358

one cover error-handling scenarios (see for example

359

?).

360

361

When committing a statement or a normal transaction, the server

362

either uses the two-phase commit protocol, or issues a commit

363

in each engine independently. The two-phase commit protocol

364

is used only if:

365

- all participating engines support two-phase commit (provide

366

plugin::StorageEngine::prepare PSEA API call) and

367

- transactions in at least two engines modify data (i.e. are

368

not read-only).

369

370

Note that the two phase commit is used for

371

statement transactions, even though they are not durable anyway.

372

This is done to ensure logical consistency of data in a multiple-

373

engine transaction.

374

For example, imagine that some day MySQL supports unique

375

constraint checks deferred till the end of statement. In such

376

case a commit in one of the engines may yield ER_DUP_KEY,

377

and MySQL should be able to gracefully abort statement

378

transactions of other participants.

379

380

After the normal transaction has been committed,

381

session->transaction.all list is cleared.

382

383

When a connection is closed, the current normal transaction, if

384

any, is rolled back.

385

386

Roles and responsibilities

387

--------------------------

388

389

The server has no way to know that an engine participates in

390

the statement and a transaction has been started

391

in it unless the engine says so. Thus, in order to be

392

a part of a transaction, the engine must "register" itself.

393

This is done by invoking trans_register_ha() server call.

394

Normally the engine registers itself whenever Cursor::external_lock()

395

is called. trans_register_ha() can be invoked many times: if

396

an engine is already registered, the call does nothing.

397

In case autocommit is not set, the engine must register itself

398

twice -- both in the statement list and in the normal transaction

399

list.

400

In which list to register is a parameter of trans_register_ha().

401

402

Note, that although the registration interface in itself is

403

fairly clear, the current usage practice often leads to undesired

404

effects. E.g. since a call to trans_register_ha() in most engines

405

is embedded into implementation of Cursor::external_lock(), some

406

DDL statements start a transaction (at least from the server

407

point of view) even though they are not expected to. E.g.

408

CREATE TABLE does not start a transaction, since

409

Cursor::external_lock() is never called during CREATE TABLE. But

410

CREATE TABLE ... SELECT does, since Cursor::external_lock() is

411

called for the table that is being selected from. This has no

412

practical effects currently, but must be kept in mind

413

nevertheless.

414

415

Once an engine is registered, the server will do the rest

416

of the work.

417

418

During statement execution, whenever any of data-modifying

419

PSEA API methods is used, e.g. Cursor::write_row() or

420

Cursor::update_row(), the read-write flag is raised in the

421

statement transaction for the involved engine.

422

Currently All PSEA calls are "traced", and the data can not be

423

changed in a way other than issuing a PSEA call. Important:

424

unless this invariant is preserved the server will not know that

425

a transaction in a given engine is read-write and will not

426

involve the two-phase commit protocol!

427

428

At the end of a statement, server call

429

ha_autocommit_or_rollback() is invoked. This call in turn

430

invokes plugin::StorageEngine::prepare() for every involved engine.

431

Prepare is followed by a call to plugin::StorageEngine::commit_one_phase()

432

If a one-phase commit will suffice, plugin::StorageEngine::prepare() is not

433

invoked and the server only calls plugin::StorageEngine::commit_one_phase().

434

At statement commit, the statement-related read-write engine

435

flag is propagated to the corresponding flag in the normal

436

transaction. When the commit is complete, the list of registered

437

engines is cleared.

438

439

Rollback is handled in a similar fashion.

440

441

Additional notes on DDL and the normal transaction.

442

---------------------------------------------------

443

444

DDLs and operations with non-transactional engines

445

do not "register" in session->transaction lists, and thus do not

446

modify the transaction state. Besides, each DDL in

447

MySQL is prefixed with an implicit normal transaction commit

448

(a call to Session::endActiveTransaction()), and thus leaves nothing

449

to modify.

450

However, as it has been pointed out with CREATE TABLE .. SELECT,

451

some DDL statements can start a *new* transaction.

452

453

Behaviour of the server in this case is currently badly

454

defined.

455

DDL statements use a form of "semantic" logging

456

to maintain atomicity: if CREATE TABLE .. SELECT failed,

457

the newly created table is deleted.

458

In addition, some DDL statements issue interim transaction

459

commits: e.g. ALTER Table issues a commit after data is copied

460

from the original table to the internal temporary table. Other

461

statements, e.g. CREATE TABLE ... SELECT do not always commit

462

after itself.

463

And finally there is a group of DDL statements such as

464

RENAME/DROP Table that doesn't start a new transaction

465

and doesn't commit.

466

467

This diversity makes it hard to say what will happen if

468

by chance a stored function is invoked during a DDL --

469

whether any modifications it makes will be committed or not

470

is not clear. Fortunately, SQL grammar of few DDLs allows

471

invocation of a stored function.

472

473

A consistent behaviour is perhaps to always commit the normal

474

transaction after all DDLs, just like the statement transaction

475

is always committed at the end of all statements.

*/

476

477

478

/**

479

480

481

Every storage engine MUST call this function when it starts

482

a transaction or a statement (that is it must be called both for the

483

"beginning of transaction" and "beginning of statement").

484

Only storage engines registered for the transaction/statement

485

will know when to commit/rollback it.

486

487

@note

488

trans_register_ha is idempotent - storage engine may register many

489

times per transaction.

490

491

492

void trans_register_ha(Session *session, bool all, plugin::StorageEngine *engine)

493

{

494

Session_TRANS *trans;

495

Ha_trx_info *ha_info;

496

497

if (all)

498

{

499

trans= &session->transaction.all;

500

session->server_status|= SERVER_STATUS_IN_TRANS;

501

}

502

else

503

trans= &session->transaction.stmt;

504

505

ha_info= session->ha_data[engine->getSlot()].ha_info + static_cast<unsigned>(all);

506

507

if (ha_info->is_started())

508

return; /* already registered, return */

509

510

ha_info->register_ha(trans, engine);

511

512

trans->no_2pc|= not engine->has_2pc();

513

if (session->transaction.xid_state.xid.is_null())

514

session->transaction.xid_state.xid.set(session->query_id);

515

}

516

517

/**

518

Check if we can skip the two-phase commit.

519

520

A helper function to evaluate if two-phase commit is mandatory.

521

As a side effect, propagates the read-only/read-write flags

522

of the statement transaction to its enclosing normal transaction.

523

524

@retval true we must run a two-phase commit. Returned

525

if we have at least two engines with read-write changes.

526

@retval false Don't need two-phase commit. Even if we have two

527

transactional engines, we can run two independent

528

commits if changes in one of the engines are read-only.

529

530

531

static

532

bool

533

ha_check_and_coalesce_trx_read_only(Session *session, Ha_trx_info *ha_list,

534

bool all)

535

{

536

/* The number of storage engines that have actual changes. */

537

unsigned rw_ha_count= 0;

538

Ha_trx_info *ha_info;

539

540

for (ha_info= ha_list; ha_info; ha_info= ha_info->next())

541

{

542

if (ha_info->is_trx_read_write())

543

++rw_ha_count;

544

545

if (! all)

546

{

547

Ha_trx_info *ha_info_all= &session->ha_data[ha_info->engine()->getSlot()].ha_info[1];

548

assert(ha_info != ha_info_all);

549

550

Merge read-only/read-write information about statement

551

transaction to its enclosing normal transaction. Do this

552

only if in a real transaction -- that is, if we know

553

that ha_info_all is registered in session->transaction.all.

554

Since otherwise we only clutter the normal transaction flags.

555

556

if (ha_info_all->is_started()) /* false if autocommit. */

557

ha_info_all->coalesce_trx_with(ha_info);

558

}

559

else if (rw_ha_count > 1)

560

{

561

562

It is a normal transaction, so we don't need to merge read/write

563

information up, and the need for two-phase commit has been

564

already established. Break the loop prematurely.

565

566

break;

567

}

568

}

569

return rw_ha_count > 1;

570

}

571

572

573

/**

574

@retval

575

0 ok

576

@retval

577

1 transaction was rolled back

578

@retval

579

2 error during commit, data may be inconsistent

580

581

@todo

582

Since we don't support nested statement transactions in 5.0,

583

we can't commit or rollback stmt transactions while we are inside

584

stored functions or triggers. So we simply do nothing now.

585

TODO: This should be fixed in later ( >= 5.1) releases.

586

587

int ha_commit_trans(Session *session, bool all)

588

{

589

int error= 0, cookie= 0;

590

591

'all' means that this is either an explicit commit issued by

592

user, or an implicit commit issued by a DDL.

593

594

Session_TRANS *trans= all ? &session->transaction.all : &session->transaction.stmt;

595

bool is_real_trans= all || session->transaction.all.ha_list == 0;

596

Ha_trx_info *ha_info= trans->ha_list;

597

598

599

We must not commit the normal transaction if a statement

600

transaction is pending. Otherwise statement transaction

601

flags will not get propagated to its normal transaction's

602

counterpart.

603

604

assert(session->transaction.stmt.ha_list == NULL ||

605

trans == &session->transaction.stmt);

606

607

if (ha_info)

608

{

609

bool must_2pc;

610

611

if (is_real_trans && wait_if_global_read_lock(session, 0, 0))

612

{

613

ha_rollback_trans(session, all);

614

return 1;

615

}

616

617

must_2pc= ha_check_and_coalesce_trx_read_only(session, ha_info, all);

618

619

if (!trans->no_2pc && must_2pc)

620

{

621

for (; ha_info && !error; ha_info= ha_info->next())

622

{

623

int err;

624

plugin::StorageEngine *engine= ha_info->engine();

625

626

Do not call two-phase commit if this particular

627

transaction is read-only. This allows for simpler

628

implementation in engines that are always read-only.

629

630

if (! ha_info->is_trx_read_write())

631

continue;

632

633

Sic: we know that prepare() is not NULL since otherwise

634

trans->no_2pc would have been set.

635

636

if ((err= engine->prepare(session, all)))

637

{

638

my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);

639

error= 1;

640

}

641

status_var_increment(session->status_var.ha_prepare_count);

642

}

643

if (error)

644

{

645

ha_rollback_trans(session, all);

646

error= 1;

647

goto end;

648

}

649

}

650

error=ha_commit_one_phase(session, all) ? (cookie ? 2 : 1) : 0;

651

end:

652

if (is_real_trans)

653

start_waiting_global_read_lock(session);

654

}

655

return error;

656

}

657

658

/**

659

@note

660

This function does not care about global read lock. A caller should.

661

662

int ha_commit_one_phase(Session *session, bool all)

663

{

664

int error=0;

665

Session_TRANS *trans=all ? &session->transaction.all : &session->transaction.stmt;

666

bool is_real_trans=all || session->transaction.all.ha_list == 0;

667

Ha_trx_info *ha_info= trans->ha_list, *ha_info_next;

668

if (ha_info)

669

{

670

for (; ha_info; ha_info= ha_info_next)

671

{

672

int err;

673

plugin::StorageEngine *engine= ha_info->engine();

674

if ((err= engine->commit(session, all)))

675

{

676

my_error(ER_ERROR_DURING_COMMIT, MYF(0), err);

677

error=1;

678

}

679

status_var_increment(session->status_var.ha_commit_count);

680

ha_info_next= ha_info->next();

681

ha_info->reset(); /* keep it conveniently zero-filled */

682

}

683

trans->ha_list= 0;

684

trans->no_2pc=0;

685

if (is_real_trans)

686

session->transaction.xid_state.xid.null();

687

if (all)

688

{

689

session->variables.tx_isolation=session->session_tx_isolation;

690

session->transaction.cleanup();

691

}

692

}

693

if (error == 0)

694

{

695

if (is_real_trans)

696

{

697

698

* We commit the normal transaction by finalizing the transaction message

699

* and propogating the message to all registered replicators.

700

701

ReplicationServices &replication_services= ReplicationServices::singleton();

702

replication_services.commitNormalTransaction(session);

703

}

704

}

705

return error;

706

}

707

708

709

/**
  Roll back the statement transaction or the normal transaction by calling
  rollback() on every storage engine registered for it.

  @param session  session owning the transaction
  @param all      true to roll back the normal transaction,
                  false to roll back the statement transaction

  @return 0 if every engine rolled back without error, 1 if at least one
          engine returned a non-zero code (reported through my_error())
*/
int ha_rollback_trans(Session *session, bool all)
{
  Session_TRANS *trans= &session->transaction.stmt;
  if (all)
    trans= &session->transaction.all;

  const bool is_real_trans= all || session->transaction.all.ha_list == 0;
  int error= 0;

  /*
    We must not rollback the normal transaction if a statement
    transaction is pending.
  */
  assert(session->transaction.stmt.ha_list == NULL ||
         trans == &session->transaction.stmt);

  if (trans->ha_list != NULL)
  {
    Ha_trx_info *current= trans->ha_list;
    while (current != NULL)
    {
      const int err= current->engine()->rollback(session, all);
      if (err != 0)
      { // cannot happen
        my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
        error= 1;
      }
      status_var_increment(session->status_var.ha_rollback_count);

      /* Pick up the successor before reset() zero-fills this entry. */
      Ha_trx_info *following= current->next();
      current->reset();
      current= following;
    }

    trans->ha_list= 0;
    trans->no_2pc= false;

    if (is_real_trans)
      session->transaction.xid_state.xid.null();

    if (all)
    {
      session->variables.tx_isolation= session->session_tx_isolation;
      session->transaction.cleanup();
    }
  }

  if (all)
    session->transaction_rollback_request= false;

  /*
    If a non-transactional table was updated, warn that the rollback is
    incomplete. No warning is pushed when the session is being killed
    with KILL_CONNECTION.

    NOTE(review): the comment inherited with this code justified the
    exemption in terms of slave threads replaying a ROLLBACK from the
    binary log; the condition below only tests session->killed. Confirm
    which intent is current.
  */
  const bool warn_incomplete=
    is_real_trans &&
    session->transaction.all.modified_non_trans_table &&
    session->killed != Session::KILL_CONNECTION;
  if (warn_incomplete)
  {
    push_warning(session, DRIZZLE_ERROR::WARN_LEVEL_WARN,
                 ER_WARNING_NOT_COMPLETE_ROLLBACK,
                 ER(ER_WARNING_NOT_COMPLETE_ROLLBACK));
  }

  return error;
}

766

767

/**

768

This is used to commit or rollback a single statement depending on

769

the value of error.

770

771

@note

772

Note that if the autocommit is on, then the following call inside

773

InnoDB will commit or rollback the whole transaction (= the statement). The

774

autocommit mechanism built into InnoDB is based on counting locks, but if

775

the user has used LOCK TABLES then that mechanism does not know to do the

776

commit.

777

778

int ha_autocommit_or_rollback(Session *session, int error)

779

{

780

if (session->transaction.stmt.ha_list)

781

{

782

if (!error)

783

{

784

if (ha_commit_trans(session, 0))

785

error= 1;

786

}

787

else

788

{

789

(void) ha_rollback_trans(session, 0);

790

if (session->transaction_rollback_request)

791

(void) ha_rollback(session);

792

}

793

794

session->variables.tx_isolation=session->session_tx_isolation;

795

}

796

797

return error;

798

}

799

800

/**

801

return the list of XID's to a client, the same way SHOW commands do.

802

803

@note

804

I didn't find in XA specs that an RM cannot return the same XID twice,

805

so mysql_xa_recover does not filter XID's to ensure uniqueness.

806

It can be easily fixed later, if necessary.

807

808

bool mysql_xa_recover(Session *session)

809

{

810

List<Item> field_list;

811

int i= 0;

812

XID_STATE *xs;

813

814

field_list.push_back(new Item_int("formatID", 0, MY_INT32_NUM_DECIMAL_DIGITS));

815

field_list.push_back(new Item_int("gtrid_length", 0, MY_INT32_NUM_DECIMAL_DIGITS));

816

field_list.push_back(new Item_int("bqual_length", 0, MY_INT32_NUM_DECIMAL_DIGITS));

817

field_list.push_back(new Item_empty_string("data",XIDDATASIZE));

818

819

if (session->client->sendFields(&field_list))

820

return 1;

821

822

pthread_mutex_lock(&LOCK_xid_cache);

823

while ((xs= (XID_STATE*)hash_element(&xid_cache, i++)))

824

{

825

if (xs->xa_state==XA_PREPARED)

826

{

827

session->client->store((int64_t)xs->xid.formatID);

828

session->client->store((int64_t)xs->xid.gtrid_length);

829

session->client->store((int64_t)xs->xid.bqual_length);

830

session->client->store(xs->xid.data,

831

xs->xid.gtrid_length+xs->xid.bqual_length);

832

if (session->client->flush())

833

{

834

pthread_mutex_unlock(&LOCK_xid_cache);

835

return 1;

836

}

837

}

838

}

839

840

pthread_mutex_unlock(&LOCK_xid_cache);

841

session->my_eof();

842

return 0;

843

}

844

845

846

/**
  Roll the normal transaction back to a savepoint.

  Engines that were registered when the savepoint was set get
  savepoint_rollback(); engines registered afterwards get a full
  rollback() and are dropped from the transaction's engine list.

  @param session  session owning the transaction
  @param sv       savepoint to roll back to

  @return 0 if every engine call succeeded, 1 if at least one engine
          returned a non-zero code (reported through my_error())
*/
int ha_rollback_to_savepoint(Session *session, SAVEPOINT *sv)
{
  Session_TRANS *trans= &session->transaction.all;
  /* presumably engine-specific savepoint data stored right after *sv -- TODO confirm */
  void *sv_data= sv + 1;
  int error= 0;

  trans->no_2pc= false;

  /*
    rolling back to savepoint in all storage engines that were part of the
    transaction when the savepoint was set
  */
  Ha_trx_info *current= sv->ha_list;
  while (current != NULL)
  {
    plugin::StorageEngine *engine= current->engine();
    assert(engine);

    const int err= engine->savepoint_rollback(session, sv_data);
    if (err != 0)
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
      error= 1;
    }
    status_var_increment(session->status_var.ha_savepoint_rollback_count);
    trans->no_2pc|= !engine->has_2pc();

    current= current->next();
  }

  /*
    rolling back the transaction in all storage engines that were not part of
    the transaction when the savepoint was set
  */
  current= trans->ha_list;
  while (current != sv->ha_list)
  {
    const int err= current->engine()->rollback(session, true);
    if (err != 0)
    { // cannot happen
      my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err);
      error= 1;
    }
    status_var_increment(session->status_var.ha_rollback_count);

    /* Pick up the successor before reset() zero-fills this entry. */
    Ha_trx_info *following= current->next();
    current->reset();
    current= following;
  }

  trans->ha_list= sv->ha_list;

  return error;
}

892

893

/**

894

@note

895

according to the sql standard (ISO/IEC 9075-2:2003)

896

section "4.33.4 SQL-statements and transaction states",

897

SAVEPOINT is *not* transaction-initiating SQL-statement

898

899

int ha_savepoint(Session *session, SAVEPOINT *sv)

900

{

901

int error= 0;

902

Session_TRANS *trans= &session->transaction.all;

903

Ha_trx_info *ha_info= trans->ha_list;

904

for (; ha_info; ha_info= ha_info->next())

905

{

906

int err;

907

plugin::StorageEngine *engine= ha_info->engine();

908

assert(engine);

909

#ifdef NOT_IMPLEMENTED /*- TODO (examine this againt the original code base) */

910

if (! engine->savepoint_set)

911

{

912

my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "SAVEPOINT");

913

error= 1;

914

break;

915

}

916

#endif

917

if ((err= engine->savepoint_set(session, (void *)(sv+1))))

918

{ // cannot happen

919

my_error(ER_GET_ERRNO, MYF(0), err);

920

error= 1;

921

}

922

status_var_increment(session->status_var.ha_savepoint_count);

923

}

924

925

Remember the list of registered storage engines. All new

926

engines are prepended to the beginning of the list.

927

928

sv->ha_list= trans->ha_list;

929

return error;

930

}

931

932

/**
  Release a savepoint in every storage engine that was registered in the
  transaction when the savepoint was set.

  @param session  session owning the transaction
  @param sv       savepoint to release

  @return 0 if every engine call succeeded, 1 if at least one engine
          returned a non-zero code (reported through my_error())
*/
int ha_release_savepoint(Session *session, SAVEPOINT *sv)
{
  /* presumably engine-specific savepoint data stored right after *sv -- TODO confirm */
  void *sv_data= sv + 1;
  int error= 0;

  Ha_trx_info *current= sv->ha_list;
  while (current != NULL)
  {
    plugin::StorageEngine *engine= current->engine();
    /* Savepoint life time is enclosed into transaction life time. */
    assert(engine);

    const int err= engine->savepoint_release(session, sv_data);
    if (err != 0)
    { // cannot happen
      my_error(ER_GET_ERRNO, MYF(0), err);
      error= 1;
    }

    current= current->next();
  }

  return error;
}

Older »