1
/*****************************************************************************
3
Copyright (C) 1996, 2010, Innobase Oy. All Rights Reserved.
5
This program is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free Software
7
Foundation; version 2 of the License.
9
This program is distributed in the hope that it will be useful, but WITHOUT
10
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
You should have received a copy of the GNU General Public License along with
14
this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15
St, Fifth Floor, Boston, MA 02110-1301 USA
17
*****************************************************************************/
19
/**************************************************//**
21
Transaction undo log record
23
Created 3/26/1996 Heikki Tuuri
24
*******************************************************/
33
#include "mach0data.h"
36
#ifndef UNIV_HOTBACKUP
37
#include "dict0dict.h"
42
#include "trx0purge.h"
46
/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
48
/**********************************************************************//**
49
Writes the mtr log entry of the inserted undo log record on the undo log page.
53
trx_undof_page_add_undo_rec_log(
54
/*============================*/
55
page_t* undo_page, /*!< in: undo log page */
56
ulint old_free, /*!< in: start offset of the inserted entry */
57
ulint new_free, /*!< in: end offset of the entry */
58
mtr_t* mtr) /*!< in: mtr */
64
/* Open the mtr log buffer, requesting 11 + 13 + MLOG_BUF_MARGIN bytes. */
log_ptr = mlog_open(mtr, 11 + 13 + MLOG_BUF_MARGIN);
66
/* mlog_open() may return NULL; that case is tested here. */
if (log_ptr == NULL) {
71
/* Remember where the requested space ends, so that we can tell below
whether the logged bytes fit into it. */
log_end = &log_ptr[11 + 13 + MLOG_BUF_MARGIN];
72
log_ptr = mlog_write_initial_log_record_fast(
73
undo_page, MLOG_UNDO_INSERT, log_ptr, mtr);
74
/* Number of bytes logged: the entry span minus 4 bytes. The copies below
start 2 bytes after old_free, so the leading 2 bytes of the entry are not
logged; presumably the other 2 excluded bytes are the trailing ones
(compare trx_undo_parse_add_undo_rec) -- confirm. */
len = new_free - old_free - 4;
76
mach_write_to_2(log_ptr, len);
79
/* If the bytes fit into the space opened above, copy them directly and
close the log at the end of the copy. */
if (log_ptr + len <= log_end) {
80
memcpy(log_ptr, undo_page + old_free + 2, len);
81
mlog_close(mtr, log_ptr + len);
83
/* Alternative path for bytes that do not fit: close the log at the
current position and pass the bytes to mlog_catenate_string(). */
mlog_close(mtr, log_ptr);
84
mlog_catenate_string(mtr, undo_page + old_free + 2, len);
87
#endif /* !UNIV_HOTBACKUP */
89
/***********************************************************//**
90
Parses a redo log record of adding an undo log record.
91
@return end of log record or NULL */
94
trx_undo_parse_add_undo_rec(
95
/*========================*/
96
byte* ptr, /*!< in: buffer */
97
byte* end_ptr,/*!< in: buffer end */
98
page_t* page) /*!< in: page or NULL */
104
/* The log record starts with a 2-byte length: check that the buffer
holds at least those 2 bytes. */
if (end_ptr < ptr + 2) {
109
/* Length in bytes of the undo record body carried by the log record. */
len = mach_read_from_2(ptr);
112
/* Check that the buffer holds the whole body. */
if (end_ptr < ptr + len) {
122
/* The new record is placed at the current first-free offset of the page. */
first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
123
+ TRX_UNDO_PAGE_FREE);
124
rec = page + first_free;
126
/* At the start of the record, store the offset just past it: the body
is framed by two 2-byte offsets, hence first_free + 4 + len. */
mach_write_to_2(rec, first_free + 4 + len);
127
/* After the body, store the offset where this record starts. */
mach_write_to_2(rec + 2 + len, first_free);
129
/* Advance the first-free offset of the page past the new record. */
mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
130
first_free + 4 + len);
131
/* Copy the body between the two 2-byte offsets. */
ut_memcpy(rec + 2, ptr, len);
136
#ifndef UNIV_HOTBACKUP
137
/**********************************************************************//**
138
Calculates the free space left for extending an undo log record.
139
@return bytes left */
144
const page_t* page, /*!< in: undo log page */
145
const byte* ptr) /*!< in: pointer to page */
147
/* The '- 10' is a safety margin, in case we have some small
148
calculation error below */
150
return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END);
153
/**********************************************************************//**
154
Set the next and previous pointers in the undo page for the undo record
155
that was written to ptr. Update the first free value by the number of bytes
156
written for this undo record.
157
@return offset of the inserted entry on the page if succeeded, 0 if fail */
160
trx_undo_page_set_next_prev_and_add(
161
/*================================*/
162
page_t* undo_page, /*!< in/out: undo log page */
163
byte* ptr, /*!< in: ptr up to where data has been
164
written on this undo page. */
165
mtr_t* mtr) /*!< in: mtr */
167
ulint first_free; /*!< offset within undo_page */
168
ulint end_of_rec; /*!< offset within undo_page */
169
byte* ptr_to_first_free;
170
/* pointer within undo_page
171
that points to the next free
172
offset value within undo_page.*/
174
/* ptr must point strictly inside the page frame. */
ut_ad(ptr > undo_page);
175
ut_ad(ptr < undo_page + UNIV_PAGE_SIZE);
177
/* At least 2 bytes must be left for the 2-byte offset written at ptr
below. */
if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) {
182
/* Location in the page header of the first-free offset. */
ptr_to_first_free = undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE;
184
/* The record being finished started at the old first-free offset. */
first_free = mach_read_from_2(ptr_to_first_free);
186
/* Write offset of the previous undo log record */
187
mach_write_to_2(ptr, first_free);
190
/* Offset of ptr within the page. */
end_of_rec = ptr - undo_page;
192
/* Write offset of the next undo log record */
193
mach_write_to_2(undo_page + first_free, end_of_rec);
195
/* Update the offset to first free undo record */
196
mach_write_to_2(ptr_to_first_free, end_of_rec);
198
/* Write the mtr log entry describing the undo log record just added */
199
trx_undof_page_add_undo_rec_log(undo_page, first_free,
205
/**********************************************************************//**
206
Reports in the undo log of an insert of a clustered index record.
207
@return offset of the inserted entry on the page if succeed, 0 if fail */
210
trx_undo_page_report_insert(
211
/*========================*/
212
page_t* undo_page, /*!< in: undo log page */
213
trx_t* trx, /*!< in: transaction */
214
dict_index_t* index, /*!< in: clustered index */
215
const dtuple_t* clust_entry, /*!< in: index entry which will be
216
inserted to the clustered index */
217
mtr_t* mtr) /*!< in: mtr */
223
/* Debug checks: the index must be clustered and the page must be an
insert undo page. */
ut_ad(dict_index_is_clust(index));
224
ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
225
+ TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
227
/* The new record is written starting at the first-free offset of the
page. */
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
228
+ TRX_UNDO_PAGE_FREE);
229
ptr = undo_page + first_free;
231
ut_ad(first_free <= UNIV_PAGE_SIZE);
233
/* 2 + 1 + 11 + 11: 2 reserved bytes, 1 type byte, and presumably up to
11 bytes for each of the two much-compressed values written below
(undo number and table id) -- confirm the 11-byte bound. */
if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) {
235
/* Not enough space for writing the general parameters */
240
/* Reserve 2 bytes for the pointer to the next undo log record */
243
/* Store first some general parameters to the undo log */
244
*ptr++ = TRX_UNDO_INSERT_REC;
245
ptr += mach_ull_write_much_compressed(ptr, trx->undo_no);
246
ptr += mach_ull_write_much_compressed(ptr, index->table->id);
247
/*----------------------------------------*/
248
/* Store then the fields required to uniquely determine the record
249
to be inserted in the clustered index */
251
for (i = 0; i < dict_index_get_n_unique(index); i++) {
253
const dfield_t* field = dtuple_get_nth_field(clust_entry, i);
254
ulint flen = dfield_get_len(field);
256
/* Require 5 bytes of room before writing the compressed field length. */
if (trx_undo_left(undo_page, ptr) < 5) {
261
ptr += mach_write_compressed(ptr, flen);
263
/* Field data is copied only for non-NULL fields, and only if flen bytes
are still available on the page. */
if (flen != UNIV_SQL_NULL) {
264
if (trx_undo_left(undo_page, ptr) < flen) {
269
ut_memcpy(ptr, dfield_get_data(field), flen);
274
/* Link the finished record into the page and write its mtr log entry;
the result of that call is returned as is. */
return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
277
/**********************************************************************//**
278
Reads from an undo log record the general parameters.
279
@return remaining part of undo log record after reading these values */
282
trx_undo_rec_get_pars(
283
/*==================*/
284
trx_undo_rec_t* undo_rec, /*!< in: undo log record */
285
ulint* type, /*!< out: undo record type:
286
TRX_UNDO_INSERT_REC, ... */
287
ulint* cmpl_info, /*!< out: compiler info, relevant only
288
for update type records */
289
ibool* updated_extern, /*!< out: TRUE if we updated an
290
externally stored field */
291
undo_no_t* undo_no, /*!< out: undo log record number */
292
table_id_t* table_id) /*!< out: table id */
299
/* One byte packs the record type, the compiler info and the
TRX_UNDO_UPD_EXTERN flag. */
type_cmpl = mach_read_from_1(ptr);
302
/* Report the TRX_UNDO_UPD_EXTERN flag separately and remove it from
type_cmpl before the type and compiler info are split below. */
if (type_cmpl & TRX_UNDO_UPD_EXTERN) {
303
*updated_extern = TRUE;
304
type_cmpl -= TRX_UNDO_UPD_EXTERN;
306
*updated_extern = FALSE;
309
/* The type is taken with a mask and the compiler info with a division.
NOTE(review): the two are consistent only if TRX_UNDO_CMPL_INFO_MULT is
a power of two -- confirm. */
*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
310
*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
312
/* Read the much-compressed undo number and step over its encoding. */
*undo_no = mach_ull_read_much_compressed(ptr);
313
ptr += mach_ull_get_much_compressed_size(*undo_no);
315
/* Read the much-compressed table id and step over its encoding. */
*table_id = mach_ull_read_much_compressed(ptr);
316
ptr += mach_ull_get_much_compressed_size(*table_id);
321
/**********************************************************************//**
322
Reads from an undo log record a stored column value.
323
@return remaining part of undo log record after reading these values */
326
trx_undo_rec_get_col_val(
327
/*=====================*/
328
byte* ptr, /*!< in: pointer to remaining part of undo log record */
329
byte** field, /*!< out: pointer to stored field */
330
ulint* len, /*!< out: length of the field, or UNIV_SQL_NULL */
331
ulint* orig_len)/*!< out: original length of the locally
332
stored part of an externally stored column, or 0 */
334
*len = mach_read_compressed(ptr);
335
ptr += mach_get_compressed_size(*len);
343
case UNIV_EXTERN_STORAGE_FIELD:
344
*orig_len = mach_read_compressed(ptr);
345
ptr += mach_get_compressed_size(*orig_len);
346
*len = mach_read_compressed(ptr);
347
ptr += mach_get_compressed_size(*len);
351
ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
352
ut_ad(*len > *orig_len);
353
/* @see dtuple_convert_big_rec() */
354
ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE * 2);
355
/* we do not have access to index->table here
356
ut_ad(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP
357
|| *len >= REC_MAX_INDEX_COL_LEN
358
+ BTR_EXTERN_FIELD_REF_SIZE);
361
*len += UNIV_EXTERN_STORAGE_FIELD;
365
if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
366
ptr += *len - UNIV_EXTERN_STORAGE_FIELD;
375
/*******************************************************************//**
376
Builds a row reference from an undo log record.
377
@return pointer to remaining part of undo record */
380
trx_undo_rec_get_row_ref(
381
/*=====================*/
382
byte* ptr, /*!< in: remaining part of a copy of an undo log
383
record, at the start of the row reference;
384
NOTE that this copy of the undo log record must
385
be preserved as long as the row reference is
386
used, as we do NOT copy the data in the
388
dict_index_t* index, /*!< in: clustered index */
389
dtuple_t** ref, /*!< out, own: row reference */
390
mem_heap_t* heap) /*!< in: memory heap from which the memory
391
needed is allocated */
396
ut_ad(index && ptr && ref && heap);
397
ut_a(dict_index_is_clust(index));
399
ref_len = dict_index_get_n_unique(index);
401
*ref = dtuple_create(heap, ref_len);
403
dict_index_copy_types(*ref, index, ref_len);
405
for (i = 0; i < ref_len; i++) {
411
dfield = dtuple_get_nth_field(*ref, i);
413
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
415
dfield_set_data(dfield, field, len);
421
/*******************************************************************//**
422
Skips a row reference from an undo log record.
423
@return pointer to remaining part of undo record */
426
trx_undo_rec_skip_row_ref(
427
/*======================*/
428
byte* ptr, /*!< in: remaining part in update undo log
429
record, at the start of the row reference */
430
dict_index_t* index) /*!< in: clustered index */
436
/* Checked in all builds (ut_a): the index must be clustered. */
ut_a(dict_index_is_clust(index));
438
/* The row reference has one stored column per unique field of the
index. */
ref_len = dict_index_get_n_unique(index);
440
for (i = 0; i < ref_len; i++) {
445
/* Decode one stored column only to advance ptr past it. */
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
451
/**********************************************************************//**
452
Fetch a prefix of an externally stored column, for writing to the undo log
453
of an update or delete marking of a clustered index record.
457
trx_undo_page_fetch_ext(
458
/*====================*/
459
byte* ext_buf, /*!< in: a buffer of
460
REC_MAX_INDEX_COL_LEN
461
+ BTR_EXTERN_FIELD_REF_SIZE */
462
ulint zip_size, /*!< compressed page size in bytes,
463
or 0 for uncompressed BLOB */
464
const byte* field, /*!< in: an externally stored column */
465
ulint* len) /*!< in: length of field;
466
out: used length of ext_buf */
468
/* Fetch a prefix of the BLOB into ext_buf. REC_MAX_INDEX_COL_LEN is
passed as the second argument, presumably the maximum prefix length
(confirm against btr_copy_externally_stored_field_prefix). */
469
ulint ext_len = btr_copy_externally_stored_field_prefix(
470
ext_buf, REC_MAX_INDEX_COL_LEN, zip_size, field, *len);
471
/* BLOBs should always be nonempty. */
473
/* Append the BLOB pointer to the prefix: it is the last
BTR_EXTERN_FIELD_REF_SIZE bytes of the locally stored field. */
474
memcpy(ext_buf + ext_len,
475
field + *len - BTR_EXTERN_FIELD_REF_SIZE,
476
BTR_EXTERN_FIELD_REF_SIZE);
477
/* Report the number of bytes now used in ext_buf. */
*len = ext_len + BTR_EXTERN_FIELD_REF_SIZE;
481
/**********************************************************************//**
482
Writes to the undo log a prefix of an externally stored column.
483
@return undo log position */
486
trx_undo_page_report_modify_ext(
487
/*============================*/
488
byte* ptr, /*!< in: undo log position,
489
at least 15 bytes must be available */
490
byte* ext_buf, /*!< in: a buffer of
491
REC_MAX_INDEX_COL_LEN
492
+ BTR_EXTERN_FIELD_REF_SIZE,
493
or NULL when should not fetch
495
ulint zip_size, /*!< compressed page size in bytes,
496
or 0 for uncompressed BLOB */
497
const byte** field, /*!< in/out: the locally stored part of
498
the externally stored column */
499
ulint* len) /*!< in/out: length of field, in bytes */
502
/* If an ordering column is externally stored, we will
503
have to store a longer prefix of the field. In this
504
case, write to the log a marker followed by the
505
original length and the real length of the field. */
506
ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD);
508
ptr += mach_write_compressed(ptr, *len);
510
*field = trx_undo_page_fetch_ext(ext_buf, zip_size,
513
ptr += mach_write_compressed(ptr, *len);
515
ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD
522
/**********************************************************************//**
523
Reports in the undo log of an update or delete marking of a clustered index record.
525
@return byte offset of the inserted undo log entry on the page if
526
succeed, 0 if fail */
529
trx_undo_page_report_modify(
530
/*========================*/
531
page_t* undo_page, /*!< in: undo log page */
532
trx_t* trx, /*!< in: transaction */
533
dict_index_t* index, /*!< in: clustered index where update or
534
delete marking is done */
535
const rec_t* rec, /*!< in: clustered index record which
536
has NOT yet been modified */
537
const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
538
const upd_t* update, /*!< in: update vector which tells the
539
columns to be updated; in the case of
540
a delete, this should be set to NULL */
541
ulint cmpl_info, /*!< in: compiler info on secondary
543
mtr_t* mtr) /*!< in: mtr */
555
ibool ignore_prefix = FALSE;
556
byte ext_buf[REC_MAX_INDEX_COL_LEN
557
+ BTR_EXTERN_FIELD_REF_SIZE];
559
ut_a(dict_index_is_clust(index));
560
ut_ad(rec_offs_validate(rec, index, offsets));
561
ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
562
+ TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
563
table = index->table;
565
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
566
+ TRX_UNDO_PAGE_FREE);
567
ptr = undo_page + first_free;
569
ut_ad(first_free <= UNIV_PAGE_SIZE);
571
if (trx_undo_left(undo_page, ptr) < 50) {
573
/* NOTE: the value 50 must be big enough so that the general
574
fields written below fit on the undo log page */
579
/* Reserve 2 bytes for the pointer to the next undo log record */
582
/* Store first some general parameters to the undo log */
585
type_cmpl = TRX_UNDO_DEL_MARK_REC;
586
} else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
587
type_cmpl = TRX_UNDO_UPD_DEL_REC;
588
/* We are about to update a delete marked record.
589
We don't typically need the prefix in this case unless
590
the delete marking is done by the same transaction
591
(which we check below). */
592
ignore_prefix = TRUE;
594
type_cmpl = TRX_UNDO_UPD_EXIST_REC;
597
type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
600
*ptr++ = (byte) type_cmpl;
601
ptr += mach_ull_write_much_compressed(ptr, trx->undo_no);
603
ptr += mach_ull_write_much_compressed(ptr, table->id);
605
/*----------------------------------------*/
606
/* Store the state of the info bits */
608
*ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));
610
/* Store the values of the system columns */
611
field = rec_get_nth_field(rec, offsets,
612
dict_index_get_sys_col_pos(
613
index, DATA_TRX_ID), &flen);
614
ut_ad(flen == DATA_TRX_ID_LEN);
616
trx_id = trx_read_trx_id(field);
618
/* If it is an update of a delete marked record, then we are
619
allowed to ignore blob prefixes if the delete marking was done
620
by some other trx as it must have committed by now for us to
621
allow an over-write. */
623
ignore_prefix = (trx_id != trx->id);
625
ptr += mach_ull_write_compressed(ptr, trx_id);
627
field = rec_get_nth_field(rec, offsets,
628
dict_index_get_sys_col_pos(
629
index, DATA_ROLL_PTR), &flen);
630
ut_ad(flen == DATA_ROLL_PTR_LEN);
632
ptr += mach_ull_write_compressed(ptr, trx_read_roll_ptr(field));
634
/*----------------------------------------*/
635
/* Store then the fields required to uniquely determine the
636
record which will be modified in the clustered index */
638
for (i = 0; i < dict_index_get_n_unique(index); i++) {
640
field = rec_get_nth_field(rec, offsets, i, &flen);
642
/* The ordering columns must not be stored externally. */
643
ut_ad(!rec_offs_nth_extern(offsets, i));
644
ut_ad(dict_index_get_nth_col(index, i)->ord_part);
646
if (trx_undo_left(undo_page, ptr) < 5) {
651
ptr += mach_write_compressed(ptr, flen);
653
if (flen != UNIV_SQL_NULL) {
654
if (trx_undo_left(undo_page, ptr) < flen) {
659
ut_memcpy(ptr, field, flen);
664
/*----------------------------------------*/
665
/* Save to the undo log the old values of the columns to be updated. */
668
if (trx_undo_left(undo_page, ptr) < 5) {
673
ptr += mach_write_compressed(ptr, upd_get_n_fields(update));
675
for (i = 0; i < upd_get_n_fields(update); i++) {
677
ulint pos = upd_get_nth_field(update, i)->field_no;
679
/* Write field number to undo log */
680
if (trx_undo_left(undo_page, ptr) < 5) {
685
ptr += mach_write_compressed(ptr, pos);
687
/* Save the old value of field */
688
field = rec_get_nth_field(rec, offsets, pos, &flen);
690
if (trx_undo_left(undo_page, ptr) < 15) {
695
if (rec_offs_nth_extern(offsets, pos)) {
696
ptr = trx_undo_page_report_modify_ext(
698
dict_index_get_nth_col(index, pos)
701
&& flen < REC_MAX_INDEX_COL_LEN
703
dict_table_zip_size(table),
706
/* Notify purge that it eventually has to
707
free the old externally stored field */
709
trx->update_undo->del_marks = TRUE;
711
*type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
713
ptr += mach_write_compressed(ptr, flen);
716
if (flen != UNIV_SQL_NULL) {
717
if (trx_undo_left(undo_page, ptr) < flen) {
722
ut_memcpy(ptr, field, flen);
728
/*----------------------------------------*/
729
/* In the case of a delete marking, and also in the case of an update
730
where any ordering field of any index changes, store the values of all
731
columns which occur as ordering fields in any index. This info is used
732
in the purge of old versions where we use it to build and search the
733
delete marked index records, to look if we can remove them from the
734
index tree. Note that starting from 4.0.14 also externally stored
735
fields can be ordering in some index. Starting from 5.2, we no longer
736
store REC_MAX_INDEX_COL_LEN first bytes to the undo log record,
737
but we can construct the column prefix fields in the index by
738
fetching the first page of the BLOB that is pointed to by the
739
clustered index. This works also in crash recovery, because all pages
740
(including BLOBs) are recovered before anything is rolled back. */
742
if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
745
trx->update_undo->del_marks = TRUE;
747
if (trx_undo_left(undo_page, ptr) < 5) {
752
/* Reserve 2 bytes to write the number of bytes the stored
753
fields take in this undo record */
757
for (col_no = 0; col_no < dict_table_get_n_cols(table);
760
const dict_col_t* col
761
= dict_table_get_nth_col(table, col_no);
766
/* Write field number to undo log */
767
if (trx_undo_left(undo_page, ptr) < 5 + 15) {
772
pos = dict_index_get_nth_col_pos(index,
774
ptr += mach_write_compressed(ptr, pos);
776
/* Save the old value of field */
777
field = rec_get_nth_field(rec, offsets, pos,
780
if (rec_offs_nth_extern(offsets, pos)) {
781
ptr = trx_undo_page_report_modify_ext(
783
flen < REC_MAX_INDEX_COL_LEN
786
dict_table_zip_size(table),
789
ptr += mach_write_compressed(
793
if (flen != UNIV_SQL_NULL) {
794
if (trx_undo_left(undo_page, ptr)
800
ut_memcpy(ptr, field, flen);
806
mach_write_to_2(old_ptr, ptr - old_ptr);
809
/*----------------------------------------*/
810
/* Write pointers to the previous and the next undo log records */
811
if (trx_undo_left(undo_page, ptr) < 2) {
816
mach_write_to_2(ptr, first_free);
818
mach_write_to_2(undo_page + first_free, ptr - undo_page);
820
mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
823
/* Write to the REDO log about this change in the UNDO log */
825
trx_undof_page_add_undo_rec_log(undo_page, first_free,
826
ptr - undo_page, mtr);
830
/**********************************************************************//**
831
Reads from an undo log update record the system field values of the old
833
@return remaining part of undo log record after reading these values */
836
trx_undo_update_rec_get_sys_cols(
837
/*=============================*/
838
byte* ptr, /*!< in: remaining part of undo
839
log record after reading
840
general parameters */
841
trx_id_t* trx_id, /*!< out: trx id */
842
roll_ptr_t* roll_ptr, /*!< out: roll ptr */
843
ulint* info_bits) /*!< out: info bits state */
845
/* Read the state of the info bits (stored in one byte) */
846
*info_bits = mach_read_from_1(ptr);
849
/* Read the values of the system columns: first the trx id, then the
roll pointer, each in compressed form; ptr is advanced by the encoded
size of each value */
851
*trx_id = mach_ull_read_compressed(ptr);
852
ptr += mach_ull_get_compressed_size(*trx_id);
854
*roll_ptr = mach_ull_read_compressed(ptr);
855
ptr += mach_ull_get_compressed_size(*roll_ptr);
860
/**********************************************************************//**
861
Reads from an update undo log record the number of updated fields.
862
@return remaining part of undo log record after reading this value */
865
trx_undo_update_rec_get_n_upd_fields(
866
/*=================================*/
867
byte* ptr, /*!< in: pointer to remaining part of undo log record */
868
ulint* n) /*!< out: number of fields */
870
/* The count is stored in compressed form; decode it, then advance ptr
by the size of that encoding. */
*n = mach_read_compressed(ptr);
871
ptr += mach_get_compressed_size(*n);
876
/**********************************************************************//**
877
Reads from an update undo log record a stored field number.
878
@return remaining part of undo log record after reading this value */
881
trx_undo_update_rec_get_field_no(
882
/*=============================*/
883
byte* ptr, /*!< in: pointer to remaining part of undo log record */
884
ulint* field_no)/*!< out: field number */
886
/* The field number is stored in compressed form; decode it, then
advance ptr by the size of that encoding. */
*field_no = mach_read_compressed(ptr);
887
ptr += mach_get_compressed_size(*field_no);
892
/*******************************************************************//**
893
Builds an update vector based on a remaining part of an undo log record.
894
@return remaining part of the record, NULL if an error detected, which
895
means that the record is corrupted */
898
trx_undo_update_rec_get_update(
899
/*===========================*/
900
byte* ptr, /*!< in: remaining part in update undo log
901
record, after reading the row reference
902
NOTE that this copy of the undo log record must
903
be preserved as long as the update vector is
904
used, as we do NOT copy the data in the
906
dict_index_t* index, /*!< in: clustered index */
907
ulint type, /*!< in: TRX_UNDO_UPD_EXIST_REC,
908
TRX_UNDO_UPD_DEL_REC, or
909
TRX_UNDO_DEL_MARK_REC; in the last case,
910
only trx id and roll ptr fields are added to
912
trx_id_t trx_id, /*!< in: transaction id from this undo record */
913
roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */
914
ulint info_bits,/*!< in: info bits from this undo record */
915
trx_t* trx, /*!< in: transaction */
916
mem_heap_t* heap, /*!< in: memory heap from which the memory
917
needed is allocated */
918
upd_t** upd) /*!< out, own: update vector */
920
upd_field_t* upd_field;
926
ut_a(dict_index_is_clust(index));
928
if (type != TRX_UNDO_DEL_MARK_REC) {
929
ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
934
update = upd_create(n_fields + 2, heap);
936
update->info_bits = info_bits;
938
/* Store first trx id and roll ptr to update vector */
940
upd_field = upd_get_nth_field(update, n_fields);
941
buf = static_cast<byte *>(mem_heap_alloc(heap, DATA_TRX_ID_LEN));
942
trx_write_trx_id(buf, trx_id);
944
upd_field_set_field_no(upd_field,
945
dict_index_get_sys_col_pos(index, DATA_TRX_ID),
947
dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);
949
upd_field = upd_get_nth_field(update, n_fields + 1);
950
buf = static_cast<byte *>(mem_heap_alloc(heap, DATA_ROLL_PTR_LEN));
951
trx_write_roll_ptr(buf, roll_ptr);
953
upd_field_set_field_no(
954
upd_field, dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
956
dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);
958
/* Store then the updated ordinary columns to the update vector */
960
for (i = 0; i < n_fields; i++) {
967
ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
969
if (field_no >= dict_index_get_n_fields(index)) {
971
"InnoDB: Error: trying to access"
972
" update undo rec field %lu in ",
974
dict_index_name_print(stderr, trx, index);
976
"InnoDB: but index has only %lu fields\n"
977
"InnoDB: Submit a detailed bug report"
978
" to http://bugs.mysql.com\n"
979
"InnoDB: Run also CHECK TABLE ",
980
(ulong) dict_index_get_n_fields(index));
981
ut_print_name(stderr, trx, TRUE, index->table_name);
983
"InnoDB: n_fields = %lu, i = %lu, ptr %p\n",
984
(ulong) n_fields, (ulong) i, ptr);
989
upd_field = upd_get_nth_field(update, i);
991
upd_field_set_field_no(upd_field, field_no, index, trx);
993
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
995
upd_field->orig_len = orig_len;
997
if (len == UNIV_SQL_NULL) {
998
dfield_set_null(&upd_field->new_val);
999
} else if (len < UNIV_EXTERN_STORAGE_FIELD) {
1000
dfield_set_data(&upd_field->new_val, field, len);
1002
len -= UNIV_EXTERN_STORAGE_FIELD;
1004
dfield_set_data(&upd_field->new_val, field, len);
1005
dfield_set_ext(&upd_field->new_val);
1014
/*******************************************************************//**
1015
Builds a partial row from an update undo log record. It contains the
1016
columns which occur as ordering in any index of the table.
1017
@return pointer to remaining part of undo record */
1020
trx_undo_rec_get_partial_row(
1021
/*=========================*/
1022
byte* ptr, /*!< in: remaining part in update undo log
1023
record of a suitable type, at the start of
1024
the stored index columns;
1025
NOTE that this copy of the undo log record must
1026
be preserved as long as the partial row is
1027
used, as we do NOT copy the data in the
1029
dict_index_t* index, /*!< in: clustered index */
1030
dtuple_t** row, /*!< out, own: partial row */
1031
ibool ignore_prefix, /*!< in: flag to indicate if we
1032
expect blob prefixes in undo. Used
1033
only in the assertion. */
1034
mem_heap_t* heap) /*!< in: memory heap from which the memory
1035
needed is allocated */
1037
const byte* end_ptr;
1044
ut_ad(dict_index_is_clust(index));
1046
row_len = dict_table_get_n_cols(index->table);
1048
*row = dtuple_create(heap, row_len);
1050
dict_table_copy_types(*row, index->table);
1052
end_ptr = ptr + mach_read_from_2(ptr);
1055
while (ptr != end_ptr) {
1059
const dict_col_t* col;
1064
ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
1066
col = dict_index_get_nth_col(index, field_no);
1067
col_no = dict_col_get_no(col);
1069
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
1071
dfield = dtuple_get_nth_field(*row, col_no);
1073
dfield_set_data(dfield, field, len);
1075
if (len != UNIV_SQL_NULL
1076
&& len >= UNIV_EXTERN_STORAGE_FIELD) {
1077
dfield_set_len(dfield,
1078
len - UNIV_EXTERN_STORAGE_FIELD);
1079
dfield_set_ext(dfield);
1080
/* If the prefix of this column is indexed,
1081
ensure that enough prefix is stored in the
1083
if (!ignore_prefix && col->ord_part) {
1084
ut_a(dfield_get_len(dfield)
1085
>= 2 * BTR_EXTERN_FIELD_REF_SIZE);
1086
ut_a(dict_table_get_format(index->table)
1087
>= DICT_TF_FORMAT_ZIP
1088
|| dfield_get_len(dfield)
1089
>= REC_MAX_INDEX_COL_LEN
1090
+ BTR_EXTERN_FIELD_REF_SIZE);
1097
#endif /* !UNIV_HOTBACKUP */
1099
/***********************************************************************//**
1100
Erases the unused undo log page end. */
1103
trx_undo_erase_page_end(
1104
/*====================*/
1105
page_t* undo_page, /*!< in: undo page whose end to erase */
1106
mtr_t* mtr) /*!< in: mtr */
1110
/* The unused part of the page starts at its first-free offset. */
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
1111
+ TRX_UNDO_PAGE_FREE);
1112
/* Fill with 0xff from the first-free offset up to, but not including,
the last FIL_PAGE_DATA_END bytes of the page. */
memset(undo_page + first_free, 0xff,
1113
(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - first_free);
1115
/* Log the erase as an MLOG_UNDO_ERASE_END record; only the initial log
record is written here. */
mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
1118
/***********************************************************//**
1119
Parses a redo log record of erasing of an undo page end.
1120
@return end of log record or NULL */
1123
trx_undo_parse_erase_page_end(
1124
/*==========================*/
1125
byte* ptr, /*!< in: buffer */
1126
byte* /*end_ptr*/, /*!< in: buffer end */
1127
page_t* page, /*!< in: page or NULL */
1128
mtr_t* mtr) /*!< in: mtr or NULL */
1130
/* The buffer-end parameter is unnamed in the signature above, so it
must not be referenced here: naming it in the assertion would be an
undeclared identifier wherever ut_ad() expands its argument. Only ptr
is checked. */
ut_ad(ptr);
1137
/* Apply the erase to the page through the same routine that is used
when the erase is first performed. */
trx_undo_erase_page_end(page, mtr);
1142
#ifndef UNIV_HOTBACKUP
1143
/***********************************************************************//**
1144
Writes information to an undo log about an insert, update, or a delete marking
1145
of a clustered index record. This information is used in a rollback of the
1146
transaction and in consistent reads that must look to the history of this
1148
@return DB_SUCCESS or error code */
1151
trx_undo_report_row_operation(
1152
/*==========================*/
1153
ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
1154
set, does nothing */
1155
ulint op_type, /*!< in: TRX_UNDO_INSERT_OP or
1156
TRX_UNDO_MODIFY_OP */
1157
que_thr_t* thr, /*!< in: query thread */
1158
dict_index_t* index, /*!< in: clustered index */
1159
const dtuple_t* clust_entry, /*!< in: in the case of an insert,
1160
index entry to insert into the
1161
clustered index, otherwise NULL */
1162
const upd_t* update, /*!< in: in the case of an update,
1163
the update vector, otherwise NULL */
1164
ulint cmpl_info, /*!< in: compiler info on secondary
1166
const rec_t* rec, /*!< in: in case of an update or delete
1167
marking, the record in the clustered
1168
index, otherwise NULL */
1169
roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the
1170
inserted undo log record,
1171
0 if BTR_NO_UNDO_LOG
1172
flag was specified */
1179
ulint err = DB_SUCCESS;
1180
mem_heap_t* heap = NULL;
1181
ulint offsets_[REC_OFFS_NORMAL_SIZE];
1182
ulint* offsets = offsets_;
1183
rec_offs_init(offsets_);
1185
ut_a(dict_index_is_clust(index));
1187
if (flags & BTR_NO_UNDO_LOG_FLAG) {
1195
ut_ad((op_type != TRX_UNDO_INSERT_OP)
1196
|| (clust_entry && !update && !rec));
1198
trx = thr_get_trx(thr);
1201
mutex_enter(&(trx->undo_mutex));
1203
/* If the undo log is not assigned yet, assign one */
1205
if (op_type == TRX_UNDO_INSERT_OP) {
1207
if (trx->insert_undo == NULL) {
1209
err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
1212
undo = trx->insert_undo;
1214
if (UNIV_UNLIKELY(!undo)) {
1215
/* Did not succeed */
1216
mutex_exit(&(trx->undo_mutex));
1221
ut_ad(op_type == TRX_UNDO_MODIFY_OP);
1223
if (trx->update_undo == NULL) {
1225
err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
1229
undo = trx->update_undo;
1231
if (UNIV_UNLIKELY(!undo)) {
1232
/* Did not succeed */
1233
mutex_exit(&(trx->undo_mutex));
1237
offsets = rec_get_offsets(rec, index, offsets,
1238
ULINT_UNDEFINED, &heap);
1241
page_no = undo->last_page_no;
1246
buf_block_t* undo_block;
1250
undo_block = buf_page_get_gen(undo->space, undo->zip_size,
1251
page_no, RW_X_LATCH,
1252
undo->guess_block, BUF_GET,
1253
__FILE__, __LINE__, &mtr);
1254
buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE);
1256
undo_page = buf_block_get_frame(undo_block);
1258
if (op_type == TRX_UNDO_INSERT_OP) {
1259
offset = trx_undo_page_report_insert(
1260
undo_page, trx, index, clust_entry, &mtr);
1262
offset = trx_undo_page_report_modify(
1263
undo_page, trx, index, rec, offsets, update,
1267
if (UNIV_UNLIKELY(offset == 0)) {
1268
/* The record did not fit on the page. We erase the
1269
end segment of the undo log page and write a log
1270
record of it: this is to ensure that in the debug
1271
version the replicate page constructed using the log
1272
records stays identical to the original page */
1274
trx_undo_erase_page_end(undo_page, &mtr);
1281
undo->empty = FALSE;
1282
undo->top_page_no = page_no;
1283
undo->top_offset = offset;
1284
undo->top_undo_no = trx->undo_no;
1285
undo->guess_block = undo_block;
1289
mutex_exit(&trx->undo_mutex);
1291
*roll_ptr = trx_undo_build_roll_ptr(
1292
op_type == TRX_UNDO_INSERT_OP,
1293
rseg->id, page_no, offset);
1294
if (UNIV_LIKELY_NULL(heap)) {
1295
mem_heap_free(heap);
1300
ut_ad(page_no == undo->last_page_no);
1302
/* We have to extend the undo log by one page */
1306
/* When we add a page to an undo log, this is analogous to
1307
a pessimistic insert in a B-tree, and we must reserve the
1308
counterpart of the tree latch, which is the rseg mutex. */
1310
mutex_enter(&(rseg->mutex));
1312
page_no = trx_undo_add_page(trx, undo, &mtr);
1314
mutex_exit(&(rseg->mutex));
1316
if (UNIV_UNLIKELY(page_no == FIL_NULL)) {
1317
/* Did not succeed: out of space */
1319
mutex_exit(&(trx->undo_mutex));
1321
if (UNIV_LIKELY_NULL(heap)) {
1322
mem_heap_free(heap);
1324
return(DB_OUT_OF_FILE_SPACE);
1329
/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
1331
/******************************************************************//**
1332
Copies an undo record to heap. This function can be called if we know that
1333
the undo log record exists.
The roll pointer is first decoded with trx_undo_decode_roll_ptr(); the
rollback segment is then looked up with trx_rseg_get_on_id() using the
decoded segment id, the undo page is obtained through
trx_undo_page_get_s_latched() with the space and zip_size of that segment,
and the record located at the decoded offset within the page is duplicated
into heap with trx_undo_rec_copy().
1334
@return own: copy of the record */
1337
trx_undo_get_undo_rec_low(
1338
/*======================*/
1339
roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
1340
mem_heap_t* heap) /*!< in: memory heap where copied */
1342
trx_undo_rec_t* undo_rec;
1346
const page_t* undo_page;
1351
/* Split the roll pointer into the components used for the lookup below */
trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
1353
rseg = trx_rseg_get_on_id(rseg_id);
1357
undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
1360
/* The record starts offset bytes past the start of undo_page */
undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
1367
/******************************************************************//**
1368
Copies an undo record to heap.
If trx_purge_update_undo_must_exist(trx_id) reports false, the function
returns DB_MISSING_HISTORY before any copy is attempted; otherwise the
record is copied with trx_undo_get_undo_rec_low() and stored in *undo_rec.
1370
NOTE: the caller must have latches on the clustered index page and, as the
UNIV_SYNC_DEBUG assertion in the body checks, a shared latch on
purge_sys->latch.
1373
@return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been
1374
truncated and we cannot fetch the old version */
1377
trx_undo_get_undo_rec(
1378
/*==================*/
1379
roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
1380
trx_id_t trx_id, /*!< in: id of the trx that generated
1381
the roll pointer: it points to an
1382
undo log of this transaction */
1383
trx_undo_rec_t** undo_rec, /*!< out, own: copy of the record */
1384
mem_heap_t* heap) /*!< in: memory heap where copied */
1386
#ifdef UNIV_SYNC_DEBUG
1387
ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
1388
#endif /* UNIV_SYNC_DEBUG */
1390
if (!trx_purge_update_undo_must_exist(trx_id)) {
1392
/* It may be that the necessary undo log has already been
1395
return(DB_MISSING_HISTORY);
1398
*undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
1403
/*******************************************************************//**
1404
Build a previous version of a clustered index record. This function checks
1405
that the caller has a latch on the index page of the clustered index record
1406
and an s-latch on the purge_view. This guarantees that the stack of versions
1407
is locked all the way down to the purge_view.
The previous version is derived from the update undo record that the roll
pointer stored in rec refers to. The update vector parsed from that undo
record is applied either to an index entry built from rec, which is then
converted back to a record (when row_upd_changes_field_size_or_external()
holds), or to a heap copy of rec that is updated in place.
NOTE(review): the name of the index_mtr parameter is commented out in the
parameter list, yet the ut_ad() on mtr_memo_contains_page() in the body
still refers to index_mtr; confirm that this compiles in builds where
ut_ad() is active.
1408
@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
1409
earlier than purge_view, which means that it may have been removed,
1410
DB_ERROR if corrupted record */
1413
trx_undo_prev_version_build(
1414
/*========================*/
1415
const rec_t* index_rec,/*!< in: clustered index record in the
1417
mtr_t* /*index_mtr*/,
1418
/*!< in: mtr which contains the latch to
1419
index_rec page and purge_view */
1420
const rec_t* rec, /*!< in: version of a clustered index record */
1421
dict_index_t* index, /*!< in: clustered index */
1422
ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
1423
mem_heap_t* heap, /*!< in: memory heap from which the memory
1424
needed is allocated */
1425
rec_t** old_vers)/*!< out, own: previous version, or NULL if
1426
rec is the first inserted version, or if
1427
history data has been deleted (an error),
1428
or if the purge COULD have removed the version
1429
though it has not yet done so */
1431
trx_undo_rec_t* undo_rec = NULL;
1433
trx_id_t rec_trx_id;
1436
table_id_t table_id;
1438
roll_ptr_t roll_ptr;
1439
roll_ptr_t old_roll_ptr;
1440
upd_t* update= NULL;
1447
#ifdef UNIV_SYNC_DEBUG
1448
ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
1449
#endif /* UNIV_SYNC_DEBUG */
1450
ut_ad(mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_S_FIX)
1451
|| mtr_memo_contains_page(index_mtr, index_rec,
1452
MTR_MEMO_PAGE_X_FIX));
1453
ut_ad(rec_offs_validate(rec, index, offsets));
1455
if (!dict_index_is_clust(index)) {
1456
fprintf(stderr, "InnoDB: Error: trying to access"
1457
" update undo rec for non-clustered index %s\n"
1458
"InnoDB: Submit a detailed bug report to"
1459
" http://bugs.mysql.com\n"
1460
"InnoDB: index record ", index->name);
1461
rec_print(stderr, index_rec, index);
1463
"InnoDB: record version ", stderr);
1464
rec_print_new(stderr, rec, offsets);
1469
/* Keep the roll pointer found in rec: roll_ptr is overwritten when the undo record is parsed, and old_roll_ptr is printed in the diagnostics below */
roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
1470
old_roll_ptr = roll_ptr;
1474
if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
1476
/* The record rec is the first inserted version */
1481
rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
1483
err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);
1485
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
1486
/* The undo record may already have been purged.
1487
This should never happen in InnoDB. */
1492
ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
1493
&dummy_extern, &undo_no, &table_id);
1495
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
1498
/* (a) If a clustered index record version is such that the
1499
trx id stamp in it is bigger than purge_sys->view, then the
1500
BLOBs in that version are known to exist (the purge has not
1501
progressed that far);
1503
(b) if the version is the first version such that trx id in it
1504
is less than purge_sys->view, and it is not delete-marked,
1505
then the BLOBs in that version are known to exist (the purge
1506
cannot have purged the BLOBs referenced by that version).
1509
This function does not fetch any BLOBs. The callers might, by
1510
possibly invoking row_ext_create() via row_build(). However,
1511
they should have all needed information in the *old_vers
1512
returned by this function. This is because *old_vers is based
1513
on the transaction undo log records. The function
1514
trx_undo_page_fetch_ext() will write BLOB prefixes to the
1515
transaction undo log that are at least as long as the longest
1516
possible column prefix in a secondary index. Thus, secondary
1517
index entries for *old_vers can be constructed without
1518
dereferencing any BLOB pointers. */
1520
ptr = trx_undo_rec_skip_row_ref(ptr, index);
1522
ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
1523
roll_ptr, info_bits,
1524
NULL, heap, &update);
1526
/* The table id parsed from the undo record must match the table of the index; a mismatch is reported as a corrupted record */
if (UNIV_UNLIKELY(table_id != index->table->id)) {
1530
"InnoDB: Error: trying to access update undo rec"
1532
"InnoDB: but the table id in the"
1533
" undo record is wrong\n"
1534
"InnoDB: Submit a detailed bug report"
1535
" to http://bugs.mysql.com\n"
1536
"InnoDB: Run also CHECK TABLE %s\n",
1537
index->table_name, index->table_name);
1541
/* The record was corrupted, return an error; these printfs
1542
should catch an elusive bug in row_vers_old_has_index_entry */
1545
"InnoDB: table %s, index %s, n_uniq %lu\n"
1546
"InnoDB: undo rec address %p, type %lu cmpl_info %lu\n"
1547
"InnoDB: undo rec table id %llu,"
1548
" index table id %llu\n"
1549
"InnoDB: dump of 150 bytes in undo rec: ",
1550
index->table_name, index->name,
1551
(ulong) dict_index_get_n_unique(index),
1552
undo_rec, (ulong) type, (ulong) cmpl_info,
1554
(ullint) index->table->id);
1555
ut_print_buf(stderr, undo_rec, 150);
1557
"InnoDB: index record ", stderr);
1558
rec_print(stderr, index_rec, index);
1560
"InnoDB: record version ", stderr);
1561
rec_print_new(stderr, rec, offsets);
1562
/* NOTE(review): the format below has no space between "in update rec" and the value that follows it, so the two run together in the output */
fprintf(stderr, "\n"
1563
"InnoDB: Record trx id " TRX_ID_FMT
1564
", update rec trx id " TRX_ID_FMT "\n"
1565
"InnoDB: Roll ptr in rec " TRX_ID_FMT
1566
", in update rec" TRX_ID_FMT "\n",
1568
old_roll_ptr, roll_ptr);
1570
trx_purge_sys_print();
1574
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
1577
/* We have to set the appropriate extern storage bits in the
1578
old version of the record: the extern bits in rec for those
1579
fields that update does NOT update, as well as the bits for
1580
those fields that update updates to become externally stored
1581
fields. Store the info: */
1583
entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index,
1584
offsets, &n_ext, heap);
1585
n_ext += btr_push_update_extern_fields(entry, update, heap);
1586
/* The page containing the clustered index record
1587
corresponding to entry is latched in mtr. Thus the
1588
following call is safe. */
1589
row_upd_index_replace_new_col_vals(entry, index, update, heap);
1591
/* Convert the updated entry into a record stored in a buffer allocated from heap */
buf = static_cast<byte *>(mem_heap_alloc(heap, rec_get_converted_size(index, entry,
1594
*old_vers = rec_convert_dtuple_to_rec(buf, index,
1597
/* Copy rec into memory allocated from heap and apply the update to that copy in place */
buf = static_cast<byte *>(mem_heap_alloc(heap, rec_offs_size(offsets)));
1598
*old_vers = rec_copy(buf, rec, offsets);
1599
rec_offs_make_valid(*old_vers, index, offsets);
1600
row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
1605
#endif /* !UNIV_HOTBACKUP */