1
/******************************************************
2
Transaction undo log record
6
Created 3/26/1996 Heikki Tuuri
7
*******************************************************/
16
#include "mach0data.h"
20
#include "dict0dict.h"
25
#include "trx0purge.h"
28
/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
30
/**************************************************************************
31
Writes the mtr log entry of the inserted undo log record on the undo log
35
trx_undof_page_add_undo_rec_log(
36
/*============================*/
37
page_t* undo_page, /* in: undo log page */
38
ulint old_free, /* in: start offset of the inserted entry */
39
ulint new_free, /* in: end offset of the entry */
40
mtr_t* mtr) /* in: mtr */
46
/* Open the mtr log buffer, asking for 11 + 13 + MLOG_BUF_MARGIN bytes.
NOTE(review): the meaning of the 11 and the 13 is not stated in this
excerpt; presumably they bound the initial log record header and the
length prefix -- confirm. */
log_ptr = mlog_open(mtr, 11 + 13 + MLOG_BUF_MARGIN);
48
/* No log buffer was returned. */
if (log_ptr == NULL) {
53
/* One past the last byte of the region requested from mlog_open(),
computed before log_ptr is advanced; used below to decide whether the
record body fits in place. */
log_end = &log_ptr[11 + 13 + MLOG_BUF_MARGIN];
54
log_ptr = mlog_write_initial_log_record_fast(
55
undo_page, MLOG_UNDO_INSERT, log_ptr, mtr);
56
/* Body length: the span old_free..new_free less 4 bytes. The copies
below start at old_free + 2, so the first 2 bytes of the entry are not
logged; presumably the remaining 2 are a trailing offset field (compare
the "4 + len" layout written by trx_undo_parse_add_undo_rec). */
len = new_free - old_free - 4;
58
/* The log record carries the body length as a 2-byte value. */
mach_write_to_2(log_ptr, len);
61
/* The body fits in the region opened above: copy it in place and close
the log right after it. */
if (log_ptr + len <= log_end) {
62
memcpy(log_ptr, undo_page + old_free + 2, len);
63
mlog_close(mtr, log_ptr + len);
65
/* Alternative path: close the log at the current position and append
the body with mlog_catenate_string(). (The branch keyword separating
this from the in-place path is not part of this excerpt.) */
mlog_close(mtr, log_ptr);
66
mlog_catenate_string(mtr, undo_page + old_free + 2, len);
70
/***************************************************************
71
Parses a redo log record of adding an undo log record. */
74
/* The visible code reads a 2-byte length and that many bytes of undo
record data from the buffer. The data is stored on the page at the offset
held in the TRX_UNDO_PAGE_FREE header field, bracketed by two 2-byte
offsets, and the header field is advanced by 4 + len.
NOTE(review): the handling of page == NULL and the return statements are
not part of this excerpt. */
trx_undo_parse_add_undo_rec(
75
/*========================*/
76
/* out: end of log record or NULL */
77
byte* ptr, /* in: buffer */
78
byte* end_ptr,/* in: buffer end */
79
page_t* page) /* in: page or NULL */
85
/* Fewer than 2 bytes available: the length field itself is incomplete. */
if (end_ptr < ptr + 2) {
90
len = mach_read_from_2(ptr);
93
/* The record data is not fully contained in the buffer. */
if (end_ptr < ptr + len) {
103
/* Offset of the first free byte on the undo page; the new record is
placed there. */
first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
104
+ TRX_UNDO_PAGE_FREE);
105
rec = page + first_free;
107
/* Leading 2 bytes of the record: the offset just past this record. */
mach_write_to_2(rec, first_free + 4 + len);
108
/* Trailing 2 bytes of the record: the offset where this record starts. */
mach_write_to_2(rec + 2 + len, first_free);
110
/* Advance the page's first-free offset past the new record. */
mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
111
first_free + 4 + len);
112
/* Copy the record data between the two offset fields. */
ut_memcpy(rec + 2, ptr, len);
117
/**************************************************************************
118
Calculates the free space left for extending an undo log record. */
123
/* out: bytes left */
124
const page_t* page, /* in: undo log page */
125
const byte* ptr) /* in: pointer to page */
127
/* The '- 10' is a safety margin, in case we have some small
128
calculation error below */
130
return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END);
133
/**************************************************************************
134
Set the next and previous pointers in the undo page for the undo record
135
that was written to ptr. Update the first free value by the number of bytes
136
written for this undo record.*/
139
/* Summary of the visible steps: store the old first-free offset at ptr,
compute end_of_rec from ptr, store end_of_rec both at the old first-free
position and in the page header, then hand the range to
trx_undof_page_add_undo_rec_log(). */
trx_undo_page_set_next_prev_and_add(
140
/*================================*/
141
/* out: offset of the inserted entry
142
on the page if succeeded, 0 if fail */
143
page_t* undo_page, /* in/out: undo log page */
144
byte* ptr, /* in: ptr up to where data has been
145
written on this undo page. */
146
mtr_t* mtr) /* in: mtr */
148
ulint first_free; /* offset within undo_page */
149
ulint end_of_rec; /* offset within undo_page */
150
byte* ptr_to_first_free;
151
/* pointer within undo_page
152
that points to the next free
153
offset value within undo_page.*/
155
/* ptr must lie strictly inside the page frame (debug-only checks). */
ut_ad(ptr > undo_page);
156
ut_ad(ptr < undo_page + UNIV_PAGE_SIZE);
158
/* The 2-byte offset written at ptr below must still fit on the page. */
if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) {
163
ptr_to_first_free = undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE;
165
/* The old first-free offset is where the record being finished starts. */
first_free = mach_read_from_2(ptr_to_first_free);
167
/* Write offset of the previous undo log record */
168
mach_write_to_2(ptr, first_free);
171
end_of_rec = ptr - undo_page;
173
/* Write offset of the next undo log record */
174
mach_write_to_2(undo_page + first_free, end_of_rec);
176
/* Update the offset to first free undo record */
177
mach_write_to_2(ptr_to_first_free, end_of_rec);
179
/* Write this log entry to the UNDO log */
180
trx_undof_page_add_undo_rec_log(undo_page, first_free,
186
/**************************************************************************
187
Reports in the undo log of an insert of a clustered index record. */
190
/* Record layout as written by the visible code, starting at the page's
first-free offset: a type byte (TRX_UNDO_INSERT_REC), trx->undo_no and the
table id in much-compressed form, then for each of the index's unique
fields a compressed length followed by the field data (data is copied
only when the length is not UNIV_SQL_NULL). The record is completed by
trx_undo_page_set_next_prev_and_add(). */
trx_undo_page_report_insert(
191
/*========================*/
192
/* out: offset of the inserted entry
193
on the page if succeed, 0 if fail */
194
page_t* undo_page, /* in: undo log page */
195
trx_t* trx, /* in: transaction */
196
dict_index_t* index, /* in: clustered index */
197
const dtuple_t* clust_entry, /* in: index entry which will be
198
inserted to the clustered index */
199
mtr_t* mtr) /* in: mtr */
205
ut_ad(dict_index_is_clust(index));
206
/* The page must be an insert undo page (debug-only check). */
ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
207
+ TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
209
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
210
+ TRX_UNDO_PAGE_FREE);
211
/* The new record starts at the page's first free byte. */
ptr = undo_page + first_free;
213
ut_ad(first_free <= UNIV_PAGE_SIZE);
215
/* NOTE(review): 2 + 1 + 11 + 11 presumably stands for the 2-byte next
pointer, the type byte, and an upper bound of 11 bytes for each of the
two much-compressed values written below -- confirm. */
if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) {
217
/* Not enough space for writing the general parameters */
222
/* Reserve 2 bytes for the pointer to the next undo log record */
225
/* Store first some general parameters to the undo log */
226
*ptr++ = TRX_UNDO_INSERT_REC;
227
ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
228
ptr += mach_dulint_write_much_compressed(ptr, index->table->id);
229
/*----------------------------------------*/
230
/* Store then the fields required to uniquely determine the record
231
to be inserted in the clustered index */
233
for (i = 0; i < dict_index_get_n_unique(index); i++) {
235
const dfield_t* field = dtuple_get_nth_field(clust_entry, i);
236
ulint flen = dfield_get_len(field);
238
/* Space check before writing the compressed length. */
if (trx_undo_left(undo_page, ptr) < 5) {
243
ptr += mach_write_compressed(ptr, flen);
245
/* SQL NULL fields have a length marker only, no data bytes. */
if (flen != UNIV_SQL_NULL) {
246
/* Space check before copying the field data. */
if (trx_undo_left(undo_page, ptr) < flen) {
251
ut_memcpy(ptr, dfield_get_data(field), flen);
256
/* Write the next/prev offsets, update the page header and log the record;
the result of that call is this function's return value. */
return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
259
/**************************************************************************
260
Reads from an undo log record the general parameters. */
263
/* Decodes one packed byte into *type, *cmpl_info and *updated_extern, then
reads the undo number and the table id, both stored in much-compressed
form. */
trx_undo_rec_get_pars(
264
/*==================*/
265
/* out: remaining part of undo log
266
record after reading these values */
267
trx_undo_rec_t* undo_rec, /* in: undo log record */
268
ulint* type, /* out: undo record type:
269
TRX_UNDO_INSERT_REC, ... */
270
ulint* cmpl_info, /* out: compiler info, relevant only
271
for update type records */
272
ibool* updated_extern, /* out: TRUE if we updated an
273
externally stored field */
274
dulint* undo_no, /* out: undo log record number */
275
dulint* table_id) /* out: table id */
282
/* One byte packs the record type, the compiler info and the
TRX_UNDO_UPD_EXTERN flag. */
type_cmpl = mach_read_from_1(ptr);
285
/* Strip the flag so that only type and compiler info remain. */
if (type_cmpl & TRX_UNDO_UPD_EXTERN) {
286
*updated_extern = TRUE;
287
type_cmpl -= TRX_UNDO_UPD_EXTERN;
289
*updated_extern = FALSE;
292
/* The part below TRX_UNDO_CMPL_INFO_MULT is the type, the quotient is
the compiler info. NOTE(review): the mask form assumes
TRX_UNDO_CMPL_INFO_MULT is a power of two -- confirm against its
definition. */
*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
293
*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
295
/* Each value is read, then ptr is advanced by the size reported for that
value. */
*undo_no = mach_dulint_read_much_compressed(ptr);
296
ptr += mach_dulint_get_much_compressed_size(*undo_no);
298
*table_id = mach_dulint_read_much_compressed(ptr);
299
ptr += mach_dulint_get_much_compressed_size(*table_id);
304
/**************************************************************************
305
Reads from an undo log record a stored column value. */
308
/* A stored value starts with a compressed length word. When that word
equals UNIV_EXTERN_STORAGE_FIELD it acts as a marker: the original length
and the real length follow, and UNIV_EXTERN_STORAGE_FIELD is added to the
returned *len. Not every case of the dispatch on *len is visible in this
excerpt. */
trx_undo_rec_get_col_val(
309
/*=====================*/
310
/* out: remaining part of undo log record after
311
reading these values */
312
byte* ptr, /* in: pointer to remaining part of undo log record */
313
byte** field, /* out: pointer to stored field */
314
ulint* len, /* out: length of the field, or UNIV_SQL_NULL */
315
ulint* orig_len)/* out: original length of the locally
316
stored part of an externally stored column, or 0 */
318
*len = mach_read_compressed(ptr);
319
ptr += mach_get_compressed_size(*len);
327
/* Marker case: two more compressed values follow, first the original
length and then the real length of the stored data. */
case UNIV_EXTERN_STORAGE_FIELD:
328
*orig_len = mach_read_compressed(ptr);
329
ptr += mach_get_compressed_size(*orig_len);
330
*len = mach_read_compressed(ptr);
331
ptr += mach_get_compressed_size(*len);
335
/* Debug-only sanity checks on the two lengths just read. */
ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
336
ut_ad(*len > *orig_len);
337
ut_ad(*len >= REC_MAX_INDEX_COL_LEN
338
+ BTR_EXTERN_FIELD_REF_SIZE);
340
/* Return the length with UNIV_EXTERN_STORAGE_FIELD added, so that callers
can test len >= UNIV_EXTERN_STORAGE_FIELD to recognise the value. */
*len += UNIV_EXTERN_STORAGE_FIELD;
344
/* A length at or above UNIV_EXTERN_STORAGE_FIELD carries that constant
as a flag; only the remainder is the number of data bytes to skip. */
if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
345
ptr += *len - UNIV_EXTERN_STORAGE_FIELD;
354
/***********************************************************************
355
Builds a row reference from an undo log record. */
358
trx_undo_rec_get_row_ref(
359
/*=====================*/
360
/* out: pointer to remaining part of undo
362
byte* ptr, /* in: remaining part of a copy of an undo log
363
record, at the start of the row reference;
364
NOTE that this copy of the undo log record must
365
be preserved as long as the row reference is
366
used, as we do NOT copy the data in the
368
dict_index_t* index, /* in: clustered index */
369
dtuple_t** ref, /* out, own: row reference */
370
mem_heap_t* heap) /* in: memory heap from which the memory
371
needed is allocated */
376
ut_ad(index && ptr && ref && heap);
377
ut_a(dict_index_is_clust(index));
379
ref_len = dict_index_get_n_unique(index);
381
*ref = dtuple_create(heap, ref_len);
383
dict_index_copy_types(*ref, index, ref_len);
385
for (i = 0; i < ref_len; i++) {
391
dfield = dtuple_get_nth_field(*ref, i);
393
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
395
dfield_set_data(dfield, field, len);
401
/***********************************************************************
402
Skips a row reference from an undo log record. */
405
trx_undo_rec_skip_row_ref(
406
/*======================*/
407
/* out: pointer to remaining part of undo
409
byte* ptr, /* in: remaining part in update undo log
410
record, at the start of the row reference */
411
dict_index_t* index) /* in: clustered index */
417
ut_a(dict_index_is_clust(index));
419
ref_len = dict_index_get_n_unique(index);
421
for (i = 0; i < ref_len; i++) {
426
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
432
/**************************************************************************
433
Fetch a prefix of an externally stored column, for writing to the undo log
434
of an update or delete marking of a clustered index record. */
437
/* Fills ext_buf with a prefix of the column obtained from
btr_copy_externally_stored_field_prefix() (called with a limit of
REC_MAX_INDEX_COL_LEN), followed by the last BTR_EXTERN_FIELD_REF_SIZE
bytes of the local field, and sets *len to the combined length.
NOTE(review): the return statement is not part of this excerpt. */
trx_undo_page_fetch_ext(
438
/*====================*/
440
byte* ext_buf, /* in: a buffer of
441
REC_MAX_INDEX_COL_LEN
442
+ BTR_EXTERN_FIELD_REF_SIZE */
443
ulint zip_size, /* compressed page size in bytes,
444
or 0 for uncompressed BLOB */
445
const byte* field, /* in: an externally stored column */
446
ulint* len) /* in: length of field;
447
out: used length of ext_buf */
449
/* Fetch the BLOB. */
450
ulint ext_len = btr_copy_externally_stored_field_prefix(
451
ext_buf, REC_MAX_INDEX_COL_LEN, zip_size, field, *len);
452
/* BLOBs should always be nonempty. */
454
/* Append the BLOB pointer to the prefix. */
455
/* Source of the copy: the final BTR_EXTERN_FIELD_REF_SIZE bytes of the
local field, located from its incoming length *len. */
memcpy(ext_buf + ext_len,
456
field + *len - BTR_EXTERN_FIELD_REF_SIZE,
457
BTR_EXTERN_FIELD_REF_SIZE);
458
/* Report how much of ext_buf is now in use: prefix plus reference. */
*len = ext_len + BTR_EXTERN_FIELD_REF_SIZE;
462
/**************************************************************************
463
Writes to the undo log a prefix of an externally stored column. */
466
trx_undo_page_report_modify_ext(
467
/*============================*/
468
/* out: undo log position */
469
byte* ptr, /* in: undo log position,
470
at least 15 bytes must be available */
471
byte* ext_buf, /* in: a buffer of
472
REC_MAX_INDEX_COL_LEN
473
+ BTR_EXTERN_FIELD_REF_SIZE,
474
or NULL when should not fetch
476
ulint zip_size, /* compressed page size in bytes,
477
or 0 for uncompressed BLOB */
478
const byte** field, /* in/out: the locally stored part of
479
the externally stored column */
480
ulint* len) /* in/out: length of field, in bytes */
483
/* If an ordering column is externally stored, we will
484
have to store a longer prefix of the field. In this
485
case, write to the log a marker followed by the
486
original length and the real length of the field. */
487
ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD);
489
ptr += mach_write_compressed(ptr, *len);
491
*field = trx_undo_page_fetch_ext(ext_buf, zip_size,
494
ptr += mach_write_compressed(ptr, *len);
496
ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD
503
/**************************************************************************
504
Reports in the undo log of an update or delete marking of a clustered index
508
trx_undo_page_report_modify(
509
/*========================*/
510
/* out: byte offset of the inserted
511
undo log entry on the page if succeed,
513
page_t* undo_page, /* in: undo log page */
514
trx_t* trx, /* in: transaction */
515
dict_index_t* index, /* in: clustered index where update or
516
delete marking is done */
517
const rec_t* rec, /* in: clustered index record which
518
has NOT yet been modified */
519
const ulint* offsets, /* in: rec_get_offsets(rec, index) */
520
const upd_t* update, /* in: update vector which tells the
521
columns to be updated; in the case of
522
a delete, this should be set to NULL */
523
ulint cmpl_info, /* in: compiler info on secondary
525
mtr_t* mtr) /* in: mtr */
536
byte ext_buf[REC_MAX_INDEX_COL_LEN
537
+ BTR_EXTERN_FIELD_REF_SIZE];
539
ut_a(dict_index_is_clust(index));
540
ut_ad(rec_offs_validate(rec, index, offsets));
541
ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
542
+ TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
543
table = index->table;
545
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
546
+ TRX_UNDO_PAGE_FREE);
547
ptr = undo_page + first_free;
549
ut_ad(first_free <= UNIV_PAGE_SIZE);
551
if (trx_undo_left(undo_page, ptr) < 50) {
553
/* NOTE: the value 50 must be big enough so that the general
554
fields written below fit on the undo log page */
559
/* Reserve 2 bytes for the pointer to the next undo log record */
562
/* Store first some general parameters to the undo log */
565
type_cmpl = TRX_UNDO_DEL_MARK_REC;
566
} else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
567
type_cmpl = TRX_UNDO_UPD_DEL_REC;
569
type_cmpl = TRX_UNDO_UPD_EXIST_REC;
572
type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
575
*ptr++ = (byte) type_cmpl;
576
ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
578
ptr += mach_dulint_write_much_compressed(ptr, table->id);
580
/*----------------------------------------*/
581
/* Store the state of the info bits */
583
*ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));
585
/* Store the values of the system columns */
586
field = rec_get_nth_field(rec, offsets,
587
dict_index_get_sys_col_pos(
588
index, DATA_TRX_ID), &flen);
589
ut_ad(flen == DATA_TRX_ID_LEN);
591
ptr += mach_dulint_write_compressed(ptr, trx_read_trx_id(field));
593
field = rec_get_nth_field(rec, offsets,
594
dict_index_get_sys_col_pos(
595
index, DATA_ROLL_PTR), &flen);
596
ut_ad(flen == DATA_ROLL_PTR_LEN);
598
ptr += mach_dulint_write_compressed(ptr, trx_read_roll_ptr(field));
600
/*----------------------------------------*/
601
/* Store then the fields required to uniquely determine the
602
record which will be modified in the clustered index */
604
for (i = 0; i < dict_index_get_n_unique(index); i++) {
606
field = rec_get_nth_field(rec, offsets, i, &flen);
608
/* The ordering columns must not be stored externally. */
609
ut_ad(!rec_offs_nth_extern(offsets, i));
610
ut_ad(dict_index_get_nth_col(index, i)->ord_part);
612
if (trx_undo_left(undo_page, ptr) < 5) {
617
ptr += mach_write_compressed(ptr, flen);
619
if (flen != UNIV_SQL_NULL) {
620
if (trx_undo_left(undo_page, ptr) < flen) {
625
ut_memcpy(ptr, field, flen);
630
/*----------------------------------------*/
631
/* Save to the undo log the old values of the columns to be updated. */
634
if (trx_undo_left(undo_page, ptr) < 5) {
639
ptr += mach_write_compressed(ptr, upd_get_n_fields(update));
641
for (i = 0; i < upd_get_n_fields(update); i++) {
643
ulint pos = upd_get_nth_field(update, i)->field_no;
645
/* Write field number to undo log */
646
if (trx_undo_left(undo_page, ptr) < 5) {
651
ptr += mach_write_compressed(ptr, pos);
653
/* Save the old value of field */
654
field = rec_get_nth_field(rec, offsets, pos, &flen);
656
if (trx_undo_left(undo_page, ptr) < 15) {
661
if (rec_offs_nth_extern(offsets, pos)) {
662
ptr = trx_undo_page_report_modify_ext(
664
dict_index_get_nth_col(index, pos)
666
&& flen < REC_MAX_INDEX_COL_LEN
668
dict_table_zip_size(table),
671
/* Notify purge that it eventually has to
672
free the old externally stored field */
674
trx->update_undo->del_marks = TRUE;
676
*type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
678
ptr += mach_write_compressed(ptr, flen);
681
if (flen != UNIV_SQL_NULL) {
682
if (trx_undo_left(undo_page, ptr) < flen) {
687
ut_memcpy(ptr, field, flen);
693
/*----------------------------------------*/
694
/* In the case of a delete marking, and also in the case of an update
695
where any ordering field of any index changes, store the values of all
696
columns which occur as ordering fields in any index. This info is used
697
in the purge of old versions where we use it to build and search the
698
delete marked index records, to look if we can remove them from the
699
index tree. Note that starting from 4.0.14 also externally stored
700
fields can be ordering in some index. Starting from 5.2, we no longer
701
store REC_MAX_INDEX_COL_LEN first bytes to the undo log record,
702
but we can construct the column prefix fields in the index by
703
fetching the first page of the BLOB that is pointed to by the
704
clustered index. This works also in crash recovery, because all pages
705
(including BLOBs) are recovered before anything is rolled back. */
707
if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
710
trx->update_undo->del_marks = TRUE;
712
if (trx_undo_left(undo_page, ptr) < 5) {
717
/* Reserve 2 bytes to write the number of bytes the stored
718
fields take in this undo record */
722
for (col_no = 0; col_no < dict_table_get_n_cols(table);
725
const dict_col_t* col
726
= dict_table_get_nth_col(table, col_no);
731
/* Write field number to undo log */
732
if (trx_undo_left(undo_page, ptr) < 5 + 15) {
737
pos = dict_index_get_nth_col_pos(index,
739
ptr += mach_write_compressed(ptr, pos);
741
/* Save the old value of field */
742
field = rec_get_nth_field(rec, offsets, pos,
745
if (rec_offs_nth_extern(offsets, pos)) {
746
ptr = trx_undo_page_report_modify_ext(
748
flen < REC_MAX_INDEX_COL_LEN
750
dict_table_zip_size(table),
753
ptr += mach_write_compressed(
757
if (flen != UNIV_SQL_NULL) {
758
if (trx_undo_left(undo_page, ptr)
764
ut_memcpy(ptr, field, flen);
770
mach_write_to_2(old_ptr, ptr - old_ptr);
773
/*----------------------------------------*/
774
/* Write pointers to the previous and the next undo log records */
775
if (trx_undo_left(undo_page, ptr) < 2) {
780
mach_write_to_2(ptr, first_free);
782
mach_write_to_2(undo_page + first_free, ptr - undo_page);
784
mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
787
/* Write to the REDO log about this change in the UNDO log */
789
trx_undof_page_add_undo_rec_log(undo_page, first_free,
790
ptr - undo_page, mtr);
794
/**************************************************************************
795
Reads from an undo log update record the system field values of the old
799
trx_undo_update_rec_get_sys_cols(
800
/*=============================*/
801
/* out: remaining part of undo log
802
record after reading these values */
803
byte* ptr, /* in: remaining part of undo log
804
record after reading general
806
dulint* trx_id, /* out: trx id */
807
dulint* roll_ptr, /* out: roll ptr */
808
ulint* info_bits) /* out: info bits state */
810
/* Read the state of the info bits */
811
/* The info bits are stored as a single byte. */
*info_bits = mach_read_from_1(ptr);
814
/* Read the values of the system columns */
816
/* Both values are stored in compressed dulint form; after each read, ptr
is advanced by the size reported for the value just read. */
*trx_id = mach_dulint_read_compressed(ptr);
817
ptr += mach_dulint_get_compressed_size(*trx_id);
819
*roll_ptr = mach_dulint_read_compressed(ptr);
820
ptr += mach_dulint_get_compressed_size(*roll_ptr);
825
/**************************************************************************
826
Reads from an update undo log record the number of updated fields. */
829
/* The count is stored as one compressed integer at ptr. */
trx_undo_update_rec_get_n_upd_fields(
830
/*=================================*/
831
/* out: remaining part of undo log record after
832
reading this value */
833
byte* ptr, /* in: pointer to remaining part of undo log record */
834
ulint* n) /* out: number of fields */
836
*n = mach_read_compressed(ptr);
837
/* Step over the value just read, using the size reported for it. */
ptr += mach_get_compressed_size(*n);
842
/**************************************************************************
843
Reads from an update undo log record a stored field number. */
846
/* The field number is stored as one compressed integer at ptr. */
trx_undo_update_rec_get_field_no(
847
/*=============================*/
848
/* out: remaining part of undo log record after
849
reading this value */
850
byte* ptr, /* in: pointer to remaining part of undo log record */
851
ulint* field_no)/* out: field number */
853
*field_no = mach_read_compressed(ptr);
854
/* Step over the value just read, using the size reported for it. */
ptr += mach_get_compressed_size(*field_no);
859
/***********************************************************************
860
Builds an update vector based on a remaining part of an undo log record. */
863
trx_undo_update_rec_get_update(
864
/*===========================*/
865
/* out: remaining part of the record,
866
NULL if an error detected, which means that
867
the record is corrupted */
868
byte* ptr, /* in: remaining part in update undo log
869
record, after reading the row reference
870
NOTE that this copy of the undo log record must
871
be preserved as long as the update vector is
872
used, as we do NOT copy the data in the
874
dict_index_t* index, /* in: clustered index */
875
ulint type, /* in: TRX_UNDO_UPD_EXIST_REC,
876
TRX_UNDO_UPD_DEL_REC, or
877
TRX_UNDO_DEL_MARK_REC; in the last case,
878
only trx id and roll ptr fields are added to
880
dulint trx_id, /* in: transaction id from this undo record */
881
dulint roll_ptr,/* in: roll pointer from this undo record */
882
ulint info_bits,/* in: info bits from this undo record */
883
trx_t* trx, /* in: transaction */
884
mem_heap_t* heap, /* in: memory heap from which the memory
885
needed is allocated */
886
upd_t** upd) /* out, own: update vector */
888
upd_field_t* upd_field;
894
ut_a(dict_index_is_clust(index));
896
if (type != TRX_UNDO_DEL_MARK_REC) {
897
ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
902
update = upd_create(n_fields + 2, heap);
904
update->info_bits = info_bits;
906
/* Store first trx id and roll ptr to update vector */
908
upd_field = upd_get_nth_field(update, n_fields);
909
buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
910
trx_write_trx_id(buf, trx_id);
912
upd_field_set_field_no(upd_field,
913
dict_index_get_sys_col_pos(index, DATA_TRX_ID),
915
dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);
917
upd_field = upd_get_nth_field(update, n_fields + 1);
918
buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
919
trx_write_roll_ptr(buf, roll_ptr);
921
upd_field_set_field_no(
922
upd_field, dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
924
dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);
926
/* Store then the updated ordinary columns to the update vector */
928
for (i = 0; i < n_fields; i++) {
935
ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
937
if (field_no >= dict_index_get_n_fields(index)) {
939
"InnoDB: Error: trying to access"
940
" update undo rec field %lu in ",
942
dict_index_name_print(stderr, trx, index);
944
"InnoDB: but index has only %lu fields\n"
945
"InnoDB: Submit a detailed bug report"
946
" to http://bugs.mysql.com\n"
947
"InnoDB: Run also CHECK TABLE ",
948
(ulong) dict_index_get_n_fields(index));
949
ut_print_name(stderr, trx, TRUE, index->table_name);
951
"InnoDB: n_fields = %lu, i = %lu, ptr %p\n",
952
(ulong) n_fields, (ulong) i, ptr);
956
upd_field = upd_get_nth_field(update, i);
958
upd_field_set_field_no(upd_field, field_no, index, trx);
960
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
962
upd_field->orig_len = orig_len;
964
if (len == UNIV_SQL_NULL) {
965
dfield_set_null(&upd_field->new_val);
966
} else if (len < UNIV_EXTERN_STORAGE_FIELD) {
967
dfield_set_data(&upd_field->new_val, field, len);
969
len -= UNIV_EXTERN_STORAGE_FIELD;
971
dfield_set_data(&upd_field->new_val, field, len);
972
dfield_set_ext(&upd_field->new_val);
981
/***********************************************************************
982
Builds a partial row from an update undo log record. It contains the
983
columns which occur as ordering in any index of the table. */
986
trx_undo_rec_get_partial_row(
987
/*=========================*/
988
/* out: pointer to remaining part of undo
990
byte* ptr, /* in: remaining part in update undo log
991
record of a suitable type, at the start of
992
the stored index columns;
993
NOTE that this copy of the undo log record must
994
be preserved as long as the partial row is
995
used, as we do NOT copy the data in the
997
dict_index_t* index, /* in: clustered index */
998
dtuple_t** row, /* out, own: partial row */
999
mem_heap_t* heap) /* in: memory heap from which the memory
1000
needed is allocated */
1002
const byte* end_ptr;
1009
ut_ad(dict_index_is_clust(index));
1011
row_len = dict_table_get_n_cols(index->table);
1013
*row = dtuple_create(heap, row_len);
1015
dict_table_copy_types(*row, index->table);
1017
end_ptr = ptr + mach_read_from_2(ptr);
1020
while (ptr != end_ptr) {
1024
const dict_col_t* col;
1029
ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
1031
col = dict_index_get_nth_col(index, field_no);
1032
col_no = dict_col_get_no(col);
1034
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
1036
dfield = dtuple_get_nth_field(*row, col_no);
1038
dfield_set_data(dfield, field, len);
1040
if (len != UNIV_SQL_NULL
1041
&& len >= UNIV_EXTERN_STORAGE_FIELD) {
1042
dfield_set_len(dfield,
1043
len - UNIV_EXTERN_STORAGE_FIELD);
1044
dfield_set_ext(dfield);
1045
/* If the prefix of this column is indexed,
1046
ensure that enough prefix is stored in the
1049
|| dfield_get_len(dfield)
1050
>= REC_MAX_INDEX_COL_LEN
1051
+ BTR_EXTERN_FIELD_REF_SIZE);
1058
/***************************************************************************
1059
Erases the unused undo log page end. */
1062
/* Overwrites the page from its first-free offset up to
UNIV_PAGE_SIZE - FIL_PAGE_DATA_END with 0xff bytes and writes an
MLOG_UNDO_ERASE_END initial log record for the page. */
trx_undo_erase_page_end(
1063
/*====================*/
1064
page_t* undo_page, /* in: undo page whose end to erase */
1065
mtr_t* mtr) /* in: mtr */
1069
/* Start of the unused area, as recorded in the page header. */
first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
1070
+ TRX_UNDO_PAGE_FREE);
1071
/* The last FIL_PAGE_DATA_END bytes of the page are left untouched. */
memset(undo_page + first_free, 0xff,
1072
(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - first_free);
1074
/* Only the initial log record is written here; the visible code adds no
further payload to it. */
mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
1077
/***************************************************************
1078
Parses a redo log record of erasing of an undo page end. */
1081
/* In the visible code ptr and end_ptr are used only by the debug
assertion; the erase itself is delegated to trx_undo_erase_page_end().
NOTE(review): page and mtr may be NULL per the parameter notes; any guard
on that and the return statement are not part of this excerpt. */
trx_undo_parse_erase_page_end(
1082
/*==========================*/
1083
/* out: end of log record or NULL */
1084
byte* ptr, /* in: buffer */
1085
byte* end_ptr __attribute__((unused)), /* in: buffer end */
1086
page_t* page, /* in: page or NULL */
1087
mtr_t* mtr) /* in: mtr or NULL */
1089
ut_ad(ptr && end_ptr);
1096
trx_undo_erase_page_end(page, mtr);
1101
/***************************************************************************
1102
Writes information to an undo log about an insert, update, or a delete marking
1103
of a clustered index record. This information is used in a rollback of the
1104
transaction and in consistent reads that must look to the history of this
1108
trx_undo_report_row_operation(
1109
/*==========================*/
1110
/* out: DB_SUCCESS or error code */
1111
ulint flags, /* in: if BTR_NO_UNDO_LOG_FLAG bit is
1112
set, does nothing */
1113
ulint op_type, /* in: TRX_UNDO_INSERT_OP or
1114
TRX_UNDO_MODIFY_OP */
1115
que_thr_t* thr, /* in: query thread */
1116
dict_index_t* index, /* in: clustered index */
1117
const dtuple_t* clust_entry, /* in: in the case of an insert,
1118
index entry to insert into the
1119
clustered index, otherwise NULL */
1120
const upd_t* update, /* in: in the case of an update,
1121
the update vector, otherwise NULL */
1122
ulint cmpl_info, /* in: compiler info on secondary
1124
const rec_t* rec, /* in: in case of an update or delete
1125
marking, the record in the clustered
1126
index, otherwise NULL */
1127
dulint* roll_ptr) /* out: rollback pointer to the
1128
inserted undo log record,
1129
ut_dulint_zero if BTR_NO_UNDO_LOG
1130
flag was specified */
1137
ulint err = DB_SUCCESS;
1138
mem_heap_t* heap = NULL;
1139
ulint offsets_[REC_OFFS_NORMAL_SIZE];
1140
ulint* offsets = offsets_;
1141
rec_offs_init(offsets_);
1143
ut_a(dict_index_is_clust(index));
1145
if (flags & BTR_NO_UNDO_LOG_FLAG) {
1147
*roll_ptr = ut_dulint_zero;
1153
ut_ad((op_type != TRX_UNDO_INSERT_OP)
1154
|| (clust_entry && !update && !rec));
1156
trx = thr_get_trx(thr);
1159
mutex_enter(&(trx->undo_mutex));
1161
/* If the undo log is not assigned yet, assign one */
1163
if (op_type == TRX_UNDO_INSERT_OP) {
1165
if (trx->insert_undo == NULL) {
1167
err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
1170
undo = trx->insert_undo;
1172
if (UNIV_UNLIKELY(!undo)) {
1173
/* Did not succeed */
1174
mutex_exit(&(trx->undo_mutex));
1179
ut_ad(op_type == TRX_UNDO_MODIFY_OP);
1181
if (trx->update_undo == NULL) {
1183
err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
1187
undo = trx->update_undo;
1189
if (UNIV_UNLIKELY(!undo)) {
1190
/* Did not succeed */
1191
mutex_exit(&(trx->undo_mutex));
1195
offsets = rec_get_offsets(rec, index, offsets,
1196
ULINT_UNDEFINED, &heap);
1199
page_no = undo->last_page_no;
1204
buf_block_t* undo_block;
1208
undo_block = buf_page_get_gen(undo->space, undo->zip_size,
1209
page_no, RW_X_LATCH,
1210
undo->guess_block, BUF_GET,
1211
__FILE__, __LINE__, &mtr);
1212
#ifdef UNIV_SYNC_DEBUG
1213
buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE);
1214
#endif /* UNIV_SYNC_DEBUG */
1215
undo_page = buf_block_get_frame(undo_block);
1217
if (op_type == TRX_UNDO_INSERT_OP) {
1218
offset = trx_undo_page_report_insert(
1219
undo_page, trx, index, clust_entry, &mtr);
1221
offset = trx_undo_page_report_modify(
1222
undo_page, trx, index, rec, offsets, update,
1226
if (UNIV_UNLIKELY(offset == 0)) {
1227
/* The record did not fit on the page. We erase the
1228
end segment of the undo log page and write a log
1229
record of it: this is to ensure that in the debug
1230
version the replicate page constructed using the log
1231
records stays identical to the original page */
1233
trx_undo_erase_page_end(undo_page, &mtr);
1240
undo->empty = FALSE;
1241
undo->top_page_no = page_no;
1242
undo->top_offset = offset;
1243
undo->top_undo_no = trx->undo_no;
1244
undo->guess_block = undo_block;
1246
UT_DULINT_INC(trx->undo_no);
1248
mutex_exit(&trx->undo_mutex);
1250
*roll_ptr = trx_undo_build_roll_ptr(
1251
op_type == TRX_UNDO_INSERT_OP,
1252
rseg->id, page_no, offset);
1253
if (UNIV_LIKELY_NULL(heap)) {
1254
mem_heap_free(heap);
1259
ut_ad(page_no == undo->last_page_no);
1261
/* We have to extend the undo log by one page */
1265
/* When we add a page to an undo log, this is analogous to
1266
a pessimistic insert in a B-tree, and we must reserve the
1267
counterpart of the tree latch, which is the rseg mutex. */
1269
mutex_enter(&(rseg->mutex));
1271
page_no = trx_undo_add_page(trx, undo, &mtr);
1273
mutex_exit(&(rseg->mutex));
1275
if (UNIV_UNLIKELY(page_no == FIL_NULL)) {
1276
/* Did not succeed: out of space */
1278
mutex_exit(&(trx->undo_mutex));
1280
if (UNIV_LIKELY_NULL(heap)) {
1281
mem_heap_free(heap);
1283
return(DB_OUT_OF_FILE_SPACE);
1288
/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
1290
/**********************************************************************
1291
Copies an undo record to heap. This function can be called if we know that
1292
the undo log record exists. */
1295
/* Visible steps: decode roll_ptr, look up the rollback segment by the
decoded id, obtain the undo page through trx_undo_page_get_s_latched(),
and copy the record found at the decoded offset into heap. */
trx_undo_get_undo_rec_low(
1296
/*======================*/
1297
/* out, own: copy of the record */
1298
dulint roll_ptr, /* in: roll pointer to record */
1299
mem_heap_t* heap) /* in: memory heap where copied */
1301
trx_undo_rec_t* undo_rec;
1310
/* Split the roll pointer into its components; the remaining arguments of
this call are on a line that is not part of this excerpt. */
trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
1312
rseg = trx_rseg_get_on_id(rseg_id);
1316
/* The space id and zip size for the page lookup come from the rollback
segment. */
undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
1319
/* The copy is allocated from the caller-supplied heap. */
undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
1326
/**********************************************************************
1327
Copies an undo record to heap. The copy itself is made by
trx_undo_get_undo_rec_low(), but only when
trx_purge_update_undo_must_exist(trx_id) returns true; otherwise
DB_MISSING_HISTORY is returned before *undo_rec is written. */
1330
trx_undo_get_undo_rec(
1331
/*==================*/
1332
/* out: DB_SUCCESS, or
1333
DB_MISSING_HISTORY if the undo log
1334
has been truncated and we cannot
1335
fetch the old version; NOTE: the
1336
caller must have latches on the
1337
clustered index page and purge_view */
1338
dulint roll_ptr, /* in: roll pointer to record */
1339
dulint trx_id, /* in: id of the trx that generated
1340
the roll pointer: it points to an
1341
undo log of this transaction */
1342
trx_undo_rec_t** undo_rec, /* out, own: copy of the record */
1343
mem_heap_t* heap) /* in: memory heap where copied */
1345
/* When UNIV_SYNC_DEBUG is defined, assert that purge_sys->latch is
owned in RW_LOCK_SHARED mode, matching the latching NOTE above */
#ifdef UNIV_SYNC_DEBUG
1346
ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
1347
#endif /* UNIV_SYNC_DEBUG */
1349
/* Ask the purge module whether the update undo log written by trx_id
must still exist; if not, report missing history instead of reading
the undo page */
if (!trx_purge_update_undo_must_exist(trx_id)) {
1351
/* It may be that the necessary undo log has already been
1354
return(DB_MISSING_HISTORY);
1357
*undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
1362
/***********************************************************************
1363
Build a previous version of a clustered index record. This function checks
1364
that the caller has a latch on the index page of the clustered index record
1365
and an s-latch on the purge_view. This guarantees that the stack of versions
1366
is locked all the way down to the purge_view. */
1369
trx_undo_prev_version_build(
1370
/*========================*/
1371
/* out: DB_SUCCESS, or DB_MISSING_HISTORY if
1372
the previous version is not >= purge_view,
1373
which means that it may have been removed,
1374
DB_ERROR if corrupted record */
1375
const rec_t* index_rec,/* in: clustered index record in the
1377
mtr_t* index_mtr __attribute__((unused)),
1378
/* in: mtr which contains the latch to
1379
index_rec page and purge_view */
1380
const rec_t* rec, /* in: version of a clustered index record */
1381
dict_index_t* index, /* in: clustered index */
1382
ulint* offsets,/* in: rec_get_offsets(rec, index) */
1383
mem_heap_t* heap, /* in: memory heap from which the memory
1384
needed is allocated */
1385
rec_t** old_vers)/* out, own: previous version, or NULL if
1386
rec is the first inserted version, or if
1387
history data has been deleted (an error),
1388
or if the purge COULD have removed the version
1389
though it has not yet done so */
1391
trx_undo_rec_t* undo_rec = NULL;
1399
dulint old_roll_ptr;
1407
#ifdef UNIV_SYNC_DEBUG
1408
ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
1409
#endif /* UNIV_SYNC_DEBUG */
1410
ut_ad(mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_S_FIX)
1411
|| mtr_memo_contains_page(index_mtr, index_rec,
1412
MTR_MEMO_PAGE_X_FIX));
1413
ut_ad(rec_offs_validate(rec, index, offsets));
1415
if (!dict_index_is_clust(index)) {
1416
fprintf(stderr, "InnoDB: Error: trying to access"
1417
" update undo rec for non-clustered index %s\n"
1418
"InnoDB: Submit a detailed bug report to"
1419
" http://bugs.mysql.com\n"
1420
"InnoDB: index record ", index->name);
1421
rec_print(stderr, index_rec, index);
1423
"InnoDB: record version ", stderr);
1424
rec_print_new(stderr, rec, offsets);
1429
roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
1430
old_roll_ptr = roll_ptr;
1434
if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
1436
/* The record rec is the first inserted version */
1441
rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
1443
err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);
1445
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
1446
/* The undo record may already have been purged.
1447
This should never happen in InnoDB. */
1452
ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
1453
&dummy_extern, &undo_no, &table_id);
1455
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
1458
/* (a) If a clustered index record version is such that the
1459
trx id stamp in it is bigger than purge_sys->view, then the
1460
BLOBs in that version are known to exist (the purge has not
1461
progressed that far);
1463
(b) if the version is the first version such that trx id in it
1464
is less than purge_sys->view, and it is not delete-marked,
1465
then the BLOBs in that version are known to exist (the purge
1466
cannot have purged the BLOBs referenced by that version
1469
This function does not fetch any BLOBs. The callers might, by
1470
possibly invoking row_ext_create() via row_build(). However,
1471
they should have all needed information in the *old_vers
1472
returned by this function. This is because *old_vers is based
1473
on the transaction undo log records. The function
1474
trx_undo_page_fetch_ext() will write BLOB prefixes to the
1475
transaction undo log that are at least as long as the longest
1476
possible column prefix in a secondary index. Thus, secondary
1477
index entries for *old_vers can be constructed without
1478
dereferencing any BLOB pointers. */
1480
ptr = trx_undo_rec_skip_row_ref(ptr, index);
1482
ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
1483
roll_ptr, info_bits,
1484
NULL, heap, &update);
1486
if (ut_dulint_cmp(table_id, index->table->id) != 0) {
1490
"InnoDB: Error: trying to access update undo rec"
1492
"InnoDB: but the table id in the"
1493
" undo record is wrong\n"
1494
"InnoDB: Submit a detailed bug report"
1495
" to http://bugs.mysql.com\n"
1496
"InnoDB: Run also CHECK TABLE %s\n",
1497
index->table_name, index->table_name);
1501
/* The record was corrupted, return an error; these printfs
1502
should catch an elusive bug in row_vers_old_has_index_entry */
1505
"InnoDB: table %s, index %s, n_uniq %lu\n"
1506
"InnoDB: undo rec address %p, type %lu cmpl_info %lu\n"
1507
"InnoDB: undo rec table id %lu %lu,"
1508
" index table id %lu %lu\n"
1509
"InnoDB: dump of 150 bytes in undo rec: ",
1510
index->table_name, index->name,
1511
(ulong) dict_index_get_n_unique(index),
1512
undo_rec, (ulong) type, (ulong) cmpl_info,
1513
(ulong) ut_dulint_get_high(table_id),
1514
(ulong) ut_dulint_get_low(table_id),
1515
(ulong) ut_dulint_get_high(index->table->id),
1516
(ulong) ut_dulint_get_low(index->table->id));
1517
ut_print_buf(stderr, undo_rec, 150);
1519
"InnoDB: index record ", stderr);
1520
rec_print(stderr, index_rec, index);
1522
"InnoDB: record version ", stderr);
1523
rec_print_new(stderr, rec, offsets);
1524
fprintf(stderr, "\n"
1525
"InnoDB: Record trx id " TRX_ID_FMT
1526
", update rec trx id " TRX_ID_FMT "\n"
1527
"InnoDB: Roll ptr in rec %lu %lu, in update rec"
1529
TRX_ID_PREP_PRINTF(rec_trx_id),
1530
TRX_ID_PREP_PRINTF(trx_id),
1531
(ulong) ut_dulint_get_high(old_roll_ptr),
1532
(ulong) ut_dulint_get_low(old_roll_ptr),
1533
(ulong) ut_dulint_get_high(roll_ptr),
1534
(ulong) ut_dulint_get_low(roll_ptr));
1536
trx_purge_sys_print();
1540
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
1543
/* We have to set the appropriate extern storage bits in the
1544
old version of the record: the extern bits in rec for those
1545
fields that update does NOT update, as well as the bits for
1546
those fields that update updates to become externally stored
1547
fields. Store the info: */
1549
entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index,
1550
offsets, &n_ext, heap);
1551
n_ext += btr_push_update_extern_fields(entry, update, heap);
1552
/* The page containing the clustered index record
1553
corresponding to entry is latched in mtr. Thus the
1554
following call is safe. */
1555
row_upd_index_replace_new_col_vals(entry, index, update, heap);
1557
buf = mem_heap_alloc(heap, rec_get_converted_size(index, entry,
1560
*old_vers = rec_convert_dtuple_to_rec(buf, index,
1563
buf = mem_heap_alloc(heap, rec_offs_size(offsets));
1564
*old_vers = rec_copy(buf, rec, offsets);
1565
rec_offs_make_valid(*old_vers, index, offsets);
1566
row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);