~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to plugin/innobase/btr/btr0cur.cc

  • Committer: Monty Taylor
  • Date: 2011-02-13 17:26:39 UTC
  • mfrom: (2157.2.2 give-in-to-pkg-config)
  • mto: This revision was merged to the branch mainline in revision 2166.
  • Revision ID: mordred@inaugust.com-20110213172639-nhy7i72sfhoq13ms
Merged in pkg-config fixes.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
1
/*****************************************************************************
2
2
 
3
 
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
4
 
Copyright (c) 2008, Google Inc.
 
3
Copyright (C) 1994, 2010, Innobase Oy. All Rights Reserved.
 
4
Copyright (C) 2008, Google Inc.
5
5
 
6
6
Portions of this file contain modifications contributed and copyrighted by
7
7
Google, Inc. Those modifications are gratefully acknowledged and are described
57
57
#include "buf0lru.h"
58
58
#include "btr0btr.h"
59
59
#include "btr0sea.h"
 
60
#include "row0purge.h"
 
61
#include "row0upd.h"
60
62
#include "trx0rec.h"
61
63
#include "trx0roll.h" /* trx_is_recv() */
62
64
#include "que0que.h"
66
68
#include "lock0lock.h"
67
69
#include "zlib.h"
68
70
 
 
71
/** Buffered B-tree operation types, introduced as part of delete buffering. */
 
72
typedef enum btr_op_enum {
 
73
        BTR_NO_OP = 0,                  /*!< Not buffered */
 
74
        BTR_INSERT_OP,                  /*!< Insert, do not ignore UNIQUE */
 
75
        BTR_INSERT_IGNORE_UNIQUE_OP,    /*!< Insert, ignoring UNIQUE */
 
76
        BTR_DELETE_OP,                  /*!< Purge a delete-marked record */
 
77
        BTR_DELMARK_OP                  /*!< Mark a record for deletion */
 
78
} btr_op_t;
 
79
 
69
80
#ifdef UNIV_DEBUG
70
81
/** If the following is set to TRUE, this module prints a lot of
71
82
trace information of individual record operations */
106
117
/** A BLOB field reference full of zero, for use in assertions and tests.
107
118
Initially, BLOB field references are set to zero, in
108
119
dtuple_convert_big_rec(). */
109
 
UNIV_INTERN const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE]= {0};
 
120
const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE]= {0};
110
121
 
111
122
#ifndef UNIV_HOTBACKUP
112
123
/*******************************************************************//**
328
339
                                Inserts should always be made using
329
340
                                PAGE_CUR_LE to search the position! */
330
341
        ulint           latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
331
 
                                BTR_INSERT and BTR_ESTIMATE;
 
342
                                at most one of BTR_INSERT, BTR_DELETE_MARK,
 
343
                                BTR_DELETE, or BTR_ESTIMATE;
332
344
                                cursor->left_block is used to store a pointer
333
345
                                to the left neighbor page, in the cases
334
346
                                BTR_SEARCH_PREV and BTR_MODIFY_PREV;
346
358
        ulint           line,   /*!< in: line where called */
347
359
        mtr_t*          mtr)    /*!< in: mtr */
348
360
{
349
 
        page_cur_t*     page_cursor;
350
361
        page_t*         page;
 
362
        buf_block_t*    block;
 
363
        ulint           space;
351
364
        buf_block_t*    guess;
352
 
        rec_t*          node_ptr;
 
365
        ulint           height;
353
366
        ulint           page_no;
354
 
        ulint           space;
355
367
        ulint           up_match;
356
368
        ulint           up_bytes;
357
369
        ulint           low_match;
358
370
        ulint           low_bytes;
359
 
        ulint           height;
360
371
        ulint           savepoint;
 
372
        ulint           rw_latch;
361
373
        ulint           page_mode;
362
 
        ulint           insert_planned;
 
374
        ulint           buf_mode;
363
375
        ulint           estimate;
364
 
        ulint           ignore_sec_unique;
 
376
        ulint           zip_size;
 
377
        page_cur_t*     page_cursor;
 
378
        btr_op_t        btr_op;
365
379
        ulint           root_height = 0; /* remove warning */
 
380
 
366
381
#ifdef BTR_CUR_ADAPT
367
382
        btr_search_t*   info;
368
383
#endif
382
397
        cursor->up_match = ULINT_UNDEFINED;
383
398
        cursor->low_match = ULINT_UNDEFINED;
384
399
#endif
385
 
        insert_planned = latch_mode & BTR_INSERT;
 
400
 
 
401
        /* These flags are mutually exclusive, they are lumped together
 
402
        with the latch mode for historical reasons. It's possible for
 
403
        none of the flags to be set. */
 
404
        switch (UNIV_EXPECT(latch_mode
 
405
                            & (BTR_INSERT | BTR_DELETE | BTR_DELETE_MARK),
 
406
                            0)) {
 
407
        case 0:
 
408
                btr_op = BTR_NO_OP;
 
409
                break;
 
410
        case BTR_INSERT:
 
411
                btr_op = (latch_mode & BTR_IGNORE_SEC_UNIQUE)
 
412
                        ? BTR_INSERT_IGNORE_UNIQUE_OP
 
413
                        : BTR_INSERT_OP;
 
414
                break;
 
415
        case BTR_DELETE:
 
416
                btr_op = BTR_DELETE_OP;
 
417
                ut_a(cursor->purge_node);
 
418
                break;
 
419
        case BTR_DELETE_MARK:
 
420
                btr_op = BTR_DELMARK_OP;
 
421
                break;
 
422
        default:
 
423
                /* only one of BTR_INSERT, BTR_DELETE, BTR_DELETE_MARK
 
424
                should be specified at a time */
 
425
                ut_error;
 
426
        }
 
427
 
 
428
        /* Operations on the insert buffer tree cannot be buffered. */
 
429
        ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index));
 
430
        /* Operations on the clustered index cannot be buffered. */
 
431
        ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index));
 
432
 
386
433
        estimate = latch_mode & BTR_ESTIMATE;
387
 
        ignore_sec_unique = latch_mode & BTR_IGNORE_SEC_UNIQUE;
388
 
        latch_mode = latch_mode & ~(BTR_INSERT | BTR_ESTIMATE
389
 
                                    | BTR_IGNORE_SEC_UNIQUE);
390
434
 
391
 
        ut_ad(!insert_planned || (mode == PAGE_CUR_LE));
 
435
        /* Turn the flags unrelated to the latch mode off. */
 
436
        latch_mode &= ~(BTR_INSERT
 
437
                        | BTR_DELETE_MARK
 
438
                        | BTR_DELETE
 
439
                        | BTR_ESTIMATE
 
440
                        | BTR_IGNORE_SEC_UNIQUE);
392
441
 
393
442
        cursor->flag = BTR_CUR_BINARY;
394
443
        cursor->index = index;
395
444
 
 
445
        cursor->ibuf_cnt = ULINT_UNDEFINED;
 
446
 
396
447
#ifndef BTR_CUR_ADAPT
397
448
        guess = NULL;
398
449
#else
406
457
        info->n_searches++;
407
458
#endif
408
459
        if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
409
 
            && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
 
460
            && latch_mode <= BTR_MODIFY_LEAF
 
461
            && info->last_hash_succ
410
462
            && !estimate
411
463
#ifdef PAGE_CUR_LE_OR_EXTENDS
412
464
            && mode != PAGE_CUR_LE_OR_EXTENDS
495
547
 
496
548
        /* Loop and search until we arrive at the desired level */
497
549
 
498
 
        for (;;) {
499
 
                ulint           zip_size;
500
 
                buf_block_t*    block;
501
 
                ulint           rw_latch;
502
 
                ulint           buf_mode;
503
 
 
504
 
                zip_size = dict_table_zip_size(index->table);
505
 
                rw_latch = RW_NO_LATCH;
506
 
                buf_mode = BUF_GET;
507
 
 
508
 
                if (height == 0 && latch_mode <= BTR_MODIFY_LEAF) {
509
 
 
510
 
                        rw_latch = latch_mode;
511
 
 
512
 
                        if (insert_planned
513
 
                            && ibuf_should_try(index, ignore_sec_unique)) {
514
 
 
515
 
                                /* Try insert to the insert buffer if the
516
 
                                page is not in the buffer pool */
517
 
 
518
 
                                buf_mode = BUF_GET_IF_IN_POOL;
519
 
                        }
 
550
search_loop:
 
551
        buf_mode = BUF_GET;
 
552
        rw_latch = RW_NO_LATCH;
 
553
 
 
554
        if (height != 0) {
 
555
                /* We are about to fetch the root or a non-leaf page. */
 
556
        } else if (latch_mode <= BTR_MODIFY_LEAF) {
 
557
                rw_latch = latch_mode;
 
558
 
 
559
                if (btr_op != BTR_NO_OP
 
560
                    && ibuf_should_try(index, btr_op != BTR_INSERT_OP)) {
 
561
 
 
562
                        /* Try to buffer the operation if the leaf
 
563
                        page is not in the buffer pool. */
 
564
 
 
565
                        buf_mode = btr_op == BTR_DELETE_OP
 
566
                                ? BUF_GET_IF_IN_POOL_OR_WATCH
 
567
                                : BUF_GET_IF_IN_POOL;
520
568
                }
 
569
        }
 
570
 
 
571
        zip_size = dict_table_zip_size(index->table);
521
572
 
522
573
retry_page_get:
523
 
                block = buf_page_get_gen(space, zip_size, page_no,
524
 
                                         rw_latch, guess, buf_mode,
525
 
                                         file, line, mtr);
526
 
                if (block == NULL) {
527
 
                        /* This must be a search to perform an insert;
528
 
                        try insert to the insert buffer */
529
 
 
530
 
                        ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
531
 
                        ut_ad(insert_planned);
532
 
                        ut_ad(cursor->thr);
533
 
 
534
 
                        if (ibuf_insert(tuple, index, space, zip_size,
 
574
        block = buf_page_get_gen(
 
575
                space, zip_size, page_no, rw_latch, guess, buf_mode,
 
576
                file, line, mtr);
 
577
 
 
578
        if (block == NULL) {
 
579
                /* This must be a search to perform an insert/delete
 
580
                mark/delete; try using the insert/delete buffer */
 
581
 
 
582
                ut_ad(height == 0);
 
583
                ut_ad(cursor->thr);
 
584
 
 
585
                switch (btr_op) {
 
586
                case BTR_INSERT_OP:
 
587
                case BTR_INSERT_IGNORE_UNIQUE_OP:
 
588
                        ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
 
589
 
 
590
                        if (ibuf_insert(IBUF_OP_INSERT, tuple, index,
 
591
                                        space, zip_size, page_no,
 
592
                                        cursor->thr)) {
 
593
 
 
594
                                cursor->flag = BTR_CUR_INSERT_TO_IBUF;
 
595
 
 
596
                                goto func_exit;
 
597
                        }
 
598
                        break;
 
599
 
 
600
                case BTR_DELMARK_OP:
 
601
                        ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
 
602
 
 
603
                        if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
 
604
                                        index, space, zip_size,
535
605
                                        page_no, cursor->thr)) {
536
 
                                /* Insertion to the insert buffer succeeded */
537
 
                                cursor->flag = BTR_CUR_INSERT_TO_IBUF;
538
 
                                if (UNIV_LIKELY_NULL(heap)) {
539
 
                                        mem_heap_free(heap);
540
 
                                }
 
606
 
 
607
                                cursor->flag = BTR_CUR_DEL_MARK_IBUF;
 
608
 
541
609
                                goto func_exit;
542
610
                        }
543
611
 
544
 
                        /* Insert to the insert buffer did not succeed:
545
 
                        retry page get */
 
612
                        break;
 
613
 
 
614
                case BTR_DELETE_OP:
 
615
                        ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
 
616
 
 
617
                        if (!row_purge_poss_sec(cursor->purge_node,
 
618
                                                index, tuple)) {
 
619
 
 
620
                                /* The record cannot be purged yet. */
 
621
                                cursor->flag = BTR_CUR_DELETE_REF;
 
622
                        } else if (ibuf_insert(IBUF_OP_DELETE, tuple,
 
623
                                               index, space, zip_size,
 
624
                                               page_no,
 
625
                                               cursor->thr)) {
 
626
 
 
627
                                /* The purge was buffered. */
 
628
                                cursor->flag = BTR_CUR_DELETE_IBUF;
 
629
                        } else {
 
630
                                /* The purge could not be buffered. */
 
631
                                buf_pool_watch_unset(space, page_no);
 
632
                                break;
 
633
                        }
 
634
 
 
635
                        buf_pool_watch_unset(space, page_no);
 
636
                        goto func_exit;
 
637
 
 
638
                default:
 
639
                        ut_error;
 
640
                }
 
641
 
 
642
                /* Insert to the insert/delete buffer did not succeed, we
 
643
                must read the page from disk. */
 
644
 
 
645
                buf_mode = BUF_GET;
 
646
 
 
647
                goto retry_page_get;
 
648
        }
 
649
 
 
650
        block->check_index_page_at_flush = TRUE;
 
651
        page = buf_block_get_frame(block);
 
652
 
 
653
        if (rw_latch != RW_NO_LATCH) {
 
654
#ifdef UNIV_ZIP_DEBUG
 
655
                const page_zip_des_t*   page_zip
 
656
                        = buf_block_get_page_zip(block);
 
657
                ut_a(!page_zip || page_zip_validate(page_zip, page));
 
658
#endif /* UNIV_ZIP_DEBUG */
 
659
 
 
660
                buf_block_dbg_add_level(block, SYNC_TREE_NODE);
 
661
        }
 
662
 
 
663
        ut_ad(index->id == btr_page_get_index_id(page));
 
664
 
 
665
        if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
 
666
                /* We are in the root node */
 
667
 
 
668
                height = btr_page_get_level(page, mtr);
 
669
                root_height = height;
 
670
                cursor->tree_height = root_height + 1;
 
671
 
 
672
#ifdef BTR_CUR_ADAPT
 
673
                if (block != guess) {
 
674
                        info->root_guess = block;
 
675
                }
 
676
#endif
 
677
        }
 
678
 
 
679
        if (height == 0) {
 
680
                if (rw_latch == RW_NO_LATCH) {
 
681
 
 
682
                        btr_cur_latch_leaves(
 
683
                                page, space, zip_size, page_no, latch_mode,
 
684
                                cursor, mtr);
 
685
                }
 
686
 
 
687
                if (latch_mode != BTR_MODIFY_TREE
 
688
                    && latch_mode != BTR_CONT_MODIFY_TREE) {
 
689
 
 
690
                        /* Release the tree s-latch */
 
691
 
 
692
                        mtr_release_s_latch_at_savepoint(
 
693
                                mtr, savepoint, dict_index_get_lock(index));
 
694
                }
 
695
 
 
696
                page_mode = mode;
 
697
        }
 
698
 
 
699
        page_cur_search_with_match(
 
700
                block, index, tuple, page_mode, &up_match, &up_bytes,
 
701
                &low_match, &low_bytes, page_cursor);
 
702
 
 
703
        if (estimate) {
 
704
                btr_cur_add_path_info(cursor, height, root_height);
 
705
        }
 
706
 
 
707
        /* If this is the desired level, leave the loop */
 
708
 
 
709
        ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor),
 
710
                                           mtr));
 
711
 
 
712
        if (level != height) {
 
713
 
 
714
                const rec_t*    node_ptr;
 
715
                ut_ad(height > 0);
 
716
 
 
717
                height--;
 
718
                guess = NULL;
 
719
 
 
720
                node_ptr = page_cur_get_rec(page_cursor);
 
721
 
 
722
                offsets = rec_get_offsets(
 
723
                        node_ptr, index, offsets, ULINT_UNDEFINED, &heap);
 
724
 
 
725
                /* Go to the child node */
 
726
                page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
 
727
 
 
728
                if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) {
 
729
                        /* We're doing a search on an ibuf tree and we're one
 
730
                        level above the leaf page. */
 
731
 
 
732
                        ulint   is_min_rec;
 
733
 
 
734
                        ut_ad(level == 0);
 
735
 
 
736
                        is_min_rec = rec_get_info_bits(node_ptr, 0)
 
737
                                & REC_INFO_MIN_REC_FLAG;
 
738
 
 
739
                        if (!is_min_rec) {
 
740
                                cursor->ibuf_cnt
 
741
                                        = ibuf_rec_get_counter(node_ptr);
 
742
 
 
743
                                ut_a(cursor->ibuf_cnt <= 0xFFFF
 
744
                                     || cursor->ibuf_cnt == ULINT_UNDEFINED);
 
745
                        }
546
746
 
547
747
                        buf_mode = BUF_GET;
548
 
 
 
748
                        rw_latch = RW_NO_LATCH;
549
749
                        goto retry_page_get;
550
750
                }
551
751
 
552
 
                page = buf_block_get_frame(block);
553
 
 
554
 
                block->check_index_page_at_flush = TRUE;
555
 
 
556
 
                if (rw_latch != RW_NO_LATCH) {
557
 
#ifdef UNIV_ZIP_DEBUG
558
 
                        const page_zip_des_t*   page_zip
559
 
                                = buf_block_get_page_zip(block);
560
 
                        ut_a(!page_zip || page_zip_validate(page_zip, page));
561
 
#endif /* UNIV_ZIP_DEBUG */
562
 
 
563
 
                        buf_block_dbg_add_level(block, SYNC_TREE_NODE);
564
 
                }
565
 
 
566
 
                ut_ad(0 == ut_dulint_cmp(index->id,
567
 
                                         btr_page_get_index_id(page)));
568
 
 
569
 
                if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
570
 
                        /* We are in the root node */
571
 
 
572
 
                        height = btr_page_get_level(page, mtr);
573
 
                        root_height = height;
574
 
                        cursor->tree_height = root_height + 1;
575
 
#ifdef BTR_CUR_ADAPT
576
 
                        if (block != guess) {
577
 
                                info->root_guess = block;
578
 
                        }
579
 
#endif
580
 
                }
581
 
 
582
 
                if (height == 0) {
583
 
                        if (rw_latch == RW_NO_LATCH) {
584
 
 
585
 
                                btr_cur_latch_leaves(page, space, zip_size,
586
 
                                                     page_no, latch_mode,
587
 
                                                     cursor, mtr);
588
 
                        }
589
 
 
590
 
                        if ((latch_mode != BTR_MODIFY_TREE)
591
 
                            && (latch_mode != BTR_CONT_MODIFY_TREE)) {
592
 
 
593
 
                                /* Release the tree s-latch */
594
 
 
595
 
                                mtr_release_s_latch_at_savepoint(
596
 
                                        mtr, savepoint,
597
 
                                        dict_index_get_lock(index));
598
 
                        }
599
 
 
600
 
                        page_mode = mode;
601
 
                }
602
 
 
603
 
                page_cur_search_with_match(block, index, tuple, page_mode,
604
 
                                           &up_match, &up_bytes,
605
 
                                           &low_match, &low_bytes,
606
 
                                           page_cursor);
607
 
 
608
 
                if (estimate) {
609
 
                        btr_cur_add_path_info(cursor, height, root_height);
610
 
                }
611
 
 
612
 
                /* If this is the desired level, leave the loop */
613
 
 
614
 
                ut_ad(height == btr_page_get_level(
615
 
                              page_cur_get_page(page_cursor), mtr));
616
 
 
617
 
                if (level == height) {
618
 
 
619
 
                        if (level > 0) {
620
 
                                /* x-latch the page */
621
 
                                page = btr_page_get(space, zip_size,
622
 
                                                    page_no, RW_X_LATCH, mtr);
623
 
                                ut_a((ibool)!!page_is_comp(page)
624
 
                                     == dict_table_is_comp(index->table));
625
 
                        }
626
 
 
627
 
                        break;
628
 
                }
629
 
 
630
 
                ut_ad(height > 0);
631
 
 
632
 
                height--;
633
 
 
634
 
                guess = NULL;
635
 
 
636
 
                node_ptr = page_cur_get_rec(page_cursor);
637
 
                offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
638
 
                                          ULINT_UNDEFINED, &heap);
639
 
                /* Go to the child node */
640
 
                page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
641
 
        }
642
 
 
643
 
        if (UNIV_LIKELY_NULL(heap)) {
644
 
                mem_heap_free(heap);
645
 
        }
646
 
 
647
 
        if (level == 0) {
 
752
                goto search_loop;
 
753
        }
 
754
 
 
755
        if (level != 0) {
 
756
                /* x-latch the page */
 
757
                page = btr_page_get(
 
758
                        space, zip_size, page_no, RW_X_LATCH, mtr);
 
759
 
 
760
                ut_a((ibool)!!page_is_comp(page)
 
761
                     == dict_table_is_comp(index->table));
 
762
        } else {
648
763
                cursor->low_match = low_match;
649
764
                cursor->low_bytes = low_bytes;
650
765
                cursor->up_match = up_match;
669
784
        }
670
785
 
671
786
func_exit:
 
787
 
 
788
        if (UNIV_LIKELY_NULL(heap)) {
 
789
                mem_heap_free(heap);
 
790
        }
 
791
 
672
792
        if (has_search_latch) {
673
793
 
674
794
                rw_lock_s_lock(&btr_search_latch);
734
854
                                         RW_NO_LATCH, NULL, BUF_GET,
735
855
                                         file, line, mtr);
736
856
                page = buf_block_get_frame(block);
737
 
                ut_ad(0 == ut_dulint_cmp(index->id,
738
 
                                         btr_page_get_index_id(page)));
 
857
                ut_ad(index->id == btr_page_get_index_id(page));
739
858
 
740
859
                block->check_index_page_at_flush = TRUE;
741
860
 
855
974
                                         RW_NO_LATCH, NULL, BUF_GET,
856
975
                                         file, line, mtr);
857
976
                page = buf_block_get_frame(block);
858
 
                ut_ad(0 == ut_dulint_cmp(index->id,
859
 
                                         btr_page_get_index_id(page)));
 
977
                ut_ad(index->id == btr_page_get_index_id(page));
860
978
 
861
979
                if (height == ULINT_UNDEFINED) {
862
980
                        /* We are in the root node */
953
1071
                                not zero, the parameters index and thr
954
1072
                                should be specified */
955
1073
        btr_cur_t*      cursor, /*!< in: cursor on page after which to insert */
956
 
        const dtuple_t* entry,  /*!< in: entry to insert */
 
1074
        dtuple_t*       entry,  /*!< in/out: entry to insert */
957
1075
        que_thr_t*      thr,    /*!< in: query thread or NULL */
958
1076
        mtr_t*          mtr,    /*!< in/out: mini-transaction */
959
1077
        ibool*          inherit)/*!< out: TRUE if the inserted new record maybe
1015
1133
        const char*             op)     /*!< in: operation */
1016
1134
{
1017
1135
        fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ",
1018
 
                TRX_ID_PREP_PRINTF(trx->id));
 
1136
                (ullint) trx->id);
1019
1137
        fputs(op, stderr);
1020
1138
        dict_index_name_print(stderr, trx, index);
1021
1139
        putc('\n', stderr);
1626
1744
See if there is enough place in the page modification log to log
1627
1745
an update-in-place.
1628
1746
@return TRUE if enough place */
1629
 
static
 
1747
UNIV_INTERN
1630
1748
ibool
1631
1749
btr_cur_update_alloc_zip(
1632
1750
/*=====================*/
1706
1824
        page_zip_des_t* page_zip;
1707
1825
        ulint           err;
1708
1826
        rec_t*          rec;
1709
 
        roll_ptr_t      roll_ptr        = ut_dulint_zero;
 
1827
        roll_ptr_t      roll_ptr        = 0;
1710
1828
        trx_t*          trx;
1711
1829
        ulint           was_delete_marked;
1712
1830
        mem_heap_t*     heap            = NULL;
1836
1954
        page_t*         page;
1837
1955
        page_zip_des_t* page_zip;
1838
1956
        rec_t*          rec;
1839
 
        rec_t*          orig_rec;
1840
1957
        ulint           max_size;
1841
1958
        ulint           new_rec_size;
1842
1959
        ulint           old_rec_size;
1850
1967
 
1851
1968
        block = btr_cur_get_block(cursor);
1852
1969
        page = buf_block_get_frame(block);
1853
 
        orig_rec = rec = btr_cur_get_rec(cursor);
 
1970
        rec = btr_cur_get_rec(cursor);
1854
1971
        index = cursor->index;
1855
1972
        ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
1856
1973
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1959
2076
        err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
1960
2077
                                        thr, mtr, &roll_ptr);
1961
2078
        if (err != DB_SUCCESS) {
1962
 
err_exit:
1963
 
                mem_heap_free(heap);
1964
 
                return(err);
 
2079
 
 
2080
                goto err_exit;
1965
2081
        }
1966
2082
 
1967
2083
        /* Ok, we may do the replacement. Store on the page infimum the
2007
2123
 
2008
2124
        page_cur_move_to_next(page_cursor);
2009
2125
 
 
2126
        err = DB_SUCCESS;
 
2127
err_exit:
2010
2128
        mem_heap_free(heap);
2011
 
 
2012
 
        return(DB_SUCCESS);
 
2129
        return(err);
2013
2130
}
2014
2131
 
2015
2132
/*************************************************************//**
2722
2839
}
2723
2840
 
2724
2841
/***********************************************************//**
2725
 
Clear a secondary index record's delete mark.  This function is only
2726
 
used by the insert buffer insert merge mechanism. */
 
2842
Sets a secondary index record's delete mark to the given value. This
 
2843
function is only used by the insert buffer merge mechanism. */
2727
2844
UNIV_INTERN
2728
2845
void
2729
 
btr_cur_del_unmark_for_ibuf(
2730
 
/*========================*/
2731
 
        rec_t*          rec,            /*!< in/out: record to delete unmark */
 
2846
btr_cur_set_deleted_flag_for_ibuf(
 
2847
/*==============================*/
 
2848
        rec_t*          rec,            /*!< in/out: record */
2732
2849
        page_zip_des_t* page_zip,       /*!< in/out: compressed page
2733
2850
                                        corresponding to rec, or NULL
2734
2851
                                        when the tablespace is
2735
2852
                                        uncompressed */
 
2853
        ibool           val,            /*!< in: value to set */
2736
2854
        mtr_t*          mtr)            /*!< in: mtr */
2737
2855
{
2738
2856
        /* We do not need to reserve btr_search_latch, as the page has just
2739
2857
        been read to the buffer pool and there cannot be a hash index to it. */
2740
2858
 
2741
 
        btr_rec_set_deleted_flag(rec, page_zip, FALSE);
 
2859
        btr_rec_set_deleted_flag(rec, page_zip, val);
2742
2860
 
2743
 
        btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr);
 
2861
        btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
2744
2862
}
2745
2863
 
2746
2864
/*==================== B-TREE RECORD REMOVE =========================*/
3034
3152
{
3035
3153
        btr_path_t*     slot;
3036
3154
        rec_t*          rec;
 
3155
        page_t*         page;
3037
3156
 
3038
3157
        ut_a(cursor->path_arr);
3039
3158
 
3056
3175
 
3057
3176
        slot = cursor->path_arr + (root_height - height);
3058
3177
 
 
3178
        page = page_align(rec);
 
3179
 
3059
3180
        slot->nth_rec = page_rec_get_n_recs_before(rec);
3060
 
        slot->n_recs = page_get_n_recs(page_align(rec));
 
3181
        slot->n_recs = page_get_n_recs(page);
 
3182
        slot->page_no = page_get_page_no(page);
 
3183
        slot->page_level = btr_page_get_level_low(page);
 
3184
}
 
3185
 
 
3186
/*******************************************************************//**
 
3187
Estimate the number of rows between slot1 and slot2 for any level on a
 
3188
B-tree. This function starts from slot1->page and reads a few pages to
 
3189
the right, counting their records. If we reach slot2->page quickly then
 
3190
we know exactly how many records there are between slot1 and slot2 and
 
3191
we set is_n_rows_exact to TRUE. If we cannot reach slot2->page quickly
 
3192
then we calculate the average number of records in the pages scanned
 
3193
so far and assume that all pages that we did not scan up to slot2->page
 
3194
contain the same number of records, then we multiply that average by
 
3195
the number of pages between slot1->page and slot2->page (which is
 
3196
n_rows_on_prev_level). In this case we set is_n_rows_exact to FALSE.
 
3197
@return number of rows (exact or estimated) */
 
3198
static
 
3199
ib_int64_t
 
3200
btr_estimate_n_rows_in_range_on_level(
 
3201
/*==================================*/
 
3202
        dict_index_t*   index,                  /*!< in: index */
 
3203
        btr_path_t*     slot1,                  /*!< in: left border */
 
3204
        btr_path_t*     slot2,                  /*!< in: right border */
 
3205
        ib_int64_t      n_rows_on_prev_level,   /*!< in: number of rows
 
3206
                                                on the previous level for the
 
3207
                                                same descend paths; used to
 
3208
                                                determine the numbe of pages
 
3209
                                                on this level */
 
3210
        ibool*          is_n_rows_exact)        /*!< out: TRUE if the returned
 
3211
                                                value is exact i.e. not an
 
3212
                                                estimation */
 
3213
{
 
3214
        ulint           space;
 
3215
        ib_int64_t      n_rows;
 
3216
        ulint           n_pages_read;
 
3217
        ulint           page_no;
 
3218
        ulint           zip_size;
 
3219
        ulint           level;
 
3220
 
 
3221
        space = dict_index_get_space(index);
 
3222
 
 
3223
        n_rows = 0;
 
3224
        n_pages_read = 0;
 
3225
 
 
3226
        /* Assume by default that we will scan all pages between
 
3227
        slot1->page_no and slot2->page_no */
 
3228
        *is_n_rows_exact = TRUE;
 
3229
 
 
3230
        /* add records from slot1->page_no which are to the right of
 
3231
        the record which serves as a left border of the range, if any */
 
3232
        if (slot1->nth_rec < slot1->n_recs) {
 
3233
                n_rows += slot1->n_recs - slot1->nth_rec;
 
3234
        }
 
3235
 
 
3236
        /* add records from slot2->page_no which are to the left of
 
3237
        the record which servers as a right border of the range, if any */
 
3238
        if (slot2->nth_rec > 1) {
 
3239
                n_rows += slot2->nth_rec - 1;
 
3240
        }
 
3241
 
 
3242
        /* count the records in the pages between slot1->page_no and
 
3243
        slot2->page_no (non inclusive), if any */
 
3244
 
 
3245
        zip_size = fil_space_get_zip_size(space);
 
3246
 
 
3247
        /* Do not read more than this number of pages in order not to hurt
 
3248
        performance with this code which is just an estimation. If we read
 
3249
        this many pages before reaching slot2->page_no then we estimate the
 
3250
        average from the pages scanned so far */
 
3251
#       define N_PAGES_READ_LIMIT       10
 
3252
 
 
3253
        page_no = slot1->page_no;
 
3254
        level = slot1->page_level;
 
3255
 
 
3256
        do {
 
3257
                mtr_t           mtr;
 
3258
                page_t*         page;
 
3259
                buf_block_t*    block;
 
3260
 
 
3261
                mtr_start(&mtr);
 
3262
 
 
3263
                /* fetch the page */
 
3264
                block = buf_page_get(space, zip_size, page_no, RW_S_LATCH,
 
3265
                                     &mtr);
 
3266
 
 
3267
                page = buf_block_get_frame(block);
 
3268
 
 
3269
                /* It is possible that the tree has been reorganized in the
 
3270
                meantime and this is a different page. If this happens the
 
3271
                calculated estimate will be bogus, which is not fatal as
 
3272
                this is only an estimate. We are sure that a page with
 
3273
                page_no exists because InnoDB never frees pages, only
 
3274
                reuses them. */
 
3275
                if (fil_page_get_type(page) != FIL_PAGE_INDEX
 
3276
                    || btr_page_get_index_id(page) != index->id
 
3277
                    || btr_page_get_level_low(page) != level) {
 
3278
 
 
3279
                        /* The page got reused for something else */
 
3280
                        mtr_commit(&mtr);
 
3281
                        goto inexact;
 
3282
                }
 
3283
 
 
3284
                n_pages_read++;
 
3285
 
 
3286
                if (page_no != slot1->page_no) {
 
3287
                        /* Do not count the records on slot1->page_no,
 
3288
                        we already counted them before this loop. */
 
3289
                        n_rows += page_get_n_recs(page);
 
3290
                }
 
3291
 
 
3292
                page_no = btr_page_get_next(page, &mtr);
 
3293
 
 
3294
                mtr_commit(&mtr);
 
3295
 
 
3296
                if (n_pages_read == N_PAGES_READ_LIMIT
 
3297
                    || page_no == FIL_NULL) {
 
3298
                        /* Either we read too many pages or
 
3299
                        we reached the end of the level without passing
 
3300
                        through slot2->page_no, the tree must have changed
 
3301
                        in the meantime */
 
3302
                        goto inexact;
 
3303
                }
 
3304
 
 
3305
        } while (page_no != slot2->page_no);
 
3306
 
 
3307
        return(n_rows);
 
3308
 
 
3309
inexact:
 
3310
 
 
3311
        *is_n_rows_exact = FALSE;
 
3312
 
 
3313
        /* We did interrupt before reaching slot2->page */
 
3314
 
 
3315
        if (n_pages_read > 0) {
 
3316
                /* The number of pages on this level is
 
3317
                n_rows_on_prev_level, multiply it by the
 
3318
                average number of recs per page so far */
 
3319
                n_rows = n_rows_on_prev_level
 
3320
                        * n_rows / n_pages_read;
 
3321
        } else {
 
3322
                /* The tree changed before we could even
 
3323
                start with slot1->page_no */
 
3324
                n_rows = 10;
 
3325
        }
 
3326
 
 
3327
        return(n_rows);
3061
3328
}
3062
3329
 
3063
3330
/*******************************************************************//**
3082
3349
        ibool           diverged_lot;
3083
3350
        ulint           divergence_level;
3084
3351
        ib_int64_t      n_rows;
 
3352
        ibool           is_n_rows_exact;
3085
3353
        ulint           i;
3086
3354
        mtr_t           mtr;
3087
3355
 
3124
3392
        /* We have the path information for the range in path1 and path2 */
3125
3393
 
3126
3394
        n_rows = 1;
 
3395
        is_n_rows_exact = TRUE;
3127
3396
        diverged = FALSE;           /* This becomes true when the path is not
3128
3397
                                    the same any more */
3129
3398
        diverged_lot = FALSE;       /* This becomes true when the paths are
3139
3408
                if (slot1->nth_rec == ULINT_UNDEFINED
3140
3409
                    || slot2->nth_rec == ULINT_UNDEFINED) {
3141
3410
 
3142
 
                        if (i > divergence_level + 1) {
 
3411
                        if (i > divergence_level + 1 && !is_n_rows_exact) {
3143
3412
                                /* In trees whose height is > 1 our algorithm
3144
3413
                                tends to underestimate: multiply the estimate
3145
3414
                                by 2: */
3151
3420
                        to over 1 / 2 of the estimated rows in the whole
3152
3421
                        table */
3153
3422
 
3154
 
                        if (n_rows > index->table->stat_n_rows / 2) {
 
3423
                        if (n_rows > index->table->stat_n_rows / 2
 
3424
                            && !is_n_rows_exact) {
 
3425
 
3155
3426
                                n_rows = index->table->stat_n_rows / 2;
3156
3427
 
3157
3428
                                /* If there are just 0 or 1 rows in the table,
3177
3448
                                        divergence_level = i;
3178
3449
                                }
3179
3450
                        } else {
3180
 
                                /* Maybe the tree has changed between
3181
 
                                searches */
3182
 
 
3183
 
                                return(10);
 
3451
                                /* It is possible that
 
3452
                                slot1->nth_rec >= slot2->nth_rec
 
3453
                                if, for example, we have a single page
 
3454
                                tree which contains (inf, 5, 6, supr)
 
3455
                                and we select where x > 20 and x < 30;
 
3456
                                in this case slot1->nth_rec will point
 
3457
                                to the supr record and slot2->nth_rec
 
3458
                                will point to 6 */
 
3459
                                n_rows = 0;
3184
3460
                        }
3185
3461
 
3186
3462
                } else if (diverged && !diverged_lot) {
3204
3480
                        }
3205
3481
                } else if (diverged_lot) {
3206
3482
 
3207
 
                        n_rows = (n_rows * (slot1->n_recs + slot2->n_recs))
3208
 
                                / 2;
 
3483
                        n_rows = btr_estimate_n_rows_in_range_on_level(
 
3484
                                index, slot1, slot2, n_rows,
 
3485
                                &is_n_rows_exact);
3209
3486
                }
3210
3487
        }
3211
3488
}
3244
3521
 
3245
3522
        n_cols = dict_index_get_n_unique(index);
3246
3523
 
3247
 
        n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
 
3524
        n_diff = (ib_int64_t *)mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
3248
3525
 
3249
3526
        /* It makes no sense to test more pages than are contained
3250
3527
        in the index, thus we lower the number if it is too high */
3356
3633
        also the pages used for external storage of fields (those pages are
3357
3634
        included in index->stat_n_leaf_pages) */
3358
3635
 
3359
 
        dict_index_stat_mutex_enter(index);
3360
 
 
3361
3636
        for (j = 0; j <= n_cols; j++) {
3362
3637
                index->stat_n_diff_key_vals[j]
3363
3638
                        = ((n_diff[j]
3387
3662
                index->stat_n_diff_key_vals[j] += add_on;
3388
3663
        }
3389
3664
 
3390
 
        dict_index_stat_mutex_exit(index);
3391
 
 
3392
3665
        mem_free(n_diff);
3393
3666
        if (UNIV_LIKELY_NULL(heap)) {
3394
3667
                mem_heap_free(heap);
3484
3757
Marks not updated extern fields as not-owned by this record. The ownership
3485
3758
is transferred to the updated record which is inserted elsewhere in the
3486
3759
index tree. In purge only the owner of externally stored field is allowed
3487
 
to free the field. */
 
3760
to free the field.
 
3761
@return TRUE if BLOB ownership was transferred */
3488
3762
UNIV_INTERN
3489
 
void
 
3763
ibool
3490
3764
btr_cur_mark_extern_inherited_fields(
3491
3765
/*=================================*/
3492
3766
        page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
3500
3774
        ulint   n;
3501
3775
        ulint   j;
3502
3776
        ulint   i;
 
3777
        ibool   change_ownership = FALSE;
3503
3778
 
3504
3779
        ut_ad(rec_offs_validate(rec, NULL, offsets));
3505
3780
        ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
3506
3781
 
3507
3782
        if (!rec_offs_any_extern(offsets)) {
3508
3783
 
3509
 
                return;
 
3784
                return(FALSE);
3510
3785
        }
3511
3786
 
3512
3787
        n = rec_offs_n_fields(offsets);
3529
3804
 
3530
3805
                        btr_cur_set_ownership_of_extern_field(
3531
3806
                                page_zip, rec, index, offsets, i, FALSE, mtr);
 
3807
 
 
3808
                        change_ownership = TRUE;
3532
3809
updated:
3533
3810
                        ;
3534
3811
                }
3535
3812
        }
 
3813
 
 
3814
        return(change_ownership);
3536
3815
}
3537
3816
 
3538
3817
/*******************************************************************//**
3569
3848
                        }
3570
3849
                }
3571
3850
 
3572
 
                data = dfield_get_data(dfield);
 
3851
                data = (unsigned char *)dfield_get_data(dfield);
3573
3852
                len = dfield_get_len(dfield);
3574
3853
                data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
3575
3854
                        |= BTR_EXTERN_INHERITED_FLAG;
3628
3907
                dfield_t* dfield = dtuple_get_nth_field(entry, i);
3629
3908
 
3630
3909
                if (dfield_is_ext(dfield)) {
3631
 
                        byte*   data = dfield_get_data(dfield);
 
3910
                        byte*   data = (unsigned char *)dfield_get_data(dfield);
3632
3911
                        ulint   len = dfield_get_len(dfield);
3633
3912
 
3634
3913
                        data[len - BTR_EXTERN_FIELD_REF_SIZE + BTR_EXTERN_LEN]
3694
3973
                                will have to be copied. */
3695
3974
                                ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
3696
3975
 
3697
 
                                data = dfield_get_data(field);
 
3976
                                data = (unsigned char *)dfield_get_data(field);
3698
3977
                                len = dfield_get_len(field);
3699
3978
 
3700
 
                                buf = mem_heap_alloc(heap, uf->orig_len);
 
3979
                                buf = (unsigned char *)mem_heap_alloc(heap, uf->orig_len);
3701
3980
                                /* Copy the locally stored prefix. */
3702
3981
                                memcpy(buf, data,
3703
3982
                                       uf->orig_len
3752
4031
                                if there is one */
3753
4032
        mtr_t*          mtr)    /*!< in: mini-transaction to commit */
3754
4033
{
3755
 
        ulint   space   = buf_block_get_space(block);
3756
 
        ulint   page_no = buf_block_get_page_no(block);
 
4034
        buf_pool_t*     buf_pool = buf_pool_from_block(block);
 
4035
        ulint           space   = buf_block_get_space(block);
 
4036
        ulint           page_no = buf_block_get_page_no(block);
3757
4037
 
3758
4038
        ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3759
4039
 
3760
4040
        mtr_commit(mtr);
3761
4041
 
3762
 
        buf_pool_mutex_enter();
 
4042
        buf_pool_mutex_enter(buf_pool);
3763
4043
        mutex_enter(&block->mutex);
3764
4044
 
3765
4045
        /* Only free the block if it is still allocated to
3780
4060
                }
3781
4061
        }
3782
4062
 
3783
 
        buf_pool_mutex_exit();
 
4063
        buf_pool_mutex_exit(buf_pool);
3784
4064
        mutex_exit(&block->mutex);
3785
4065
}
3786
4066
 
3789
4069
them in rec.  The extern flags in rec will have to be set beforehand.
3790
4070
The fields are stored on pages allocated from leaf node
3791
4071
file segment of the index tree.
3792
 
@return DB_SUCCESS or error */
 
4072
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
3793
4073
UNIV_INTERN
3794
4074
ulint
3795
4075
btr_store_big_rec_extern_fields(
3804
4084
                                        this function returns */
3805
4085
        big_rec_t*      big_rec_vec,    /*!< in: vector containing fields
3806
4086
                                        to be stored externally */
3807
 
        mtr_t*          local_mtr __attribute__((unused))) /*!< in: mtr
 
4087
        mtr_t*          /*local_mtr __attribute__((unused))*/) /*!< in: mtr
3808
4088
                                        containing the latch to rec and to the
3809
4089
                                        tree */
3810
4090
{
3882
4162
                        int     err = deflateReset(&c_stream);
3883
4163
                        ut_a(err == Z_OK);
3884
4164
 
3885
 
                        c_stream.next_in = (void*) big_rec_vec->fields[i].data;
 
4165
                        c_stream.next_in = (Bytef *) big_rec_vec->fields[i].data;
3886
4166
                        c_stream.avail_in = extern_len;
3887
4167
                }
3888
4168
 
4219
4499
        ulint           i,              /*!< in: field number of field_ref;
4220
4500
                                        ignored if rec == NULL */
4221
4501
        enum trx_rb_ctx rb_ctx,         /*!< in: rollback context */
4222
 
        mtr_t*          local_mtr __attribute__((unused))) /*!< in: mtr
 
4502
        mtr_t*          /*local_mtr __attribute__((unused))*/) /*!< in: mtr
4223
4503
                                        containing the latch to data and an
4224
4504
                                        X-latch to the index tree */
4225
4505
{
4279
4559
        }
4280
4560
 
4281
4561
        for (;;) {
 
4562
#ifdef UNIV_SYNC_DEBUG
4282
4563
                buf_block_t*    rec_block;
 
4564
#endif /* UNIV_SYNC_DEBUG */
4283
4565
                buf_block_t*    ext_block;
4284
4566
 
4285
4567
                mtr_start(&mtr);
4286
4568
 
4287
 
                rec_block = buf_page_get(page_get_space_id(
 
4569
#ifdef UNIV_SYNC_DEBUG
 
4570
                rec_block =
 
4571
#endif /* UNIV_SYNC_DEBUG */
 
4572
                        buf_page_get(page_get_space_id(
4288
4573
                                                 page_align(field_ref)),
4289
4574
                                         rec_zip_size,
4290
4575
                                         page_get_page_no(
4799
5084
 
4800
5085
        extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4);
4801
5086
 
4802
 
        buf = mem_heap_alloc(heap, local_len + extern_len);
 
5087
        buf = (unsigned char *)mem_heap_alloc(heap, local_len + extern_len);
4803
5088
 
4804
5089
        memcpy(buf, data, local_len);
4805
5090
        *len = local_len
4814
5099
 
4815
5100
/*******************************************************************//**
4816
5101
Copies an externally stored field of a record to mem heap.
4817
 
@return the field copied to heap */
 
5102
@return the field copied to heap, or NULL if the field is incomplete */
4818
5103
UNIV_INTERN
4819
5104
byte*
4820
5105
btr_rec_copy_externally_stored_field(
4844
5129
 
4845
5130
        data = rec_get_nth_field(rec, offsets, no, &local_len);
4846
5131
 
 
5132
        ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
 
5133
 
 
5134
        if (UNIV_UNLIKELY
 
5135
            (!memcmp(data + local_len - BTR_EXTERN_FIELD_REF_SIZE,
 
5136
                     field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
 
5137
                /* The externally stored field was not written yet.
 
5138
                This record should only be seen by
 
5139
                recv_recovery_rollback_active() or any
 
5140
                TRX_ISO_READ_UNCOMMITTED transactions. */
 
5141
                return(NULL);
 
5142
        }
 
5143
 
4847
5144
        return(btr_copy_externally_stored_field(len, data,
4848
5145
                                                zip_size, local_len, heap));
4849
5146
}