~drizzle-trunk/drizzle/development

« back to all changes in this revision

Viewing changes to storage/innobase/buf/buf0lru.c

Tags: innodb-plugin-1.0.2
InnoDB Plugin 1.0.2

Show diffs side-by-side

added added

removed removed

Lines of Context:
44
44
 
45
45
#define BUF_LRU_INITIAL_RATIO   8
46
46
 
 
47
/* When dropping the search hash index entries before deleting an ibd
 
48
file, we build a local array of pages belonging to that tablespace
 
49
in the buffer pool. Following is the size of that array. */
 
50
#define BUF_LRU_DROP_SEARCH_HASH_SIZE   1024
 
51
 
47
52
/* If we switch on the InnoDB monitor because there are too few available
48
53
frames in the buffer pool, we set this to TRUE */
49
54
UNIV_INTERN ibool       buf_lru_switched_on_innodb_mon  = FALSE;
158
163
}
159
164
 
160
165
/**********************************************************************
 
166
Attempts to drop page hash index on a batch of pages belonging to a
 
167
particular space id. */
 
168
static
 
169
void
 
170
buf_LRU_drop_page_hash_batch(
 
171
/*=========================*/
 
172
        ulint           space_id,       /* in: space id */
 
173
        ulint           zip_size,       /* in: compressed page size in bytes
 
174
                                        or 0 for uncompressed pages */
 
175
        const ulint*    arr,            /* in: array of page_no */
 
176
        ulint           count)          /* in: number of entries in array */
 
177
{
 
178
        ulint   i;
 
179
 
 
180
        ut_ad(arr != NULL);
 
181
        ut_ad(count <= BUF_LRU_DROP_SEARCH_HASH_SIZE);
 
182
 
 
183
        for (i = 0; i < count; ++i) {
 
184
                btr_search_drop_page_hash_when_freed(space_id, zip_size,
 
185
                                                     arr[i]);
 
186
        }
 
187
}
 
188
 
 
189
/**********************************************************************
 
190
When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page
 
191
hash index entries belonging to that table. This function tries to
 
192
do that in batch. Note that this is a 'best effort' attempt and does
 
193
not guarantee that ALL hash entries will be removed. */
 
194
static
 
195
void
 
196
buf_LRU_drop_page_hash_for_tablespace(
 
197
/*==================================*/
 
198
        ulint   id)     /* in: space id */
 
199
{
 
200
        buf_page_t*     bpage;
 
201
        ulint*          page_arr;
 
202
        ulint           num_entries;
 
203
        ulint           zip_size;
 
204
 
 
205
        zip_size = fil_space_get_zip_size(id);
 
206
 
 
207
        if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
 
208
                /* Somehow, the tablespace does not exist.  Nothing to drop. */
 
209
                ut_ad(0);
 
210
                return;
 
211
        }
 
212
 
 
213
        page_arr = ut_malloc(sizeof(ulint)
 
214
                             * BUF_LRU_DROP_SEARCH_HASH_SIZE);
 
215
        buf_pool_mutex_enter();
 
216
 
 
217
scan_again:
 
218
        num_entries = 0;
 
219
        bpage = UT_LIST_GET_LAST(buf_pool->LRU);
 
220
 
 
221
        while (bpage != NULL) {
 
222
                mutex_t*        block_mutex = buf_page_get_mutex(bpage);
 
223
                buf_page_t*     prev_bpage;
 
224
 
 
225
                mutex_enter(block_mutex);
 
226
                prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
 
227
 
 
228
                ut_a(buf_page_in_file(bpage));
 
229
 
 
230
                if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
 
231
                    || bpage->space != id
 
232
                    || bpage->buf_fix_count > 0
 
233
                    || bpage->io_fix != BUF_IO_NONE) {
 
234
                        /* We leave the fixed pages as is in this scan.
 
235
                        To be dealt with later in the final scan. */
 
236
                        mutex_exit(block_mutex);
 
237
                        goto next_page;
 
238
                }
 
239
 
 
240
                if (((buf_block_t*) bpage)->is_hashed) {
 
241
 
 
242
                        /* Store the offset(i.e.: page_no) in the array
 
243
                        so that we can drop hash index in a batch
 
244
                        later. */
 
245
                        page_arr[num_entries] = bpage->offset;
 
246
                        mutex_exit(block_mutex);
 
247
                        ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
 
248
                        ++num_entries;
 
249
 
 
250
                        if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
 
251
                                goto next_page;
 
252
                        }
 
253
                        /* Array full. We release the buf_pool->mutex to
 
254
                        obey the latching order. */
 
255
                        buf_pool_mutex_exit();
 
256
 
 
257
                        buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
 
258
                                                     num_entries);
 
259
                        num_entries = 0;
 
260
                        buf_pool_mutex_enter();
 
261
                } else {
 
262
                        mutex_exit(block_mutex);
 
263
                }
 
264
 
 
265
next_page:
 
266
                /* Note that we may have released the buf_pool mutex
 
267
                above after reading the prev_bpage during processing
 
268
                of a page_hash_batch (i.e.: when the array was full).
 
269
                This means that prev_bpage can change in LRU list.
 
270
                This is OK because this function is a 'best effort'
 
271
                to drop as many search hash entries as possible and
 
272
                it does not guarantee that ALL such entries will be
 
273
                dropped. */
 
274
                bpage = prev_bpage;
 
275
 
 
276
                /* If, however, bpage has been removed from LRU list
 
277
                to the free list then we should restart the scan.
 
278
                bpage->state is protected by buf_pool mutex. */
 
279
                if (bpage && !buf_page_in_file(bpage)) {
 
280
                        ut_a(num_entries == 0);
 
281
                        goto scan_again;
 
282
                }
 
283
        }
 
284
 
 
285
        buf_pool_mutex_exit();
 
286
 
 
287
        /* Drop any remaining batch of search hashed pages. */
 
288
        buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
 
289
        ut_free(page_arr);
 
290
}
 
291
 
 
292
/**********************************************************************
161
293
Invalidates all pages belonging to a given tablespace when we are deleting
162
294
the data file(s) of that tablespace. */
163
295
UNIV_INTERN
170
302
        ulint           page_no;
171
303
        ibool           all_freed;
172
304
 
 
305
        /* Before we attempt to drop pages one by one we first
 
306
        attempt to drop page hash index entries in batches to make
 
307
        it more efficient. The batching attempt is a best effort
 
308
        attempt and does not guarantee that all page hash entries
 
309
        will be dropped. We get rid of remaining page hash entries
 
310
        one by one below. */
 
311
        buf_LRU_drop_page_hash_for_tablespace(id);
 
312
 
173
313
scan_again:
174
314
        buf_pool_mutex_enter();
175
315
 
632
772
 
633
773
                if (!buf_lru_switched_on_innodb_mon) {
634
774
 
635
 
                        /* Over 67 % of the buffer pool is occupied by lock
 
775
                        /* Over 67 % of the buffer pool is occupied by lock
636
776
                        heaps or the adaptive hash index. This may be a memory
637
777
                        leak! */
638
778
 
712
852
        if (n_iterations > 30) {
713
853
                ut_print_timestamp(stderr);
714
854
                fprintf(stderr,
715
 
                        "InnoDB: Warning: difficult to find free blocks from\n"
 
855
                        "  InnoDB: Warning: difficult to find free blocks in\n"
716
856
                        "InnoDB: the buffer pool (%lu search iterations)!"
717
857
                        " Consider\n"
718
858
                        "InnoDB: increasing the buffer pool size.\n"
790
930
#if 3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5
791
931
# error "3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5"
792
932
#endif
 
933
#ifdef UNIV_LRU_DEBUG
 
934
        /* buf_pool->LRU_old must be the first item in the LRU list
 
935
        whose "old" flag is set. */
 
936
        ut_a(buf_pool->LRU_old->old);
 
937
        ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
 
938
             || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
 
939
        ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
 
940
             || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
 
941
#endif /* UNIV_LRU_DEBUG */
793
942
 
794
943
        for (;;) {
795
944
                old_len = buf_pool->LRU_old_len;
796
945
                new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
797
946
 
798
947
                ut_ad(buf_pool->LRU_old->in_LRU_list);
 
948
                ut_a(buf_pool->LRU_old);
 
949
#ifdef UNIV_LRU_DEBUG
 
950
                ut_a(buf_pool->LRU_old->old);
 
951
#endif /* UNIV_LRU_DEBUG */
799
952
 
800
953
                /* Update the LRU_old pointer if necessary */
801
954
 
803
956
 
804
957
                        buf_pool->LRU_old = UT_LIST_GET_PREV(
805
958
                                LRU, buf_pool->LRU_old);
 
959
#ifdef UNIV_LRU_DEBUG
 
960
                        ut_a(!buf_pool->LRU_old->old);
 
961
#endif /* UNIV_LRU_DEBUG */
806
962
                        buf_page_set_old(buf_pool->LRU_old, TRUE);
807
963
                        buf_pool->LRU_old_len++;
808
964
 
813
969
                                LRU, buf_pool->LRU_old);
814
970
                        buf_pool->LRU_old_len--;
815
971
                } else {
816
 
                        ut_a(buf_pool->LRU_old); /* Check that we did not
817
 
                                                 fall out of the LRU list */
818
972
                        return;
819
973
                }
820
974
        }
901
1055
 
902
1056
                buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, bpage);
903
1057
                ut_a(buf_pool->LRU_old);
 
1058
#ifdef UNIV_LRU_DEBUG
 
1059
                ut_a(!buf_pool->LRU_old->old);
 
1060
#endif /* UNIV_LRU_DEBUG */
904
1061
                buf_page_set_old(buf_pool->LRU_old, TRUE);
905
1062
 
906
1063
                buf_pool->LRU_old_len++;
974
1131
 
975
1132
        ut_a(buf_page_in_file(bpage));
976
1133
 
977
 
        buf_page_set_old(bpage, TRUE);
978
 
 
979
1134
        last_bpage = UT_LIST_GET_LAST(buf_pool->LRU);
980
1135
 
981
1136
        if (last_bpage) {
988
1143
        UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
989
1144
        ut_d(bpage->in_LRU_list = TRUE);
990
1145
 
 
1146
        buf_page_set_old(bpage, TRUE);
 
1147
 
991
1148
        if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
992
1149
 
993
1150
                buf_pool->LRU_old_len++;
1035
1192
        ut_a(buf_page_in_file(bpage));
1036
1193
        ut_ad(!bpage->in_LRU_list);
1037
1194
 
1038
 
        buf_page_set_old(bpage, old);
1039
 
 
1040
1195
        if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
1041
1196
 
1042
1197
                UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage);
1044
1199
                bpage->LRU_position = buf_pool_clock_tic();
1045
1200
                bpage->freed_page_clock = buf_pool->freed_page_clock;
1046
1201
        } else {
 
1202
#ifdef UNIV_LRU_DEBUG
 
1203
                /* buf_pool->LRU_old must be the first item in the LRU list
 
1204
                whose "old" flag is set. */
 
1205
                ut_a(buf_pool->LRU_old->old);
 
1206
                ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
 
1207
                     || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
 
1208
                ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
 
1209
                     || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
 
1210
#endif /* UNIV_LRU_DEBUG */
1047
1211
                UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
1048
1212
                                     bpage);
1049
1213
                buf_pool->LRU_old_len++;
1056
1220
 
1057
1221
        ut_d(bpage->in_LRU_list = TRUE);
1058
1222
 
 
1223
        buf_page_set_old(bpage, old);
 
1224
 
1059
1225
        if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
1060
1226
 
1061
1227
                ut_ad(buf_pool->LRU_old);
1246
1412
 
1247
1413
                                if (buf_page_is_old(b)) {
1248
1414
                                        buf_pool->LRU_old_len++;
 
1415
                                        if (UNIV_UNLIKELY
 
1416
                                            (buf_pool->LRU_old
 
1417
                                             == UT_LIST_GET_NEXT(LRU, b))) {
 
1418
 
 
1419
                                                buf_pool->LRU_old = b;
 
1420
                                        }
 
1421
#ifdef UNIV_LRU_DEBUG
 
1422
                                        ut_a(prev_b->old
 
1423
                                             || !UT_LIST_GET_NEXT(LRU, b)
 
1424
                                             || UT_LIST_GET_NEXT(LRU, b)->old);
 
1425
                                } else {
 
1426
                                        ut_a(!prev_b->old
 
1427
                                             || !UT_LIST_GET_NEXT(LRU, b)
 
1428
                                             || !UT_LIST_GET_NEXT(LRU, b)->old);
 
1429
#endif /* UNIV_LRU_DEBUG */
1249
1430
                                }
1250
1431
 
1251
1432
                                lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
1455
1636
                buf_block_modify_clock_inc((buf_block_t*) bpage);
1456
1637
                if (bpage->zip.data) {
1457
1638
                        const page_t*   page = ((buf_block_t*) bpage)->frame;
 
1639
                        const ulint     zip_size
 
1640
                                = page_zip_get_size(&bpage->zip);
1458
1641
 
1459
1642
                        ut_a(!zip || bpage->oldest_modification == 0);
1460
1643
 
1472
1655
                                        to the compressed page, which will
1473
1656
                                        be preserved. */
1474
1657
                                        memcpy(bpage->zip.data, page,
1475
 
                                               page_zip_get_size(&bpage->zip));
 
1658
                                               zip_size);
1476
1659
                                }
1477
1660
                                break;
1478
1661
                        case FIL_PAGE_TYPE_ZBLOB:
1484
1667
#endif /* UNIV_ZIP_DEBUG */
1485
1668
                                break;
1486
1669
                        default:
 
1670
                                ut_print_timestamp(stderr);
 
1671
                                fputs("  InnoDB: ERROR: The compressed page"
 
1672
                                      " to be evicted seems corrupt:", stderr);
 
1673
                                ut_print_buf(stderr, page, zip_size);
 
1674
                                fputs("\nInnoDB: Possibly older version"
 
1675
                                      " of the page:", stderr);
 
1676
                                ut_print_buf(stderr, bpage->zip.data,
 
1677
                                             zip_size);
 
1678
                                putc('\n', stderr);
1487
1679
                                ut_error;
1488
1680
                        }
1489
1681