#include "buf0flu.ic"

#ifndef UNIV_HOTBACKUP
#include "ut0byte.h"
#include "ut0lst.h"
#include "page0page.h"
#include "fil0fil.h"
#include "buf0lru.h"
#include "buf0rea.h"
#include "ibuf0ibuf.h"
#include "log0log.h"
#include "os0file.h"
#include "trx0sys.h"

/**********************************************************************
These statistics are generated for heuristics used in estimating the
rate at which we should flush the dirty blocks to avoid bursty IO
activity. Note that the rate of flushing not only depends on how many
dirty pages we have in the buffer pool but it is also a function of
how much redo the workload is generating and at what rate. */

/** Number of intervals for which we keep the history of these stats.
Each interval is 1 second, defined by the rate at which
srv_error_monitor_thread() calls buf_flush_stat_update(). */
#define BUF_FLUSH_STAT_N_INTERVAL	20

/** Sampled values of buf_flush_stat_cur.
Not protected by any mutex. Updated by buf_flush_stat_update(). */
static buf_flush_stat_t	buf_flush_stat_arr[BUF_FLUSH_STAT_N_INTERVAL];

/** Cursor to buf_flush_stat_arr[]. Updated in a round-robin fashion. */
static ulint		buf_flush_stat_arr_ind;

/** Values at start of the current interval. Reset by
buf_flush_stat_update(). */
static buf_flush_stat_t	buf_flush_stat_cur;

/** Running sum of past values of buf_flush_stat_cur.
Updated by buf_flush_stat_update(). Not protected by any mutex. */
static buf_flush_stat_t	buf_flush_stat_sum;

/** Number of pages flushed through non-flush_list flushes. */
static ulint	buf_lru_flush_page_count = 0;
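
/* Illustrative sketch (an assumption, mirroring how the fields are used
below; the real definition of buf_flush_stat_t lives in buf0flu.h): each
slot of buf_flush_stat_arr[] records one interval's redo volume and LRU
flush count, while buf_flush_stat_sum keeps the running total over the
whole window so that no rescan of the array is ever needed. */
#if 0
struct buf_flush_stat_struct{
	ib_uint64_t	redo;		/* amount of redo generated
					during the interval */
	ulint		n_flushed;	/* pages flushed by LRU flushes
					during the interval */
};
typedef struct buf_flush_stat_struct	buf_flush_stat_t;
#endif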

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list.
@return	TRUE if ok */
static
ibool
buf_flush_validate_low(void);
/*========================*/
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

/********************************************************************//**
Inserts a modified block into the flush list. */
UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
	buf_block_t*	block)	/*!< in/out: block which is modified */
{
	ut_ad(buf_pool_mutex_own());
	ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
	      || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
		  <= block->page.oldest_modification));

#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

/********************************************************************//**
Returns TRUE if the file page block is immediately suitable for replacement,
i.e., the transition FILE_PAGE => NOT_USED allowed.
@return	TRUE if can replace immediately */
UNIV_INTERN
ibool
buf_flush_ready_for_replace(
/*========================*/
	buf_page_t*	bpage)	/*!< in: buffer control block, must be
				buf_page_in_file(bpage) and in the LRU list */
{
	ut_ad(buf_pool_mutex_own());

/********************************************************************//**
Returns TRUE if the block is modified and ready for flushing.
@return	TRUE if can flush immediately */
UNIV_INLINE
ibool
buf_flush_ready_for_flush(
/*======================*/
	buf_page_t*	bpage,	/*!< in: buffer control block, must be
				buf_page_in_file(bpage) */
	enum buf_flush	flush_type)/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
{
	ut_a(buf_page_in_file(bpage));
	ut_ad(buf_pool_mutex_own());

	bpage->oldest_modification = 0;

	ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
			      ut_ad(ut_list_node_313->in_flush_list)));

/********************************************************************//**
Updates the flush system data structures when a write is completed. */
UNIV_INTERN
void
buf_flush_write_complete(
/*=====================*/
	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
{
	enum buf_flush	flush_type;

	mutex_exit(&(trx_doublewrite->mutex));
}
#endif /* !UNIV_HOTBACKUP */

/********************************************************************//**
Initializes a page for writing to the tablespace. */
UNIV_INTERN
void
buf_flush_init_for_writing(
/*=======================*/
	byte*		page,		/*!< in/out: page */
	void*		page_zip_,	/*!< in/out: compressed page, or NULL */
	ib_uint64_t	newest_lsn)	/*!< in: newest modification lsn
					to the page */
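
/* Illustrative sketch (an assumption, not the actual implementation):
for an uncompressed page, "initializing for writing" amounts to stamping
newest_lsn into the page header and trailer and then computing the page
checksum. The field offsets are the ones declared in fil0fil.h; the real
function additionally handles compressed pages. */
#if 0
	/* Write the newest modification LSN to the page header... */
	mach_write_ull(page + FIL_PAGE_LSN, newest_lsn);

	/* ...and its low 32 bits to the page trailer, so that a torn
	page write can be detected by comparing the two copies. */
	mach_write_to_4(page + UNIV_PAGE_SIZE
			- FIL_PAGE_END_LSN_OLD_CHKSUM + 4,
			(ulint) newest_lsn);

	/* Finally a checksum would be computed over the page and
	stored at FIL_PAGE_SPACE_OR_CHKSUM. */
#endif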

	buf_flush_write_block_low(bpage);

/***********************************************************//**
Flushes to disk all flushable pages within the flush area.
@return	number of pages flushed */
static
ulint
buf_flush_try_neighbors(
/*====================*/
	ulint		space,		/*!< in: space id */
	ulint		offset,		/*!< in: page offset */
	enum buf_flush	flush_type)	/*!< in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST */
{
	buf_page_t*	bpage;
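
	/* Illustrative sketch (an assumption, not code from this
	function): the "flush area" is the read-ahead-sized, aligned
	range of page offsets around 'offset'. The function computes a
	[low, high) window along these lines and then attempts to flush
	every flushable neighbor page inside it, so that nearby dirty
	pages reach the disk as one roughly sequential IO. */
#if 0
	ulint	buf_flush_area	= ut_min(BUF_READ_AHEAD_AREA,
					 buf_pool->curr_size / 16);
	ulint	low	= (offset / buf_flush_area) * buf_flush_area;
	ulint	high	= (offset / buf_flush_area + 1) * buf_flush_area;
#endif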

/*******************************************************************//**
This utility flushes dirty blocks from the end of the LRU list or flush_list.
NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages!
@return	number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
UNIV_INTERN
ulint
buf_flush_batch(
/*============*/
	enum buf_flush	flush_type,	/*!< in: BUF_FLUSH_LRU or
					BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
					then the caller must not own any
					latches on pages */
	ulint		min_n,		/*!< in: wished minimum number of blocks
					flushed (it is not guaranteed that the
					actual number is that big, though) */
	ib_uint64_t	lsn_limit)	/*!< in: in the case of BUF_FLUSH_LIST
					all blocks whose oldest_modification
					is smaller than this should be flushed
					(if their number does not exceed
					min_n), otherwise ignored */

	srv_buf_pool_flushed += page_count;

	/* We keep track of all flushes happening as part of LRU
	flush. When estimating the desired rate at which flush_list
	should be flushed we factor in this value. */
	if (flush_type == BUF_FLUSH_LRU) {
		buf_lru_flush_page_count += page_count;
	}

	return(page_count);
}
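
/* Illustrative usage sketch (an assumption, not code from this file):
a caller that needs the batch to complete checks for ULINT_UNDEFINED,
which signals that a batch of the same type was already running, and
waits for that batch instead of retrying blindly. */
#if 0
	ulint	n = buf_flush_batch(BUF_FLUSH_LIST, 100, IB_ULONGLONG_MAX);

	if (n == ULINT_UNDEFINED) {
		/* Another flush-list batch was already in progress;
		wait for it to finish before proceeding. */
		buf_flush_wait_batch_end(BUF_FLUSH_LIST);
	}
#endif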

/******************************************************************//**
Waits until a flush batch of the given type ends */
UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
	enum buf_flush	type)	/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
{
	ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST));

	os_event_wait(buf_pool->no_flush[type]);
}

/******************************************************************//**
Gives a recommendation of how many blocks should be flushed to establish
a big enough margin of replaceable blocks near the end of the LRU list
and in the free list.
@return	number of blocks which should be flushed from the end of the
LRU list */
UNIV_INTERN
ulint
buf_flush_LRU_recommendation(void)
/*==============================*/
{
	buf_page_t*	bpage;
	ulint		n_replaceable;

/*********************************************************************
Update the historical stats that we are collecting for flush rate
heuristics at the end of each interval. The flush rate heuristic depends
on (a) the rate of redo log generation and (b) the rate at which LRU
flushing is happening. */
UNIV_INTERN
void
buf_flush_stat_update(void)
/*=======================*/
{
	buf_flush_stat_t*	item;
	ib_uint64_t		lsn_diff;
	ib_uint64_t		lsn;
	ulint			n_flushed;

	lsn = log_get_lsn();
	if (buf_flush_stat_cur.redo == 0) {
		/* First time around. Just update the current LSN
		and return. */
		buf_flush_stat_cur.redo = lsn;
		return;
	}

	item = &buf_flush_stat_arr[buf_flush_stat_arr_ind];

	/* values for this interval */
	lsn_diff = lsn - buf_flush_stat_cur.redo;
	n_flushed = buf_lru_flush_page_count
		    - buf_flush_stat_cur.n_flushed;

	/* add the current value and subtract the obsolete entry. */
	buf_flush_stat_sum.redo += lsn_diff - item->redo;
	buf_flush_stat_sum.n_flushed += n_flushed - item->n_flushed;

	/* put the current entry in the array. */
	item->redo = lsn_diff;
	item->n_flushed = n_flushed;

	/* update the index */
	buf_flush_stat_arr_ind++;
	buf_flush_stat_arr_ind %= BUF_FLUSH_STAT_N_INTERVAL;

	/* reset the current entry. */
	buf_flush_stat_cur.redo = lsn;
	buf_flush_stat_cur.n_flushed = buf_lru_flush_page_count;
}
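
/* Worked example with illustrative numbers: suppose the slot about to
be overwritten holds redo = 1 MB and the interval that just ended
generated lsn_diff = 3 MB. Then buf_flush_stat_sum.redo grows by
3 MB - 1 MB = 2 MB, so the running sum always equals the total over the
most recent BUF_FLUSH_STAT_N_INTERVAL intervals without rescanning the
array. The n_flushed sum is maintained the same way. */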

/*********************************************************************
Determines the number of dirty pages that need to be flushed per second
based on the speed at which we generate redo log. Note that if redo log
is generated at a significant rate without a corresponding increase
in the number of dirty pages (for example, an in-memory workload)
it can cause IO bursts of flushing. This function implements heuristics
to avoid such burstiness.
@return	number of dirty pages to be flushed / second */
UNIV_INTERN
ulint
buf_flush_get_desired_flush_rate(void)
/*==================================*/
{
	ulint		redo_avg;
	ulint		lru_flush_avg;
	ulint		n_dirty;
	ulint		n_flush_req;
	lint		rate;
	ib_uint64_t	lsn = log_get_lsn();
	ulint		log_capacity = log_get_capacity();

	/* log_capacity should never be zero after the initialization
	of the log subsystem. */
	ut_ad(log_capacity != 0);

	/* Get the total number of dirty pages. It is OK to access
	flush_list without holding any mutex as we are using this
	only for heuristics. */
	n_dirty = UT_LIST_GET_LEN(buf_pool->flush_list);

	/* An overflow can happen if we generate more than 2^32 bytes
	of redo in this interval, i.e.: 4G of redo in 1 second. We can
	safely consider this as infinity because if we ever come close
	to 4G we'll start a synchronous flush of dirty pages. */
	/* redo_avg below is the average rate at which redo is generated
	over the past BUF_FLUSH_STAT_N_INTERVAL intervals, plus the redo
	generated in the current interval. */
	redo_avg = (ulint) (buf_flush_stat_sum.redo
			    / BUF_FLUSH_STAT_N_INTERVAL
			    + (lsn - buf_flush_stat_cur.redo));

	/* An overflow can possibly happen if we flush more than 2^32
	pages in BUF_FLUSH_STAT_N_INTERVAL. This is a very, very
	unlikely scenario. Even when it happens it only means that our
	flush rate will be off the mark; it won't affect the correctness
	of any subsystem. */
	/* lru_flush_avg below is the rate at which pages are flushed as
	part of LRU flushing over the past BUF_FLUSH_STAT_N_INTERVAL
	intervals, plus the number of pages flushed in the current
	interval. */
	lru_flush_avg = buf_flush_stat_sum.n_flushed
			/ BUF_FLUSH_STAT_N_INTERVAL
			+ (buf_lru_flush_page_count
			   - buf_flush_stat_cur.n_flushed);

	n_flush_req = (n_dirty * redo_avg) / log_capacity;

	/* The number of pages that we want to flush from the flush
	list is the difference between the required rate and the
	number of pages that we are historically flushing from the
	LRU list. */
	rate = n_flush_req - lru_flush_avg;

	return(rate > 0 ? (ulint) rate : 0);
}
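
/* Worked example with illustrative numbers: with n_dirty = 10000,
redo_avg = 2 MB/s and log_capacity = 1 GB, the required rate is
n_flush_req = 10000 * 2M / 1G, about 19 pages/s. If LRU flushing is
already writing lru_flush_avg = 12 pages/s, the flush_list flush only
needs to cover the remaining ~7 pages/s; if LRU flushing alone meets
the requirement, the function returns 0. */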

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list.
@return	TRUE if ok */
static
ibool
buf_flush_validate_low(void)
/*========================*/
{
	buf_page_t*	bpage;

	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
			 ut_ad(ut_list_node_313->in_flush_list));

	bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);