33 #include "buf0checksum.h"
37 #ifndef UNIV_HOTBACKUP
49 #include "mysql/plugin.h"
53 static ulint buf_lru_flush_page_count = 0;
64 #define PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE 100
66 #ifdef UNIV_PFS_THREAD
67 UNIV_INTERN mysql_pfs_key_t buf_page_cleaner_thread_key;
74 #define BUF_LRU_MIN_LEN 256
83 incr_flush_list_size_in_bytes(
94 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
100 buf_flush_validate_low(
109 buf_flush_validate_skip(
114 # define BUF_FLUSH_VALIDATE_SKIP 23
118 static int buf_flush_validate_count = BUF_FLUSH_VALIDATE_SKIP;
124 if (--buf_flush_validate_count > 0) {
128 buf_flush_validate_count = BUF_FLUSH_VALIDATE_SKIP;
129 return(buf_flush_validate_low(buf_pool));
145 ut_ad(!bpage || bpage->in_flush_list);
194 buf_flush_insert_in_flush_rbt(
207 ut_a(c_node != NULL);
212 if (p_node != NULL) {
226 buf_flush_delete_from_flush_rbt(
274 ut_ad(b1->in_flush_list);
275 ut_ad(b2->in_flush_list);
301 for (i = 0; i < srv_buf_pool_instances; i++) {
325 for (i = 0; i < srv_buf_pool_instances; i++) {
332 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
333 ut_a(buf_flush_validate_low(buf_pool));
365 if (UNIV_LIKELY_NULL(buf_pool->
flush_rbt)) {
374 ut_d(block->
page.in_flush_list = TRUE);
377 incr_flush_list_size_in_bytes(block, buf_pool);
379 #ifdef UNIV_DEBUG_VALGRIND
384 UNIV_MEM_ASSERT_RW(block->
page.
zip.
data, zip_size);
386 UNIV_MEM_ASSERT_RW(block->
frame, UNIV_PAGE_SIZE);
390 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
391 ut_a(buf_flush_validate_skip(buf_pool));
436 ut_d(block->
page.in_flush_list = TRUE);
439 #ifdef UNIV_DEBUG_VALGRIND
444 UNIV_MEM_ASSERT_RW(block->
page.
zip.
data, zip_size);
446 UNIV_MEM_ASSERT_RW(block->
frame, UNIV_PAGE_SIZE);
461 prev_b = buf_flush_insert_in_flush_rbt(&block->
page);
469 ut_ad(b->in_flush_list);
475 if (prev_b == NULL) {
479 prev_b, &block->
page);
482 incr_flush_list_size_in_bytes(block, buf_pool);
484 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
485 ut_a(buf_flush_validate_low(buf_pool));
507 ut_ad(bpage->in_LRU_list);
518 " InnoDB: Error: buffer block state %lu"
519 " in the LRU list!\n",
552 ut_ad(bpage->in_flush_list);
554 switch (flush_type) {
588 ut_ad(bpage->in_flush_list);
605 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
606 buf_LRU_insert_zip_clean(bpage);
615 if (UNIV_LIKELY_NULL(buf_pool->
flush_rbt)) {
616 buf_flush_delete_from_flush_rbt(bpage);
621 ut_d(bpage->in_flush_list = FALSE);
628 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
629 ut_a(buf_flush_validate_skip(buf_pool));
673 ut_ad(bpage->in_flush_list);
674 ut_ad(dpage->in_flush_list);
678 if (UNIV_LIKELY_NULL(buf_pool->
flush_rbt)) {
679 buf_flush_delete_from_flush_rbt(bpage);
680 prev_b = buf_flush_insert_in_flush_rbt(dpage);
685 ut_d(bpage->in_flush_list = FALSE);
691 ut_ad(prev->in_flush_list);
694 buf_pool->flush_list,
699 buf_pool->flush_list,
707 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
708 ut_a(buf_flush_validate_low(buf_pool));
731 buf_pool->
n_flush[flush_type]--;
736 if (buf_pool->
n_flush[flush_type] == 0
737 && buf_pool->
init_flush[flush_type] == FALSE) {
780 ib_uint32_t checksum = 0 ;
793 ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
802 memcpy(page_zip->
data, page, zip_size);
809 page_zip->
data, zip_size, newest_lsn);
815 fputs(
" InnoDB: ERROR: The compressed page to be written"
816 " seems corrupt:", stderr);
818 fputs(
"\nInnoDB: Possibly older version of the page:", stderr);
874 #ifndef UNIV_HOTBACKUP
882 buf_flush_write_block_low(
896 #ifdef UNIV_LOG_DEBUG
897 static ibool univ_log_debug_warned;
913 #ifdef UNIV_IBUF_COUNT_DEBUG
918 #ifdef UNIV_LOG_DEBUG
919 if (!univ_log_debug_warned) {
920 univ_log_debug_warned = TRUE;
921 fputs(
"Warning: cannot force log to disk if"
922 " UNIV_LOG_DEBUG is defined!\n"
923 "Crash recovery will not work!\n",
956 ? &bpage->
zip : NULL,
961 if (!srv_use_doublewrite_buf || !
buf_dblwr) {
965 zip_size ? zip_size : UNIV_PAGE_SIZE,
1005 ibool is_uncompressed;
1013 ut_ad(mutex_own(block_mutex));
1021 if (buf_pool->
n_flush[flush_type] == 0) {
1026 buf_pool->
n_flush[flush_type]++;
1031 switch (flush_type) {
1039 if (is_s_latched && is_uncompressed) {
1044 mutex_exit(block_mutex);
1053 if (!is_s_latched) {
1056 if (is_uncompressed) {
1077 if (is_uncompressed) {
1086 mutex_exit(block_mutex);
1100 if (buf_debug_prints) {
1102 "Flushing %u space %u page %u\n",
1106 buf_flush_write_block_low(bpage, flush_type, sync);
1109 # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
1142 buf_flush_check_neighbor(
1159 bpage = buf_page_hash_get(buf_pool, space, offset);
1176 mutex_enter(block_mutex);
1180 mutex_exit(block_mutex);
1192 buf_flush_try_neighbors(
1222 ulint buf_flush_area;
1228 low = (offset / buf_flush_area) * buf_flush_area;
1229 high = (offset / buf_flush_area + 1) * buf_flush_area;
1235 for (i = offset - 1;
1237 && buf_flush_check_neighbor(
1238 space, i, flush_type);
1245 for (i = offset + 1;
1247 && buf_flush_check_neighbor(
1248 space, i, flush_type);
1262 for (i = low; i < high; i++) {
1266 if ((count + n_flushed) >= n_to_flush) {
1286 bpage = buf_page_hash_get(buf_pool, space, i);
1304 mutex_enter(block_mutex);
1318 ut_ad(!mutex_own(block_mutex));
1323 mutex_exit(block_mutex);
1331 MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
1332 MONITOR_FLUSH_NEIGHBOR_COUNT,
1333 MONITOR_FLUSH_NEIGHBOR_PAGES,
1349 buf_flush_page_and_try_neighbors(
1362 ibool flushed = FALSE;
1370 mutex_enter(block_mutex);
1388 mutex_exit(block_mutex);
1391 *count += buf_flush_try_neighbors(space,
1400 mutex_exit(block_mutex);
1419 buf_free_from_unzip_LRU_list_batch(
1434 while (block != NULL && count < max
1458 MONITOR_LRU_BATCH_SCANNED,
1459 MONITOR_LRU_BATCH_SCANNED_NUM_CALL,
1460 MONITOR_LRU_BATCH_SCANNED_PER_CALL,
1476 buf_flush_LRU_list_batch(
1491 while (bpage != NULL && count < max
1498 mutex_enter(block_mutex);
1500 mutex_exit(block_mutex);
1523 }
else if (buf_flush_page_and_try_neighbors(
1541 buf_lru_flush_page_count += count;
1547 MONITOR_LRU_BATCH_SCANNED,
1548 MONITOR_LRU_BATCH_SCANNED_NUM_CALL,
1549 MONITOR_LRU_BATCH_SCANNED_PER_CALL,
1573 count += buf_free_from_unzip_LRU_list_batch(buf_pool, max);
1577 count += buf_flush_LRU_list_batch(buf_pool, max - count);
1591 buf_do_flush_list_batch(
1620 count < min_n && bpage != NULL && len > 0
1627 ut_ad(bpage->in_flush_list);
1637 buf_flush_page_and_try_neighbors(
1656 ut_ad(!bpage || bpage->in_flush_list);
1662 MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
1663 MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
1699 #ifdef UNIV_SYNC_DEBUG
1701 || sync_thread_levels_empty_except_dict());
1708 switch (flush_type) {
1710 count = buf_do_LRU_batch(buf_pool, min_n);
1713 count = buf_do_flush_list_batch(buf_pool, min_n, lsn_limit);
1722 if (buf_debug_prints && count > 0) {
1724 ?
"Flushed %lu pages in LRU flush\n"
1725 :
"Flushed %lu pages in flush list flush\n",
1747 if (buf_debug_prints && page_count > 0) {
1749 ?
"Flushed %lu pages in LRU flush\n"
1750 :
"Flushed %lu pages in flush list flush\n",
1751 (ulong) page_count);
1770 if (buf_pool->
n_flush[flush_type] > 0
1771 || buf_pool->
init_flush[flush_type] == TRUE) {
1803 if (buf_pool->
n_flush[flush_type] == 0) {
1825 if (buf_pool == NULL) {
1828 for (i = 0; i < srv_buf_pool_instances; ++
i) {
1834 os_event_wait(buf_pool->
no_flush[type]);
1839 os_event_wait(buf_pool->
no_flush[type]);
1873 page_count = buf_flush_batch(buf_pool,
BUF_FLUSH_LRU, min_n, 0);
1880 *n_processed = page_count;
1911 bool success =
true;
1917 if (min_n != ULINT_MAX) {
1922 min_n = (min_n + srv_buf_pool_instances - 1)
1923 / srv_buf_pool_instances;
1927 for (i = 0; i < srv_buf_pool_instances; i++) {
1929 ulint page_count = 0;
1949 page_count = buf_flush_batch(
1957 *n_processed += page_count;
1962 MONITOR_FLUSH_BATCH_TOTAL_PAGE,
1963 MONITOR_FLUSH_BATCH_COUNT,
1964 MONITOR_FLUSH_BATCH_PAGES,
1999 mutex_enter(block_mutex);
2006 mutex_exit(block_mutex);
2010 MONITOR_LRU_SINGLE_FLUSH_SCANNED,
2011 MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL,
2012 MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL,
2041 mutex_enter(block_mutex);
2043 mutex_exit(block_mutex);
2076 ulint total_flushed = 0;
2078 for (ulint i = 0; i < srv_buf_pool_instances; i++) {
2098 ulint n_flushed = 0;
2104 if (buf_flush_LRU(buf_pool,
2115 total_flushed += n_flushed;
2123 if (total_flushed) {
2125 MONITOR_LRU_BATCH_TOTAL_PAGE,
2126 MONITOR_LRU_BATCH_COUNT,
2127 MONITOR_LRU_BATCH_PAGES,
2131 return(total_flushed);
2141 for (ulint i = 0; i < srv_buf_pool_instances; i++) {
2165 page_cleaner_do_flush_batch(
2185 af_get_pct_for_dirty()
2190 ut_a(srv_max_dirty_pages_pct_lwm
2191 <= srv_max_buf_pool_modified_pct);
2193 if (srv_max_dirty_pages_pct_lwm == 0) {
2196 if (dirty_pct > srv_max_buf_pool_modified_pct) {
2202 }
else if (dirty_pct > srv_max_dirty_pages_pct_lwm) {
2204 return((dirty_pct * 100)
2205 / (srv_max_buf_pool_modified_pct + 1));
2220 lsn_t max_async_age;
2221 lsn_t lsn_age_factor;
2222 lsn_t af_lwm = (srv_adaptive_flushing_lwm
2223 * log_get_capacity()) / 100;
2230 max_async_age = log_get_max_modified_age_async();
2232 if (age < max_async_age && !srv_adaptive_flushing) {
2242 lsn_age_factor = (age * 100) / max_async_age;
2244 ut_ad(srv_max_io_capacity >= srv_io_capacity);
2245 return(static_cast<ulint>(
2246 ((srv_max_io_capacity / srv_io_capacity)
2247 * (lsn_age_factor * sqrt((
double)lsn_age_factor)))
2259 page_cleaner_flush_pages_if_needed(
void)
2262 static lsn_t lsn_avg_rate = 0;
2263 static lsn_t prev_lsn = 0;
2264 static lsn_t last_lsn = 0;
2265 static ulint sum_pages = 0;
2266 static ulint last_pages = 0;
2267 static ulint prev_pages = 0;
2268 static ulint avg_page_rate = 0;
2269 static ulint n_iterations = 0;
2275 ulint pct_for_dirty = 0;
2276 ulint pct_for_lsn = 0;
2277 ulint pct_total = 0;
2282 if (prev_lsn == 0) {
2288 if (prev_lsn == cur_lsn) {
2294 if (++n_iterations >= srv_flushing_avg_loops) {
2296 avg_page_rate = ((sum_pages / srv_flushing_avg_loops)
2297 + avg_page_rate) / 2;
2300 lsn_rate = (cur_lsn - prev_lsn) / srv_flushing_avg_loops;
2302 lsn_avg_rate = (lsn_avg_rate + lsn_rate) / 2;
2315 age = cur_lsn > oldest_lsn ? cur_lsn - oldest_lsn : 0;
2317 pct_for_dirty = af_get_pct_for_dirty();
2318 pct_for_lsn = af_get_pct_for_lsn(age);
2320 pct_total =
ut_max(pct_for_dirty, pct_for_lsn);
2324 n_pages = (PCT_IO(pct_total) + avg_page_rate) / 2;
2326 if (n_pages > srv_max_io_capacity) {
2327 n_pages = srv_max_io_capacity;
2330 if (last_pages && cur_lsn - last_lsn > lsn_avg_rate / 2) {
2331 age_factor = prev_pages / last_pages;
2334 MONITOR_SET(MONITOR_FLUSH_N_TO_FLUSH_REQUESTED, n_pages);
2336 prev_pages = n_pages;
2337 n_pages = page_cleaner_do_flush_batch(
2338 n_pages, oldest_lsn + lsn_avg_rate * (age_factor + 1));
2341 last_pages= n_pages + 1;
2343 MONITOR_SET(MONITOR_FLUSH_AVG_PAGE_RATE, avg_page_rate);
2344 MONITOR_SET(MONITOR_FLUSH_LSN_AVG_RATE, lsn_avg_rate);
2345 MONITOR_SET(MONITOR_FLUSH_PCT_FOR_DIRTY, pct_for_dirty);
2346 MONITOR_SET(MONITOR_FLUSH_PCT_FOR_LSN, pct_for_lsn);
2350 MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
2351 MONITOR_FLUSH_ADAPTIVE_COUNT,
2352 MONITOR_FLUSH_ADAPTIVE_PAGES,
2355 sum_pages += n_pages;
2366 page_cleaner_sleep_if_needed(
2368 ulint next_loop_time)
2373 if (next_loop_time > cur_time) {
2378 (next_loop_time - cur_time)
2387 extern "C" UNIV_INTERN
2391 void* arg __attribute__((unused)))
2396 ulint n_flushed = 0;
2401 #ifdef UNIV_PFS_THREAD
2402 pfs_register_thread(buf_page_cleaner_thread_key);
2405 #ifdef UNIV_DEBUG_THREAD_CREATION
2406 fprintf(stderr,
"InnoDB: page_cleaner thread running, id %lu\n",
2419 || n_flushed == 0) {
2420 page_cleaner_sleep_if_needed(next_loop_time);
2432 n_flushed += page_cleaner_flush_pages_if_needed();
2434 n_flushed = page_cleaner_do_flush_batch(
2440 MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
2441 MONITOR_FLUSH_BACKGROUND_COUNT,
2442 MONITOR_FLUSH_BACKGROUND_PAGES,
2468 n_flushed = page_cleaner_do_flush_batch(PCT_IO(100), LSN_MAX);
2471 if (n_flushed == 0) {
2498 }
while (!success || n_flushed > 0);
2503 for (ulint i = 0; i < srv_buf_pool_instances; i++) {
2517 OS_THREAD_DUMMY_RETURN;
2520 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2526 ut_a(elem->in_flush_list);
2535 buf_flush_validate_low(
2551 if (UNIV_LIKELY_NULL(buf_pool->
flush_rbt)) {
2555 while (bpage != NULL) {
2560 ut_ad(bpage->in_flush_list);
2572 if (UNIV_LIKELY_NULL(buf_pool->
flush_rbt)) {
2579 ut_a(*prpage == bpage);
2590 ut_a(rnode == NULL);
2608 ret = buf_flush_validate_low(buf_pool);
2624 buf_pool_get_dirty_pages_count(
2642 ut_ad(bpage->in_flush_list);
2661 buf_flush_get_dirty_pages_count(
2668 for (ulint i = 0; i < srv_buf_pool_instances; ++
i) {
2673 count += buf_pool_get_dirty_pages_count(buf_pool,
id);