polardbxengine/storage/innobase/clone/clone0snapshot.cc


/*****************************************************************************
Copyright (c) 2017, 2019, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Free Software Foundation.
This program is also distributed with certain software (including but not
limited to OpenSSL) that is licensed under separate terms, as designated in a
particular file or component or in included license documentation. The authors
of MySQL hereby grant you an additional permission to link the program and
your derivative works with the separately licensed software that they have
included with MySQL.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
/** @file clone/clone0snapshot.cc
InnoDB physical Snapshot
*******************************************************/
#include "clone0snapshot.h"
#include "page0zip.h"
#include "sql/handler.h"
/** Snapshot heap initial size */
const uint SNAPSHOT_MEM_INITIAL_SIZE = 16 * 1024;
/** Number of clones that can attach to a snapshot. */
const uint MAX_CLONES_PER_SNAPSHOT = 1;
Clone_Snapshot::Clone_Snapshot(Clone_Handle_Type hdl_type,
Ha_clone_type clone_type, uint arr_idx,
ib_uint64_t snap_id)
: m_snapshot_handle_type(hdl_type),
m_snapshot_type(clone_type),
m_snapshot_id(snap_id),
m_snapshot_arr_idx(arr_idx),
m_allow_new_clone(true),
m_num_clones(),
m_num_clones_current(),
m_num_clones_next(),
m_snapshot_state(CLONE_SNAPSHOT_INIT),
m_snapshot_next_state(CLONE_SNAPSHOT_NONE),
m_num_current_chunks(),
m_max_file_name_len(),
m_num_data_files(),
m_num_data_chunks(),
m_data_bytes_disk(),
m_page_ctx(false),
m_num_pages(),
m_num_duplicate_pages(),
m_redo_ctx(),
m_redo_start_offset(),
m_redo_header(),
m_redo_header_size(),
m_redo_trailer(),
m_redo_trailer_size(),
m_redo_trailer_offset(),
m_redo_file_size(),
m_num_redo_files(),
m_num_redo_chunks(),
m_enable_pfs(false) {
mutex_create(LATCH_ID_CLONE_SNAPSHOT, &m_snapshot_mutex);
m_snapshot_heap = mem_heap_create(SNAPSHOT_MEM_INITIAL_SIZE);
m_chunk_size_pow2 = SNAPSHOT_DEF_CHUNK_SIZE_POW2;
m_block_size_pow2 = SNAPSHOT_DEF_BLOCK_SIZE_POW2;
}
Clone_Snapshot::~Clone_Snapshot() {
m_redo_ctx.release();
if (m_page_ctx.is_active()) {
m_page_ctx.stop(nullptr);
}
m_page_ctx.release();
mem_heap_free(m_snapshot_heap);
mutex_free(&m_snapshot_mutex);
}
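/* Fill the state descriptor for the current snapshot state. The meaning of
m_num_files depends on the state: data files for FILE COPY, tracked pages
for PAGE COPY and redo files for REDO COPY. */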
void Clone_Snapshot::get_state_info(bool do_estimate,
Clone_Desc_State *state_desc) {
state_desc->m_state = m_snapshot_state;
state_desc->m_num_chunks = m_num_current_chunks;
state_desc->m_is_start = true;
state_desc->m_is_ack = false;
state_desc->m_estimate = 0;
state_desc->m_estimate_disk = 0;
if (do_estimate) {
state_desc->m_estimate = m_monitor.get_estimate();
state_desc->m_estimate_disk = m_data_bytes_disk;
}
switch (m_snapshot_state) {
case CLONE_SNAPSHOT_FILE_COPY:
state_desc->m_num_files = m_num_data_files;
break;
case CLONE_SNAPSHOT_PAGE_COPY:
state_desc->m_num_files = m_num_pages;
break;
case CLONE_SNAPSHOT_REDO_COPY:
state_desc->m_num_files = m_num_redo_files;
/* At least two redo files need to be created. */
if (state_desc->m_num_files < 2) {
state_desc->m_num_files = 2;
}
break;
case CLONE_SNAPSHOT_DONE:
/* fall through */
case CLONE_SNAPSHOT_INIT:
state_desc->m_num_files = 0;
break;
default:
ut_ad(false);
}
}
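/* Take over the counters from a received state descriptor: size the data or
redo file vector accordingly and switch the progress monitor to the stage
that matches the new state. */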
void Clone_Snapshot::set_state_info(Clone_Desc_State *state_desc) {
ut_ad(mutex_own(&m_snapshot_mutex));
ut_ad(state_desc->m_state == m_snapshot_state);
m_num_current_chunks = state_desc->m_num_chunks;
if (m_snapshot_state == CLONE_SNAPSHOT_FILE_COPY) {
m_num_data_files = state_desc->m_num_files;
m_num_data_chunks = state_desc->m_num_chunks;
m_data_bytes_disk = state_desc->m_estimate_disk;
m_data_file_vector.resize(m_num_data_files, nullptr);
m_monitor.init_state(srv_stage_clone_file_copy.m_key, m_enable_pfs);
m_monitor.add_estimate(state_desc->m_estimate);
m_monitor.change_phase();
} else if (m_snapshot_state == CLONE_SNAPSHOT_PAGE_COPY) {
m_num_pages = state_desc->m_num_files;
m_monitor.init_state(srv_stage_clone_page_copy.m_key, m_enable_pfs);
m_monitor.add_estimate(state_desc->m_estimate);
m_monitor.change_phase();
} else if (m_snapshot_state == CLONE_SNAPSHOT_REDO_COPY) {
m_num_redo_files = state_desc->m_num_files;
m_num_redo_chunks = state_desc->m_num_chunks;
m_redo_file_vector.resize(m_num_redo_files, nullptr);
m_monitor.init_state(srv_stage_clone_redo_copy.m_key, m_enable_pfs);
m_monitor.add_estimate(state_desc->m_estimate);
m_monitor.change_phase();
} else if (m_snapshot_state == CLONE_SNAPSHOT_DONE) {
ut_ad(m_num_current_chunks == 0);
m_monitor.init_state(PSI_NOT_INSTRUMENTED, m_enable_pfs);
} else {
ut_ad(false);
}
}
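/* Snapshot state machine, depending on the clone type:
HA_CLONE_HYBRID/HA_CLONE_PAGE: INIT -> FILE COPY -> PAGE COPY -> REDO COPY -> DONE
HA_CLONE_REDO:                 INIT -> FILE COPY -> REDO COPY -> DONE
HA_CLONE_BLOCKING:             INIT -> FILE COPY -> DONE */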
Snapshot_State Clone_Snapshot::get_next_state() {
Snapshot_State next_state;
ut_ad(m_snapshot_state != CLONE_SNAPSHOT_NONE);
if (m_snapshot_state == CLONE_SNAPSHOT_INIT) {
next_state = CLONE_SNAPSHOT_FILE_COPY;
} else if (m_snapshot_state == CLONE_SNAPSHOT_FILE_COPY) {
if (m_snapshot_type == HA_CLONE_HYBRID ||
m_snapshot_type == HA_CLONE_PAGE) {
next_state = CLONE_SNAPSHOT_PAGE_COPY;
} else if (m_snapshot_type == HA_CLONE_REDO) {
next_state = CLONE_SNAPSHOT_REDO_COPY;
} else {
ut_ad(m_snapshot_type == HA_CLONE_BLOCKING);
next_state = CLONE_SNAPSHOT_DONE;
}
} else if (m_snapshot_state == CLONE_SNAPSHOT_PAGE_COPY) {
next_state = CLONE_SNAPSHOT_REDO_COPY;
} else {
ut_ad(m_snapshot_state == CLONE_SNAPSHOT_REDO_COPY);
next_state = CLONE_SNAPSHOT_DONE;
}
return (next_state);
}
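/* Attach a clone task to the snapshot. The request is refused if the handle
type does not match, if m_allow_new_clone has been cleared by a state change,
or if MAX_CLONES_PER_SNAPSHOT clones are already attached. PFS monitoring is
enabled based on the first clone that attaches. */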
bool Clone_Snapshot::attach(Clone_Handle_Type hdl_type, bool pfs_monitor) {
bool ret = false;
mutex_enter(&m_snapshot_mutex);
if (m_num_clones == 0) {
m_enable_pfs = pfs_monitor;
}
if (m_allow_new_clone && hdl_type == m_snapshot_handle_type &&
m_num_clones < MAX_CLONES_PER_SNAPSHOT) {
++m_num_clones;
if (in_transit_state()) {
++m_num_clones_current;
}
ret = true;
}
mutex_exit(&m_snapshot_mutex);
return (ret);
}
uint Clone_Snapshot::detach() {
uint num_clones_left;
mutex_enter(&m_snapshot_mutex);
ut_ad(m_num_clones > 0);
if (in_transit_state()) {
--m_num_clones_current;
}
num_clones_left = --m_num_clones;
mutex_exit(&m_snapshot_mutex);
return (num_clones_left);
}
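/* State change protocol: every attached clone task calls change_state() for
the same target state. Each call moves one task from the "current" to the
"next" count and reports the number of tasks still pending; the last task
(pending_clones == 0) performs the actual transition and runs init_state()
for the new state. */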
int Clone_Snapshot::change_state(Clone_Desc_State *state_desc,
Snapshot_State new_state, byte *temp_buffer,
uint temp_buffer_len, Clone_Alert_Func cbk,
uint &pending_clones) {
ut_ad(m_snapshot_state != CLONE_SNAPSHOT_NONE);
mutex_enter(&m_snapshot_mutex);
if (m_snapshot_state != CLONE_SNAPSHOT_INIT) {
m_allow_new_clone = false;
}
/* Initialize transition if not started yet by other clones. */
if (!in_transit_state()) {
m_num_clones_current = m_num_clones;
m_snapshot_next_state = new_state;
m_num_clones_next = 0;
}
/* Move clone over to next state */
--m_num_clones_current;
++m_num_clones_next;
pending_clones = m_num_clones_current;
/* Need to wait for other clones to move over. */
if (pending_clones > 0) {
mutex_exit(&m_snapshot_mutex);
return (0);
}
/* Last clone requesting the state change. All other clones have
already moved over to the next state and are waiting for the transition
to complete. Now it is safe to do the snapshot state transition. */
m_snapshot_state = m_snapshot_next_state;
m_snapshot_next_state = CLONE_SNAPSHOT_NONE;
m_num_clones_current = 0;
m_num_clones_next = 0;
/* Initialize the new state. */
auto err = init_state(state_desc, temp_buffer, temp_buffer_len, cbk);
mutex_exit(&m_snapshot_mutex);
return (err);
}
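/* Return how many clone tasks still need to request the transition to
new_state. If exit_on_wait is set and tasks are still pending, the caller
moves itself back from the "next" to the "current" count. */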
uint Clone_Snapshot::check_state(Snapshot_State new_state, bool exit_on_wait) {
uint pending_clones;
mutex_enter(&m_snapshot_mutex);
pending_clones = 0;
if (in_transit_state() && new_state == m_snapshot_next_state) {
pending_clones = m_num_clones_current;
}
if (pending_clones != 0 && exit_on_wait) {
++m_num_clones_current;
--m_num_clones_next;
}
mutex_exit(&m_snapshot_mutex);
return (pending_clones);
}
Clone_File_Meta *Clone_Snapshot::get_file_by_index(uint index) {
Clone_File_Meta *file_meta;
if (m_snapshot_state == CLONE_SNAPSHOT_FILE_COPY ||
m_snapshot_state == CLONE_SNAPSHOT_PAGE_COPY) {
ut_ad(index < m_num_data_files);
file_meta = m_data_file_vector[index];
} else if (m_snapshot_state == CLONE_SNAPSHOT_REDO_COPY) {
ut_ad(index < m_num_redo_files);
file_meta = m_redo_file_vector[index];
} else {
ut_ad(false);
file_meta = nullptr;
}
return (file_meta);
}
int Clone_Snapshot::iterate_files(File_Cbk_Func &&func) {
if (m_snapshot_state != CLONE_SNAPSHOT_FILE_COPY &&
m_snapshot_state != CLONE_SNAPSHOT_REDO_COPY) {
return (0);
}
auto &file_vector = (m_snapshot_state == CLONE_SNAPSHOT_FILE_COPY)
? m_data_file_vector
: m_redo_file_vector;
for (auto file_meta : file_vector) {
auto err = func(file_meta);
if (err != 0) {
return (err);
}
}
return (0);
}
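/* Return the next block of data for a chunk. Chunks and blocks are counted
in pages: a chunk spans 2^m_chunk_size_pow2 pages of a file and a block
2^m_block_size_pow2 pages. PAGE COPY returns single pages from the buffer
pool, FILE COPY returns block-sized ranges of a data file, and REDO COPY
returns ranges of the archived redo files with the header and trailer sent
from memory as the last two chunks. */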
int Clone_Snapshot::get_next_block(uint chunk_num, uint &block_num,
Clone_File_Meta *file_meta,
ib_uint64_t &data_offset, byte *&data_buf,
uint &data_size) {
uint64_t start_offset = 0;
uint start_index;
Clone_File_Meta *current_file;
/* File index for the last chunk. This index value is always increasing
for a task. We skip all previous indexes while searching for the new file. */
start_index = file_meta->m_file_index;
if (m_snapshot_state == CLONE_SNAPSHOT_PAGE_COPY) {
/* Copy the page from buffer pool. */
auto err = get_next_page(chunk_num, block_num, file_meta, data_offset,
data_buf, data_size);
return (err);
} else if (m_snapshot_state == CLONE_SNAPSHOT_FILE_COPY) {
/* Get file for the chunk. */
current_file =
get_file(m_data_file_vector, m_num_data_files, chunk_num, start_index);
} else {
/* For redo copy, the header and trailer are returned in a buffer. */
ut_ad(m_snapshot_state == CLONE_SNAPSHOT_REDO_COPY);
if (chunk_num == (m_num_current_chunks - 1)) {
/* Last but one chunk is the redo header. */
if (block_num != 0) {
block_num = 0;
return (0);
}
++block_num;
current_file = m_redo_file_vector.front();
*file_meta = *current_file;
data_offset = 0;
data_buf = m_redo_header;
ut_ad(data_buf != nullptr);
data_size = m_redo_header_size;
return (0);
} else if (chunk_num == m_num_current_chunks) {
/* Last chunk is the redo trailer. */
if (block_num != 0 || m_redo_trailer_size == 0) {
block_num = 0;
return (0);
}
++block_num;
current_file = m_redo_file_vector.back();
*file_meta = *current_file;
data_offset = m_redo_trailer_offset;
data_buf = m_redo_trailer;
ut_ad(data_buf != nullptr);
data_size = m_redo_trailer_size;
return (0);
}
/* This is neither the header nor the trailer chunk. Need to get redo
data from the archived file. */
current_file =
get_file(m_redo_file_vector, m_num_redo_files, chunk_num, start_index);
if (current_file->m_begin_chunk == 1) {
/* Set start offset for the first file. */
start_offset = m_redo_start_offset;
}
/* Dummy redo file entry. Need to send metadata. */
if (current_file->m_file_size == 0) {
if (block_num != 0) {
block_num = 0;
return (0);
}
++block_num;
*file_meta = *current_file;
data_buf = nullptr;
data_size = 0;
data_offset = 0;
return (0);
}
}
/* We have identified the file to transfer data from at this point.
Get the data offset of the next block to transfer. */
uint num_blocks;
data_buf = nullptr;
uint64_t file_chunk_num = chunk_num - current_file->m_begin_chunk;
/* Offset in pages for the current chunk within the file. */
uint64_t chunk_offset = file_chunk_num << m_chunk_size_pow2;
/* Find number of blocks in current chunk. */
if (chunk_num == current_file->m_end_chunk) {
/* If it is last chunk, we need to adjust the size. */
ib_uint64_t size_in_pages;
uint aligned_sz;
ut_ad(current_file->m_file_size >= start_offset);
size_in_pages = ut_uint64_align_up(current_file->m_file_size - start_offset,
UNIV_PAGE_SIZE);
size_in_pages /= UNIV_PAGE_SIZE;
ut_ad(size_in_pages >= chunk_offset);
size_in_pages -= chunk_offset;
aligned_sz = static_cast<uint>(size_in_pages);
ut_ad(aligned_sz == size_in_pages);
aligned_sz = ut_calc_align(aligned_sz, block_size());
num_blocks = aligned_sz >> m_block_size_pow2;
} else {
num_blocks = blocks_per_chunk();
}
/* Current block is the last one. No more blocks in current chunk. */
if (block_num == num_blocks) {
block_num = 0;
return (0);
}
ut_ad(block_num < num_blocks);
/* Calculate the offset of next block. */
ib_uint64_t block_offset;
block_offset = static_cast<ib_uint64_t>(block_num);
block_offset *= block_size();
data_offset = chunk_offset + block_offset;
data_size = block_size();
++block_num;
*file_meta = *current_file;
/* Convert offset and length to bytes. */
data_size *= UNIV_PAGE_SIZE;
data_offset *= UNIV_PAGE_SIZE;
data_offset += start_offset;
ut_ad(data_offset < current_file->m_file_size);
/* Adjust length for last block in last chunk. */
if (chunk_num == current_file->m_end_chunk && block_num == num_blocks) {
ut_ad((data_offset + data_size) >= current_file->m_file_size);
data_size = static_cast<uint>(current_file->m_file_size - data_offset);
}
#ifdef UNIV_DEBUG
if (m_snapshot_state == CLONE_SNAPSHOT_REDO_COPY) {
/* Current file is the last redo file */
if (current_file == m_redo_file_vector.back() && m_redo_trailer_size != 0) {
/* Should not exceed/overwrite the trailer */
ut_ad(data_offset + data_size <= m_redo_trailer_offset);
}
}
#endif /* UNIV_DEBUG */
return (0);
}
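/* For direct I/O, while still in the INIT state, grow the block size in
powers of two until one block is at least as large as the caller's transfer
buffer, bounded by SNAPSHOT_MAX_BLOCK_SIZE_POW2. */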
void Clone_Snapshot::update_block_size(uint buff_size) {
mutex_enter(&m_snapshot_mutex);
/* Transfer data block is used only for direct IO. */
if (m_snapshot_state != CLONE_SNAPSHOT_INIT || !srv_is_direct_io()) {
mutex_exit(&m_snapshot_mutex);
return;
}
/* Try to grow the block size until one block is at least as large as the
transfer buffer. */
while (buff_size > (block_size() * UNIV_PAGE_SIZE) &&
m_block_size_pow2 < SNAPSHOT_MAX_BLOCK_SIZE_POW2) {
++m_block_size_pow2;
}
mutex_exit(&m_snapshot_mutex);
}
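/* Initialize the state the snapshot has just moved to. For an apply handle
the work is delegated to init_apply_state(); for a copy handle the per-state
init_file_copy(), init_page_copy() or init_redo_copy() is invoked and the
progress monitor is switched to the matching stage. */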
int Clone_Snapshot::init_state(Clone_Desc_State *state_desc, byte *temp_buffer,
uint temp_buffer_len, Clone_Alert_Func cbk) {
int err = 0;
m_num_current_chunks = 0;
if (!is_copy()) {
err = init_apply_state(state_desc);
return (err);
}
switch (m_snapshot_state) {
case CLONE_SNAPSHOT_NONE:
case CLONE_SNAPSHOT_INIT:
ut_ad(false);
err = ER_INTERNAL_ERROR;
my_error(err, MYF(0), "Innodb Clone Snapshot Invalid state");
break;
case CLONE_SNAPSHOT_FILE_COPY:
ib::info(ER_IB_CLONE_OPERATION) << "Clone State BEGIN FILE COPY";
m_monitor.init_state(srv_stage_clone_file_copy.m_key, m_enable_pfs);
err = init_file_copy();
m_monitor.change_phase();
DEBUG_SYNC_C("clone_start_page_archiving");
DBUG_EXECUTE_IF("clone_crash_during_page_archiving", DBUG_SUICIDE(););
break;
case CLONE_SNAPSHOT_PAGE_COPY:
ib::info(ER_IB_CLONE_OPERATION) << "Clone State BEGIN PAGE COPY";
m_monitor.init_state(srv_stage_clone_page_copy.m_key, m_enable_pfs);
err = init_page_copy(temp_buffer, temp_buffer_len);
m_monitor.change_phase();
DEBUG_SYNC_C("clone_start_redo_archiving");
break;
case CLONE_SNAPSHOT_REDO_COPY:
ib::info(ER_IB_CLONE_OPERATION) << "Clone State BEGIN REDO COPY";
m_monitor.init_state(srv_stage_clone_redo_copy.m_key, m_enable_pfs);
err = init_redo_copy(cbk);
m_monitor.change_phase();
break;
case CLONE_SNAPSHOT_DONE:
ib::info(ER_IB_CLONE_OPERATION) << "Clone State DONE ";
m_monitor.init_state(PSI_NOT_INSTRUMENTED, m_enable_pfs);
m_redo_ctx.release();
break;
}
return (err);
}
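/* Locate the file that owns chunk_num by scanning the file vector forward
from start_index. Each file covers a contiguous chunk range
[m_begin_chunk, m_end_chunk]. */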
Clone_File_Meta *Clone_Snapshot::get_file(Clone_File_Vec &file_vector,
uint num_files, uint chunk_num,
uint start_index) {
Clone_File_Meta *current_file = nullptr;
uint idx;
ut_ad(file_vector.size() >= num_files);
/* Scan through the file vector matching chunk number. */
for (idx = start_index; idx < num_files; idx++) {
current_file = file_vector[idx];
ut_ad(chunk_num >= current_file->m_begin_chunk);
if (chunk_num <= current_file->m_end_chunk) {
break;
}
}
ut_ad(idx < num_files);
return (current_file);
}
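/* PAGE COPY: each block of a chunk is one tracked page. Look up the data
file for the page's tablespace, move on to later file nodes of a multi-file
tablespace when the offset exceeds the current node's size, and copy the
page image from the buffer pool via get_page_for_write(). For page 0 the
file size is refreshed from the FSP header. */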
int Clone_Snapshot::get_next_page(uint chunk_num, uint &block_num,
Clone_File_Meta *file_meta,
ib_uint64_t &data_offset, byte *&data_buf,
uint &data_size) {
Clone_Page clone_page;
Clone_File_Meta *page_file;
uint page_index;
uint file_index;
ut_ad(data_size >= UNIV_PAGE_SIZE);
if (block_num == chunk_size()) {
block_num = 0;
return (0);
}
/* For "page copy", each block is a page. */
page_index = chunk_size() * (chunk_num - 1);
page_index += block_num;
/* For last chunk, actual number of blocks could be less
than chunk_size. */
if (page_index >= m_page_vector.size()) {
ut_ad(page_index == m_page_vector.size());
block_num = 0;
return (0);
}
clone_page = m_page_vector[page_index];
++block_num;
/* Get the data file for current page. */
bool found;
const page_size_t &page_size =
fil_space_get_page_size(clone_page.m_space_id, &found);
ut_ad(found);
file_index = m_data_file_map[clone_page.m_space_id];
ut_ad(file_index > 0);
--file_index;
page_file = m_data_file_vector[file_index];
ut_ad(page_file->m_space_id == clone_page.m_space_id);
/* Data offset could be beyond a 32-bit integer. */
data_offset = static_cast<ib_uint64_t>(clone_page.m_page_no);
data_offset *= page_size.physical();
/* Check if the page belongs to other nodes of the tablespace. */
while (m_num_data_files > file_index + 1) {
Clone_File_Meta *page_file_next;
page_file_next = m_data_file_vector[file_index + 1];
/* The next node belongs to the same tablespace and the data offset
exceeds the current node size. */
if (page_file_next->m_space_id == clone_page.m_space_id &&
data_offset >= page_file->m_file_size) {
data_offset -= page_file->m_file_size;
file_index++;
page_file = m_data_file_vector[file_index];
} else {
break;
}
}
*file_meta = *page_file;
/* Get page from buffer pool. */
page_id_t page_id(clone_page.m_space_id, clone_page.m_page_no);
auto err =
get_page_for_write(page_id, page_size, file_meta, data_buf, data_size);
/* Update size from space header page. */
if (clone_page.m_page_no == 0) {
auto space_size = fsp_header_get_field(data_buf, FSP_SIZE);
auto size_bytes = static_cast<uint64_t>(space_size);
size_bytes *= page_size.physical();
if (file_meta->m_file_size < size_bytes) {
file_meta->m_file_size = size_bytes;
}
}
return (err);
}
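/* Rewrite the encryption information stored in the redo log file header:
decode the log encryption key and IV and fill them back in encrypted with
the master key. */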
bool Clone_Snapshot::encrypt_key_in_log_header(byte *log_header,
uint32_t header_len) {
byte encryption_key[ENCRYPTION_KEY_LEN];
byte encryption_iv[ENCRYPTION_KEY_LEN];
size_t offset = LOG_ENCRYPTION + LOG_HEADER_CREATOR_END;
ut_a(offset + ENCRYPTION_INFO_SIZE <= header_len);
auto encryption_info = log_header + offset;
/* Get log Encryption Key and IV. */
auto success = Encryption::decode_encryption_info(
&encryption_key[0], &encryption_iv[0], encryption_info, false);
if (success) {
/* Encrypt with master key and fill encryption information. */
success = Encryption::fill_encryption_info(
&encryption_key[0], &encryption_iv[0], encryption_info, false, true);
}
return (success);
}
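/* Same for the first page of an encrypted tablespace: re-encrypt the
tablespace key with the master key and recompute the page checksum so the
modified header page remains valid. */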
bool Clone_Snapshot::encrypt_key_in_header(const page_size_t &page_size,
byte *page_data) {
byte encryption_key[ENCRYPTION_KEY_LEN];
byte encryption_iv[ENCRYPTION_KEY_LEN];
auto offset = fsp_header_get_encryption_offset(page_size);
ut_ad(offset != 0 && offset + ENCRYPTION_INFO_SIZE <= UNIV_PAGE_SIZE);
auto encryption_info = page_data + offset;
/* Get tablespace Encryption Key and IV. */
auto success = Encryption::decode_encryption_info(
&encryption_key[0], &encryption_iv[0], encryption_info, false);
if (!success) {
return (false);
}
/* Encrypt with master key and fill encryption information. */
success = Encryption::fill_encryption_info(
&encryption_key[0], &encryption_iv[0], encryption_info, false, true);
if (!success) {
return (false);
}
const auto frame_lsn =
static_cast<lsn_t>(mach_read_from_8(page_data + FIL_PAGE_LSN));
/* Update page checksum */
page_update_for_flush(page_size, frame_lsn, page_data);
return (true);
}
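/* Write the tablespace key and IV into the encryption information of page 0
in unencrypted form; get_page_for_write() uses this so the key can be sent
over the SSL connection. */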
void Clone_Snapshot::decrypt_key_in_header(fil_space_t *space,
const page_size_t &page_size,
byte *&page_data) {
byte encryption_info[ENCRYPTION_INFO_SIZE];
/* Get tablespace encryption information. */
Encryption::fill_encryption_info(space->encryption_key, space->encryption_iv,
encryption_info, false, false);
/* Set encryption information in page. */
auto offset = fsp_header_get_encryption_offset(page_size);
ut_ad(offset != 0 && offset < UNIV_PAGE_SIZE);
memcpy(page_data + offset, encryption_info, sizeof(encryption_info));
}
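/* Set the LSN and checksum of a page image the same way a page flush would,
using buf_flush_init_for_writing(); compressed pages go through a temporary
page_zip descriptor pointing at the copied data. */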
void Clone_Snapshot::page_update_for_flush(const page_size_t &page_size,
lsn_t page_lsn, byte *&page_data) {
/* For compressed table, must copy the compressed page. */
if (page_size.is_compressed()) {
page_zip_des_t page_zip;
auto data_size = page_size.physical();
page_zip_set_size(&page_zip, data_size);
page_zip.data = page_data;
#ifdef UNIV_DEBUG
page_zip.m_start =
#endif /* UNIV_DEBUG */
page_zip.m_end = page_zip.m_nonempty = page_zip.n_blobs = 0;
buf_flush_init_for_writing(nullptr, page_data, &page_zip, page_lsn, false,
false);
} else {
buf_flush_init_for_writing(nullptr, page_data, nullptr, page_lsn, false,
false);
}
}
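/* Produce a consistent page image for transfer: latch the page with an SX
latch inside a mini-transaction, copy the latest frame (compressed or
uncompressed), fix up LSN and checksum for dirty or half-initialized pages,
replace the encryption key in page 0, verify the copy is not corrupted and
finally apply transparent page compression and TDE encryption as a regular
page write would. The caller's buffer must hold at least two pages so the
compressed/encrypted copy fits behind the plain one. */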
int Clone_Snapshot::get_page_for_write(const page_id_t &page_id,
const page_size_t &page_size,
Clone_File_Meta *file_meta,
byte *&page_data, uint &data_size) {
auto space = fil_space_get(page_id.space());
mtr_t mtr;
mtr_start(&mtr);
ut_ad(data_size >= 2 * page_size.physical());
data_size = page_size.physical();
/* Space header page is modified with SX latch while extending. Also,
we would like to serialize with page flush to disk. */
auto block =
buf_page_get_gen(page_id, page_size, RW_SX_LATCH, nullptr,
Page_fetch::POSSIBLY_FREED, __FILE__, __LINE__, &mtr);
auto bpage = &block->page;
buf_page_mutex_enter(block);
ut_ad(!fsp_is_checksum_disabled(bpage->id.space()));
/* Get oldest and newest page modification LSN for dirty page. */
auto oldest_lsn = bpage->oldest_modification;
auto newest_lsn = bpage->newest_modification;
buf_page_mutex_exit(block);
bool page_is_dirty = (oldest_lsn > 0);
byte *src_data;
if (bpage->zip.data != nullptr) {
ut_ad(bpage->size.is_compressed());
/* If the page is not dirty, then zip descriptor always has the latest
flushed page copy with LSN and checksum set properly. If the page is
dirty, the latest modified page is in uncompressed form for uncompressed
page types. The LSN in such case is to be taken from block newest LSN and
checksum needs to be recalculated. */
if (page_is_dirty && page_is_uncompressed_type(block->frame)) {
src_data = block->frame;
} else {
src_data = bpage->zip.data;
}
} else {
ut_ad(!bpage->size.is_compressed());
src_data = block->frame;
}
memcpy(page_data, src_data, data_size);
auto cur_lsn = log_get_lsn(*log_sys);
const auto frame_lsn =
static_cast<lsn_t>(mach_read_from_8(page_data + FIL_PAGE_LSN));
/* First page of an encrypted tablespace. */
if (space->encryption_type != Encryption::NONE && page_id.page_no() == 0) {
/* Update the unencrypted tablespace key in page 0, to be sent over the
SSL connection. */
decrypt_key_in_header(space, page_size, page_data);
/* Force to recalculate the checksum if the page is not dirty. */
if (!page_is_dirty) {
page_is_dirty = true;
newest_lsn = frame_lsn;
}
}
/* If the page is not dirty but the frame LSN is zero, it could be a half
initialized page left over from an incomplete operation. Assign a valid LSN
and checksum before copying. */
if (frame_lsn == 0 && oldest_lsn == 0) {
page_is_dirty = true;
newest_lsn = cur_lsn;
}
/* If page is dirty, we need to set checksum and page LSN. */
if (page_is_dirty) {
ut_ad(newest_lsn > 0);
page_update_for_flush(page_size, newest_lsn, page_data);
}
BlockReporter reporter(false, page_data, page_size, false);
const auto page_lsn =
static_cast<lsn_t>(mach_read_from_8(page_data + FIL_PAGE_LSN));
const auto page_checksum = static_cast<uint32_t>(
mach_read_from_4(page_data + FIL_PAGE_SPACE_OR_CHKSUM));
int err = 0;
if (reporter.is_corrupted() || page_lsn > cur_lsn ||
(page_checksum != 0 && page_lsn == 0)) {
ut_ad(false);
my_error(ER_INTERNAL_ERROR, MYF(0), "Innodb Clone Corrupt Page");
err = ER_INTERNAL_ERROR;
}
auto encrypted_data = page_data + data_size;
/* Data length could be less for a compressed page. */
auto data_len = data_size;
/* Do transparent page compression if needed. */
if (page_id.page_no() != 0 && file_meta->m_punch_hole &&
space->compression_type != Compression::NONE) {
auto compressed_data = page_data + data_size;
memset(compressed_data, 0, data_size);
IORequest request(IORequest::WRITE);
request.compression_algorithm(space->compression_type);
ulint compressed_len = 0;
auto buf_ptr = os_file_compress_page(
request.compression_algorithm(), file_meta->m_fsblk_size, page_data,
data_size, compressed_data, &compressed_len);
if (buf_ptr != page_data) {
encrypted_data = page_data;
page_data = compressed_data;
data_len = static_cast<uint>(compressed_len);
}
}
IORequest request(IORequest::WRITE);
fil_io_set_encryption(request, page_id, space);
/* Encrypt page if TDE is enabled. */
if (err == 0 && request.is_encrypted()) {
Encryption encryption(request.encryption_algorithm());
ulint encrypt_len = data_len;
memset(encrypted_data, 0, data_size);
auto ret_data = encryption.encrypt(request, page_data, data_len,
encrypted_data, &encrypt_len);
if (ret_data != page_data) {
page_data = encrypted_data;
data_len = static_cast<uint>(encrypt_len);
}
}
mtr_commit(&mtr);
return (err);
}