# Copyright 2021 Alibaba Group Holding Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

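"""Restore an xstore node from a full backup plus binlog backups.

The `restore start` command reads a JSON restore context, downloads and
decompresses the xtrabackup stream, prepares it, replays the backed-up (or
PITR) binlogs, and re-initializes the consensus cluster metadata.

Illustrative invocation (the actual CLI entry point depends on how this module
is packaged): `... restore start --restore_context /path/to/context.json`.
"""
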
import collections
import configparser
import fcntl
import json
import os
import shutil
import subprocess
import sys
import time
import pymysql as mysql

import click
from core.context import Context
from core.log import LogFactory
from core.convention import *
from core.context.mycnf_renderer import MycnfRenderer
from core.backup_restore.storage.filestream_client import FileStreamClient, BackupStorage
from core.backup_restore.utils import check_run_process
import wget
import requests

RESTORE_TEMP_DIR = "/data/mysql/restore"
CONN_TIMEOUT = 30
INTERNAL_MARK = '/* rds internal mark */ '


@click.group(name="restore")
def restore_group():
    pass


@click.command(name='start')
@click.option('--restore_context', required=True, type=str)
def start(restore_context):
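    """Run the full restore flow on this node.

    Reads the restore context JSON, downloads and unpacks the backup set,
    renders my.cnf, prepares the backup, fetches the binlog backups (or PITR
    binlogs), recovers the consensus start index, replays the binlogs with a
    temporary mysqld, and finally syncs the cluster metadata.
    """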
    logger = LogFactory.get_logger("restore.log")

    with open(restore_context, 'r') as f:
        params = json.load(f)
    commit_index = params["backupCommitIndex"]
    backup_file_path = params["backupFilePath"]
    binlog_dir_path = params["binlogDirPath"]
    storage_name = params["storageName"]
    sink = params["sink"]
    pitr_endpoint = params.get("pitrEndpoint", "")
    pitr_xstore = params.get("pitrXStore", "")

    logger.info('start restore: backup_file_path=%s' % backup_file_path)

    context = Context()
    node_role = context.node_role()
    if node_role not in (NODE_ROLE_CANDIDATE, NODE_ROLE_LEARNER):
        logger.info("pod role is %s, no need to download backup." % node_role)
        return

    filestream_client = FileStreamClient(context, BackupStorage[str.upper(storage_name)], sink)

    mkdir_needed(context)

    backup_file_name = backup_file_path.split("/")[-1]

    download_backup_file(backup_file_path, backup_file_name, filestream_client, logger)

    decompress_backup_file(backup_file_name, context, logger)

    initialize_local_mycnf(context, logger)

    apply_backup_file(context, logger)

    if len(pitr_endpoint) == 0:
        mysql_bin_list = download_binlogbackup_file(binlog_dir_path, filestream_client, logger)
    else:
        mysql_bin_list = download_pitr_binloglist(context, pitr_endpoint, pitr_xstore, logger)

    copy_binlog_to_new_path(mysql_bin_list, context, logger)

    cluster_start_index = get_xtrabackup_binlog_info_from_instance_local(context)
    logger.info("cluster_start_index is: %s" % cluster_start_index)

    chown_data_dir(context, logger)

    last_binlog, first_binlog = show_last_and_first_binlog(context, logger)

    end_index, end_term = xdb_show_binlog_index(last_binlog, context, logger)
    logger.info("end_index:%s;end_term:%s" % (end_index, end_term))

    init_mysqld_metadata(cluster_start_index, commit_index, context, end_term, node_role, logger)

    p = subprocess.Popen([
        os.path.join(context.engine_home, 'bin', 'mysqld'),
        "--defaults-file=" + context.mycnf_path,
        "--user=mysql"
    ], stdout=sys.stdout)

    wait_binlog_apply_ready(context.port_access(), end_index, logger)

    p.kill()
    p.wait()

    sync_cluster_metadata(context, logger)

    context.mark_node_initialized()


def mkdir_needed(context):
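    """Create RESTORE_TEMP_DIR and the data/log/tmp/run volume dirs, owned by mysql."""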
    if not os.path.exists(RESTORE_TEMP_DIR):
        os.mkdir(RESTORE_TEMP_DIR)
    if not os.path.exists(context.volume_path(VOLUME_DATA, "data")):
        os.mkdir(context.volume_path(VOLUME_DATA, "data"))
    if not os.path.exists(context.volume_path(VOLUME_DATA, "log")):
        os.mkdir(context.volume_path(VOLUME_DATA, "log"))
    if not os.path.exists(context.volume_path(VOLUME_DATA, "tmp")):
        os.mkdir(context.volume_path(VOLUME_DATA, "tmp"))
    if not os.path.exists(context.volume_path(VOLUME_DATA, "run")):
        os.mkdir(context.volume_path(VOLUME_DATA, "run"))
    shutil.chown(context.volume_path(VOLUME_DATA, "data"), "mysql", "mysql")
    shutil.chown(context.volume_path(VOLUME_DATA, "log"), "mysql", "mysql")
    shutil.chown(context.volume_path(VOLUME_DATA, "tmp"), "mysql", "mysql")
    shutil.chown(context.volume_path(VOLUME_DATA, "run"), "mysql", "mysql")


def download_backup_file(backup_file_path, backup_file_name, filestream_client, logger):
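    """Download the full backup stream into RESTORE_TEMP_DIR via the filestream client."""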
    backup_stream_file = os.path.join(RESTORE_TEMP_DIR, backup_file_name)
    filestream_client.download_to_file(remote=backup_file_path, local=backup_stream_file, logger=logger)
    logger.info("backup file downloaded!")


def download_binlogbackup_file(binlog_dir_path, filestream_client, logger):
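    """Download the binlog_list file and every binlog it names into RESTORE_TEMP_DIR.

    Returns the list of binlog file names.
    """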
    binlog_list_path = os.path.join(RESTORE_TEMP_DIR, "binlog_list")
    filestream_client.download_to_file(remote=os.path.join(binlog_dir_path, "binlog_list"), local=binlog_list_path,
                                       logger=logger)
    with open(binlog_list_path, 'r') as f:
        mysql_binlog_list = f.read().splitlines()
    for binlog in mysql_binlog_list:
        filestream_client.download_to_file(remote=os.path.join(binlog_dir_path, binlog),
                                           local=os.path.join(RESTORE_TEMP_DIR, binlog), logger=logger)
    logger.info("binlog backup files downloaded")
    logger.info("mysql_binlog_list:%s" % mysql_binlog_list)
    return mysql_binlog_list


def download_pitr_binloglist(context, pitrEndpoint, xstore, logger):
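    """Query the PITR endpoint for the binlog list and download each binlog.

    Each binlog is materialized into RESTORE_TEMP_DIR by the polardbx-job
    helper (job type PitrDownloadFile). Returns the list of binlog file names.
    """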
    binlogListUrl = "/".join([pitrEndpoint, "binlogs"]) + ("?xstore=%s" % xstore)
    response = requests.get(binlogListUrl)
    mysql_binlog_list = []
    if response.status_code == 200:
        logger.info("binlogs http response %s" % response.content)
        binlogs = json.loads(response.content)
        for binlog in binlogs:
            mysql_binlog_list.append(binlog['filename'])
    else:
        raise Exception("failed to get binlog list, url = %s" % binlogListUrl)
    for binlog in mysql_binlog_list:
        downloadUrl = "/".join([pitrEndpoint, "download", "binlog"]) + ("?xstore=%s&only_meta=true" % xstore) + "&" + (
            "filename=%s" % binlog)
        response = requests.get(downloadUrl)
        if response.status_code == 200:
            binlog_datasource = response.content.decode("utf-8")
            cmd = " ".join(
                [os.path.join("/tools/xstore/current/bin", "polardbx-job"), "-job-type=PitrDownloadFile",
                 "-output=" + os.path.join(RESTORE_TEMP_DIR, binlog), "-binlog-source='%s'" % binlog_datasource])
            logger.info("binlog_datasource %s" % response.content)
            p = subprocess.Popen(cmd, shell=True, stdout=sys.stdout)
            p.wait()

            if p.returncode > 0:
                raise Exception("failed to download binlog, url = %s" % downloadUrl)
        else:
            raise Exception("failed to download binlog, url = %s" % downloadUrl)
    return mysql_binlog_list

def copy_binlog_to_new_path(mysql_bin_list, context, logger):
    # Copy the backed-up binlogs to the new binlog path and rebuild the index file.
    log_dir = context.volume_path(VOLUME_DATA, "log")
    index_file = os.path.join(log_dir, "mysql_bin.index")
    with open(index_file, 'w+') as f:
        for binlog in mysql_bin_list:
            binlog_file_path = os.path.join(RESTORE_TEMP_DIR, binlog)
            new_binlog_path = os.path.join(log_dir, binlog)
            shutil.copy(binlog_file_path, new_binlog_path)
            shutil.chown(new_binlog_path, "mysql", "mysql")
            f.write(new_binlog_path)
            f.write('\n')
            logger.info("binlog_file_path:%s;new_binlog_path:%s" % (binlog_file_path, new_binlog_path))
    shutil.chown(index_file, "mysql", "mysql")
    logger.info("copied binlogs to log path")


def decompress_backup_file(backup_file_name, context, logger):
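    """Unpack the downloaded xbstream backup into the data volume."""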
    decompress_cmd = "%s/xbstream -x < %s -C %s" % (
        context.xtrabackup_home, os.path.join(RESTORE_TEMP_DIR, backup_file_name),
        context.volume_path(VOLUME_DATA, "data"))
    logger.info("decompress_cmd:%s" % decompress_cmd)
    with subprocess.Popen(decompress_cmd, shell=True, stdout=sys.stdout) as p:
        p.wait()
    logger.info("decompress finished, return code: %s" % p.returncode)


def sort_config(config: configparser.ConfigParser) -> configparser.ConfigParser:
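    """Sort a ConfigParser's sections and keys in place (touches the private _sections dict)."""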
    for section in config._sections:
        # noinspection PyUnresolvedReferences
        config._sections[section] = collections.OrderedDict(
            sorted(config._sections[section].items(), key=lambda t: t[0]))
    config._sections = collections.OrderedDict(sorted(config._sections.items(), key=lambda t: t[0]))
    return config


def initialize_local_mycnf(context: Context, logger):
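    """Render my.cnf from the template plus the system, override, and file-override configs.

    The override file is (re)generated when reset_config is indicated or when it
    does not exist yet; the rendered my.cnf is written under an exclusive flock.
    """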
    indicate = context.current_indicate()
    force = indicate and indicate.reset_config
    if not os.path.exists(context.mysql_conf):
        os.mkdir(context.mysql_conf)

    with open(context.mycnf_path, 'w') as mycnf_file:
        # Acquire the file lock
        fcntl.flock(mycnf_file.fileno(), fcntl.LOCK_EX)

        # Render and write.
        if force or not os.path.exists(context.mycnf_override_path):
            override_config = context.mycnf_override_config()
            with open(context.mycnf_override_path, 'w') as f:
                override_config = sort_config(override_config)
                override_config.write(fp=f)

        override_config = configparser.ConfigParser(allow_no_value=True)
        override_config.read(context.mycnf_override_path)

        overrides = [context.mycnf_system_config(), override_config]
        if os.path.exists(context.file_config_override):
            # override file has the highest priority
            override_file_config = configparser.ConfigParser(allow_no_value=True)
            override_file_config.read(context.file_config_override)
            overrides += [override_file_config]

        r = MycnfRenderer(context.mycnf_template_path)
        r.render(extras=overrides, fp=mycnf_file)

        # Release the lock
        fcntl.flock(mycnf_file.fileno(), fcntl.LOCK_UN)
    logger.info("local mycnf initialized!")


def apply_backup_file(context, logger):
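    """Prepare the restored backup with xtrabackup (--prepare on 8.0, --apply-log on 5.7)."""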
    # Apply (prepare) the full backup set.
    apply_backup_cmd = ""
    if context.is_galaxy80():
        apply_backup_cmd = "%s --defaults-file=%s --prepare --target-dir=%s 2> %s/applybackup.log" \
                           % (context.xtrabackup, context.mycnf_path, context.volume_path(VOLUME_DATA, 'data'),
                              context.volume_path(VOLUME_DATA, "log"))
    elif context.is_xcluster57():
        apply_backup_cmd = "%s --defaults-file=%s --apply-log %s 2> %s/applybackup.log" \
                           % (context.xtrabackup, context.mycnf_path, context.volume_path(VOLUME_DATA, 'data'),
                              context.volume_path(VOLUME_DATA, "log"))
    logger.info("apply_backup_cmd:%s" % apply_backup_cmd)
    with subprocess.Popen(apply_backup_cmd, shell=True, stdout=sys.stdout) as p:
        p.wait()
    logger.info("apply backup finished, return code: %s" % p.returncode)


def chown_data_dir(context, logger):
    check_run_process(['chown', '-R', 'mysql:mysql', context.volume_path(VOLUME_DATA, "data")], logger=logger)


def init_mysqld_metadata(cluster_start_index, commit_index, context, end_term, pod_role, logger):
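    """Force-initialize the consensus metadata of the restored instance.

    Runs mysqld once with cluster-force-change-meta/cluster-force-single-mode so
    the node starts from cluster_start_index at the recovered term.
    """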
    if pod_role == NODE_ROLE_VOTER:
        cluster_start_index = commit_index
        if cluster_start_index is None:
            raise Exception("can NOT get logger commit index")
        logger.info("got logger commit index :%s" % cluster_start_index)

    # Initialize the mysqld metadata.
    init_metadata_cmd = [os.path.join(context.engine_home, 'bin', 'mysqld'),
                         "--defaults-file=" + context.mycnf_path,
                         "--cluster-current-term=" + str(end_term),
                         "--cluster-info=" + context.xcluster_info_argument(local=True),
                         "--cluster-force-change-meta=ON",
                         "--cluster-force-single-mode=ON",
                         "--loose-cluster-force-recover-index=" + str(cluster_start_index),
                         "--cluster-start-index=" + str(cluster_start_index)
                         ]
    logger.info("init_metadata_cmd" + str(init_metadata_cmd))
    check_run_process(init_metadata_cmd, logger=logger)


# Synchronize the cluster metadata.
def sync_cluster_metadata(context, logger):
    sync_metadata_cmd = [os.path.join(context.engine_home, 'bin', 'mysqld'),
                         "--defaults-file=" + context.mycnf_path,
                         "--cluster-info=" + context.xcluster_info_argument(name_from_env=True),
                         "--cluster-force-change-meta=ON"
                         ]
    logger.info("sync_metadata_cmd" + str(sync_metadata_cmd))
    check_run_process(sync_metadata_cmd, logger=logger)


def get_xtrabackup_binlog_info_from_instance_local(context: Context):
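    """Read xtrabackup_binlog_info from the data dir and return the recorded position, if any."""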
    data_dir = context.volume_path(VOLUME_DATA, "data")
    xtrabackup_binlog_info_path = os.path.join(data_dir, "xtrabackup_binlog_info")
    if not os.path.exists(xtrabackup_binlog_info_path):
        return None
    with open(xtrabackup_binlog_info_path, "r") as f:
        binlog_info = f.read().strip()
        if binlog_info:
            str_list = binlog_info.split()
            return str_list[1] if len(str_list) >= 2 else str_list[0]


def wait_binlog_apply_ready(mysql_port, end_log_index, logger):
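    """Poll every 10s (up to 48h) until the local apply index reaches end_log_index."""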
    timeout = 48 * 60 * 60
    deadline = time.time() + timeout
    while time.time() < deadline:
        logger.info("wait applying binlog")
        try:
            time.sleep(10)
            if check_binlog_apply_index_status(mysql_port, end_log_index, logger):
                return
        except Exception as e:
            logger.info(e)
    raise TimeoutError("binlog apply timeout!")


def check_binlog_apply_index_status(mysql_port, end_log_index, logger):
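    """Return True once every row of alisql_cluster_local reports an apply index >= end_log_index."""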
    sql_list = "select * from information_schema.alisql_cluster_local"
    logger.info("Execute SQL: %s" % sql_list)

    stat, output = execute_mysqlcmd(mysql_port, sql_list, db='information_schema')
    logger.info("get local node info: %s, %s", stat, output)
    if not output:
        raise Exception("can not get xdb full health info")

    rows = output.split("\n")

    for row in rows:
        columns = row.split("\t")
        logger.info("columns: %s" % columns)
        logger.info("last apply index: %s" % columns[-3])
        logger.info("end_log_index: %s" % end_log_index)
        if int(columns[-3]) < int(end_log_index):
            return False
    return True


def execute_mysqlcmd(port, cmd, db=None, host='127.0.0.1', user='root', autocommit=False, **kwargs):
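    """Execute one or more SQL statements over a fresh connection.

    Returns (0, tab/newline-joined rows) on success or (1, error string) on failure.
    """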
    kwargs_base = {
        'db': db,
        'init_command': '',
        'connect_timeout': CONN_TIMEOUT
    }
    kwargs_base.update(kwargs)
    new_kwargs = {k: v for k, v in kwargs_base.items() if v}
    result = []
    conn = None
    try:
        conn = mysql.connect(host=host, port=int(port), user=user, passwd='', **new_kwargs)
        if isinstance(cmd, str):
            cmd = [cmd]
        row_count = 0
        if autocommit:
            # autocommit defaults to false unless specified
            conn.autocommit(autocommit)
        cursor = conn.cursor()
        for c in cmd:
            sql = INTERNAL_MARK + c
            row_count += cursor.execute(sql)
            rows = cursor.fetchall()

            for row in rows:
                row_str = '\t'.join([item.strip("'") for item in map(conn.literal, row)])
                result.append(row_str)
        conn.commit()
        result_str = '\n'.join(result)
    except Exception as e:
        return 1, str(e)
    finally:
        if conn:
            conn.close()
    return 0, result_str


def show_last_and_first_binlog(context, logger):
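    """Scan mysql_bin.index and return the full paths of the last and first binlogs."""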
    log_dir = context.volume_path(VOLUME_DATA, "log")
    index_file = os.path.join(log_dir, "mysql_bin.index")
    last_file_index = -1
    first_file_index = 1000000000
    with open(index_file, "r") as f:
        for text_line in f.readlines():
            last_file_index = max(int(text_line.split('.')[-1]), last_file_index)
            first_file_index = min(int(text_line.split('.')[-1]), first_file_index)
    last_binlog = "mysql_bin.%06d" % last_file_index
    first_binlog = "mysql_bin.%06d" % first_file_index
    last_binlog = os.path.join(log_dir, last_binlog)
    first_binlog = os.path.join(log_dir, first_binlog)
    logger.info("get last binlog:%s;first binlog:%s" % (last_binlog, first_binlog))
    return last_binlog, first_binlog


def xdb_show_binlog_index(binlog_path, context, logger):
    """
    ../bin/mysqlbinlogtailor --show-index-info mysql-bin.xxxxxx,
    the output format is [start_index:start_term, end_index:end_term]
    :param binlog_path:
    :return:
    """
    cmd = [context.mysqlbinlogtailor,
           "--show-index-info",
           binlog_path
           ]

    logger.info("show_binlog_cmd:%s" % cmd)
    with subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc:
        index_info = proc.stdout.read().decode('utf-8')
        logger.info("xdb_show_binlog_index out: " + index_info)

    temp = index_info.strip().strip('[[]]').replace(' ', '')
    end_index = temp.split(',')[1].split(':')[0]
    end_term = temp.split(',')[1].split(':')[1]
    logger.info("end_index:%s;end_term:%s" % (end_index, end_term))
    return end_index, end_term


restore_group.add_command(start)