polardbxengine/mysql-test/suite/group_replication/t/gr_suspect_member_resumes.test

172 lines
6.6 KiB
Plaintext

#######################################################################################
# WL11570 - GR: options to defer member eviction after a suspicion
#
# In a group of 3 servers, we suspend one of them for 10 seconds and test if the
# non-responding server remains in the group for the configured waiting time,
# instead of being immediately expelled.
# Test :
# 0. This test requires a group with 3 servers
# 1. Set the group_replication_member_expel_timeout parameter to 300 seconds
# 2. Create table t1 and insert some data, checking that it is replicated to all
# servers.
# 3. Suspend server 3 by sending a signal SIGSTOP to it.
# This will make server 3 not answer to "I am alive" GCS messages and it will
# eventually be considered faulty.
# 4. Check that all members are still in the group on servers 1 and 2, which should
# both be ONLINE.
# Server 3 should still be in the group but UNREACHABLE.
# 5. Insert data into table t1 on server 2 and check that it is replicated to server 1.
# 6. After 10 seconds, resume server 3 by sending a signal SIGCONT to it.
# After some time, its state should change to ONLINE.
# 7. Check that server 3 retrieves the data inserted by server 2 while it was
# suspended.
# Then, insert new data into table t1 which should be replicated to servers 1 and 2.
# 8. Clean up.
#######################################################################################
# The test sends SIGSTOP and SIGCONT signals using the Linux kill command,
# hence the platform restriction below.
# NOTE(review): the exact effect of each include is defined outside this file;
# judging by their names they skip the test under Valgrind and on non-Linux
# hosts, mark it as a "big" test, and force a server restart afterwards.
--source include/not_valgrind.inc
--source include/linux.inc
--source include/big_test.inc
--source include/force_restart.inc
--source include/have_group_replication_plugin.inc
# The scenario needs a group of 3 servers (see step 0 in the header), so the
# server count is set before sourcing the group replication include.
--let $rpl_server_count= 3
--source include/group_replication.inc
# Step 1: set the same expel timeout (300 seconds) on each of the three
# servers, one connection at a time, and read the value back after each SET.
--echo
--echo ############################################################
--echo # 1. Set group_replication_member_expel_timeout to
--echo # 300 seconds.
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
SET GLOBAL group_replication_member_expel_timeout = 300;
SELECT @@GLOBAL.group_replication_member_expel_timeout;
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
SET GLOBAL group_replication_member_expel_timeout = 300;
SELECT @@GLOBAL.group_replication_member_expel_timeout;
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
SET GLOBAL group_replication_member_expel_timeout = 300;
SELECT @@GLOBAL.group_replication_member_expel_timeout;
# Step 2: create table t1 on server 1 and insert the first row.
--echo
--echo ############################################################
--echo # 2. Create a table and insert some data. Check that
--echo # it is replicated to all servers.
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
CREATE TABLE t1 (c1 INT NOT NULL PRIMARY KEY);
INSERT INTO t1 VALUES (1);
# NOTE(review): rpl_sync.inc is defined outside this file; presumably it waits
# until every server has applied the statements above -- confirm.
--source include/rpl_sync.inc
# Step 3: obtain the process id of server 3 and suspend the process.
# The pid is stored in $server_pid, which step 6 uses to resume the server.
--echo
--echo ############################################################
--echo # 3. Suspend server 3 by sending signal SIGSTOP to it.
--echo # This will make server 3 not answer to "I am alive"
--echo # GCS messages and it will eventually be considered
--echo # faulty.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
--echo # Get server 3 pid.
# The pid is read by loading the server's pid file into a scratch table.
# Binary logging is switched off for this session while the scratch table
# exists and switched back on once it has been dropped.
SET SESSION sql_log_bin= 0;
CREATE TABLE pid_table(pid_no INT);
--let $pid_file= `SELECT @@GLOBAL.pid_file`
# Mask the pid file path in the echoed LOAD DATA statement so the recorded
# output does not depend on the local directory layout.
--replace_result $pid_file pid_file
--eval LOAD DATA LOCAL INFILE '$pid_file' INTO TABLE pid_table
--let $server_pid=`SELECT pid_no FROM pid_table`
DROP TABLE pid_table;
SET SESSION sql_log_bin= 1;
--echo # Suspending server 3...
# Send SIGSTOP by name rather than by number: signal numbers differ between
# Linux architectures (19 is SIGSTOP on x86/ARM but SIGCONT on Alpha/SPARC).
--exec kill -s STOP $server_pid
# Step 4: from the point of view of server 1 and then server 2, wait until the
# membership table shows exactly two ONLINE members and exactly one
# UNREACHABLE member (the suspended server 3).
# NOTE(review): wait_condition.inc is defined outside this file; presumably it
# re-evaluates $wait_condition until it is true or a timeout expires -- confirm.
--echo
--echo ############################################################
--echo # 4. Check that all members are still in the group on
--echo # servers 1 and 2, which should both be ONLINE.
--echo # Server 3 should still be in the group but UNREACHABLE.
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
let $wait_condition=SELECT COUNT(*)=2 FROM performance_schema.replication_group_members where MEMBER_STATE="ONLINE";
--source include/wait_condition.inc
let $wait_condition=SELECT COUNT(*)=1 FROM performance_schema.replication_group_members where MEMBER_STATE="UNREACHABLE";
--source include/wait_condition.inc
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
let $wait_condition=SELECT COUNT(*)=2 FROM performance_schema.replication_group_members where MEMBER_STATE="ONLINE";
--source include/wait_condition.inc
let $wait_condition=SELECT COUNT(*)=1 FROM performance_schema.replication_group_members where MEMBER_STATE="UNREACHABLE";
--source include/wait_condition.inc
# Step 5: insert a second row while server 3 is suspended.
# No connection switch is needed before the INSERT: the current connection is
# still server2, selected at the end of step 4.
--echo
--echo ############################################################
--echo # 5. Insert data into table t1 on server 2 and check that
--echo # it is replicated to server 1.
INSERT INTO t1 VALUES (2);
# Wait for both rows to be visible locally on server 2 ...
let $wait_condition=SELECT COUNT(*)=2 FROM t1;
--source include/wait_condition.inc
# ... and then on server 1.
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
let $wait_condition=SELECT COUNT(*)=2 FROM t1;
--source include/wait_condition.inc
# Step 6: resume the suspended server 3 process and wait until all three
# servers report three ONLINE members.
--echo
--echo ############################################################
--echo # 6. Resume server 3 by sending a signal SIGCONT to it.
--echo # Server should change to ONLINE state.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
--echo # Send signal SIGCONT to server 3.
# $server_pid was captured in step 3, before the server was suspended.
# Send SIGCONT by name rather than by number: signal numbers differ between
# Linux architectures (18 is SIGCONT on x86/ARM but SIGTSTP on Alpha/SPARC).
--exec kill -s CONT $server_pid
# Check the membership view of each server in turn: 1, 2, then 3.
--let $rpl_connection_name= server1
--source include/rpl_connection.inc
let $wait_condition=SELECT COUNT(*)=3 FROM performance_schema.replication_group_members where MEMBER_STATE="ONLINE";
--source include/wait_condition.inc
--let $rpl_connection_name= server2
--source include/rpl_connection.inc
let $wait_condition=SELECT COUNT(*)=3 FROM performance_schema.replication_group_members where MEMBER_STATE="ONLINE";
--source include/wait_condition.inc
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
let $wait_condition=SELECT COUNT(*)=3 FROM performance_schema.replication_group_members where MEMBER_STATE="ONLINE";
--source include/wait_condition.inc
# Step 7: on server 3, wait until both existing rows are visible (including
# the row inserted on server 2 during the suspension), then insert a third
# row from server 3 and compare t1 across the three servers.
--echo
--echo ############################################################
--echo # 7. Check that server 3 retrieves the data inserted while
--echo # it was suspended. Then, insert new data into table t1
--echo # which should be replicated to servers 1 and 2.
--let $rpl_connection_name= server3
--source include/rpl_connection.inc
let $wait_condition=SELECT COUNT(*)=2 FROM t1;
--source include/wait_condition.inc
INSERT INTO t1 VALUES (3);
# NOTE(review): rpl_sync.inc and diff_tables.inc are defined outside this file;
# presumably the first waits for all servers to catch up and the second fails
# the test if the listed tables differ -- confirm.
--source include/rpl_sync.inc
--let $diff_tables=server1:t1, server2:t1, server3:t1
--source include/diff_tables.inc
# Step 8: drop the test table. The statement runs on the current connection,
# which is still server3 from step 7.
# group_replication_member_expel_timeout is not reset here.
# NOTE(review): presumably the restart requested via force_restart.inc at the
# top of the file restores the default -- confirm.
--echo
--echo ############################################################
--echo # 8. Clean up.
DROP TABLE t1;
--source include/group_replication_end.inc