388 lines
10 KiB
C++
388 lines
10 KiB
C++
/*
   Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
*/
|
|
|
|
#include <ndb_global.h>
|
|
#include <ndb_opts.h>
|
|
#include <time.h>
|
|
|
|
#include <mgmapi.h>
|
|
#include <NdbOut.hpp>
|
|
#include <NdbSleep.h>
|
|
#include <NdbTick.h>
|
|
#include <portlib/ndb_localtime.h>
|
|
|
|
#include <NDBT.hpp>
|
|
|
|
#include <kernel/NodeBitmask.hpp>
|
|
|
|
#include "my_alloc.h"
|
|
|
|
static int
|
|
waitClusterStatus(const char* _addr, ndb_mgm_node_status _status);
|
|
|
|
static int _no_contact = 0;
|
|
static int _not_started = 0;
|
|
static int _single_user = 0;
|
|
static int _timeout = 120; // Seconds
|
|
static const char* _wait_nodes = 0;
|
|
static const char* _nowait_nodes = 0;
|
|
static NdbNodeBitmask nowait_nodes_bitmask;
|
|
|
|
static struct my_option my_long_options[] =
|
|
{
|
|
NDB_STD_OPTS("ndb_waiter"),
|
|
{ "no-contact", 'n', "Wait for cluster no contact",
|
|
(uchar**) &_no_contact, (uchar**) &_no_contact, 0,
|
|
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
|
|
{ "not-started", NDB_OPT_NOSHORT, "Wait for cluster not started",
|
|
(uchar**) &_not_started, (uchar**) &_not_started, 0,
|
|
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
|
|
{ "single-user", NDB_OPT_NOSHORT,
|
|
"Wait for cluster to enter single user mode",
|
|
(uchar**) &_single_user, (uchar**) &_single_user, 0,
|
|
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
|
|
{ "timeout", 't', "Timeout to wait in seconds",
|
|
(uchar**) &_timeout, (uchar**) &_timeout, 0,
|
|
GET_INT, REQUIRED_ARG, 120, 0, 0, 0, 0, 0 },
|
|
{ "wait-nodes", 'w', "Node ids to wait on, e.g. '1,2-4'",
|
|
(uchar**) &_wait_nodes, (uchar**) &_wait_nodes, 0,
|
|
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
|
|
{ "nowait-nodes", NDB_OPT_NOSHORT,
|
|
"Nodes that will not be waited for, e.g. '2,3,4-7'",
|
|
(uchar**) &_nowait_nodes, (uchar**) &_nowait_nodes, 0,
|
|
GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
|
|
{ 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
|
|
};
|
|
|
|
/*
  Signal handler that deliberately does nothing.  main() installs it for
  SIGUSR1 so that EINTR handling can be exercised from the outside.
*/
extern "C" void catch_signal(int signum)
{
  (void)signum;  /* the signal number is intentionally ignored */
}
|
|
|
|
#include "../src/common/util/parse_mask.hpp"
|
|
|
|
int main(int argc, char** argv){
|
|
NDB_INIT(argv[0]);
|
|
Ndb_opts opts(argc, argv, my_long_options);
|
|
|
|
#ifndef DBUG_OFF
|
|
opt_debug= "d:t:O,/tmp/ndb_waiter.trace";
|
|
#endif
|
|
|
|
#ifndef _WIN32
|
|
// Catching signal to allow testing of EINTR safeness
|
|
// with "while killall -USR1 ndbwaiter; do true; done"
|
|
signal(SIGUSR1, catch_signal);
|
|
#endif
|
|
|
|
if (opts.handle_options())
|
|
return NDBT_ProgramExit(NDBT_WRONGARGS);
|
|
|
|
const char* connect_string = argv[0];
|
|
if (connect_string == 0)
|
|
connect_string = opt_ndb_connectstring;
|
|
|
|
enum ndb_mgm_node_status wait_status;
|
|
if (_no_contact)
|
|
{
|
|
wait_status= NDB_MGM_NODE_STATUS_NO_CONTACT;
|
|
}
|
|
else if (_not_started)
|
|
{
|
|
wait_status= NDB_MGM_NODE_STATUS_NOT_STARTED;
|
|
}
|
|
else if (_single_user)
|
|
{
|
|
wait_status= NDB_MGM_NODE_STATUS_SINGLEUSER;
|
|
}
|
|
else
|
|
{
|
|
wait_status= NDB_MGM_NODE_STATUS_STARTED;
|
|
}
|
|
|
|
if (_nowait_nodes)
|
|
{
|
|
int res = parse_mask(_nowait_nodes, nowait_nodes_bitmask);
|
|
if(res == -2 || (res > 0 && nowait_nodes_bitmask.get(0)))
|
|
{
|
|
ndbout_c("Invalid nodeid specified in nowait-nodes: %s",
|
|
_nowait_nodes);
|
|
exit(-1);
|
|
}
|
|
else if (res < 0)
|
|
{
|
|
ndbout_c("Unable to parse nowait-nodes argument: %s",
|
|
_nowait_nodes);
|
|
exit(-1);
|
|
}
|
|
}
|
|
|
|
if (_wait_nodes)
|
|
{
|
|
if (_nowait_nodes)
|
|
{
|
|
ndbout_c("Can not set both wait-nodes and nowait-nodes.");
|
|
exit(-1);
|
|
}
|
|
|
|
int res = parse_mask(_wait_nodes, nowait_nodes_bitmask);
|
|
if (res == -2 || (res > 0 && nowait_nodes_bitmask.get(0)))
|
|
{
|
|
ndbout_c("Invalid nodeid specified in wait-nodes: %s",
|
|
_wait_nodes);
|
|
exit(-1);
|
|
}
|
|
else if (res < 0)
|
|
{
|
|
ndbout_c("Unable to parse wait-nodes argument: %s",
|
|
_wait_nodes);
|
|
exit(-1);
|
|
}
|
|
|
|
// Don't wait for any other nodes than the ones we have set explicitly
|
|
nowait_nodes_bitmask.bitNOT();
|
|
}
|
|
|
|
if (waitClusterStatus(connect_string, wait_status) != 0)
|
|
return NDBT_ProgramExit(NDBT_FAILED);
|
|
return NDBT_ProgramExit(NDBT_OK);
|
|
}
|
|
|
|
/*
  Print the values returned by ndb_mgm_get_latest_error() and
  ndb_mgm_get_latest_error_line() for management handle h to ndbout.

  Wrapped in do { } while (0) so that "MGMERR(h);" is exactly one
  statement and stays correct inside an unbraced if/else.
*/
#define MGMERR(h) \
  do { \
    ndbout << "latest_error="<<ndb_mgm_get_latest_error(h) \
           << ", line="<<ndb_mgm_get_latest_error_line(h) \
           << endl; \
  } while (0)
|
|
|
|
NdbMgmHandle handle= NULL;
|
|
|
|
Vector<ndb_mgm_node_state> ndbNodes;
|
|
|
|
int
|
|
getStatus(){
|
|
int retries = 0;
|
|
struct ndb_mgm_cluster_state * status;
|
|
struct ndb_mgm_node_state * node;
|
|
|
|
ndbNodes.clear();
|
|
|
|
while(retries < 10){
|
|
status = ndb_mgm_get_status(handle);
|
|
if (status == NULL){
|
|
ndbout << "status==NULL, retries="<<retries<<endl;
|
|
MGMERR(handle);
|
|
retries++;
|
|
ndb_mgm_disconnect(handle);
|
|
if (ndb_mgm_connect(handle, opt_connect_retries - 1, opt_connect_retry_delay, 1)) {
|
|
MGMERR(handle);
|
|
g_err << "Reconnect failed" << endl;
|
|
break;
|
|
}
|
|
continue;
|
|
}
|
|
int count = status->no_of_nodes;
|
|
for (int i = 0; i < count; i++){
|
|
node = &status->node_states[i];
|
|
switch(node->node_type){
|
|
case NDB_MGM_NODE_TYPE_NDB:
|
|
if (!nowait_nodes_bitmask.get(node->node_id))
|
|
ndbNodes.push_back(*node);
|
|
break;
|
|
case NDB_MGM_NODE_TYPE_MGM:
|
|
/* Don't care about MGM nodes */
|
|
break;
|
|
case NDB_MGM_NODE_TYPE_API:
|
|
/* Don't care about API nodes */
|
|
break;
|
|
default:
|
|
if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN ||
|
|
node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){
|
|
retries++;
|
|
ndbNodes.clear();
|
|
free(status);
|
|
status = NULL;
|
|
count = 0;
|
|
|
|
ndbout << "kalle"<< endl;
|
|
break;
|
|
}
|
|
abort();
|
|
break;
|
|
}
|
|
}
|
|
if(status == 0){
|
|
ndbout << "status == 0" << endl;
|
|
continue;
|
|
}
|
|
free(status);
|
|
return 0;
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
static
|
|
char*
|
|
getTimeAsString(char* pStr, size_t len)
|
|
{
|
|
// Get current time
|
|
time_t now;
|
|
time(&now);
|
|
|
|
// Convert to local timezone
|
|
tm tm_buf;
|
|
ndb_localtime_r(&now, &tm_buf);
|
|
|
|
// Print to string buffer
|
|
BaseString::snprintf(pStr, len,
|
|
"%02d:%02d:%02d",
|
|
tm_buf.tm_hour,
|
|
tm_buf.tm_min,
|
|
tm_buf.tm_sec);
|
|
return pStr;
|
|
}
|
|
|
|
static int
|
|
waitClusterStatus(const char* _addr,
|
|
ndb_mgm_node_status _status)
|
|
{
|
|
int _startphase = -1;
|
|
|
|
#ifndef _WIN32
|
|
/* Ignore SIGPIPE */
|
|
signal(SIGPIPE, SIG_IGN);
|
|
#endif
|
|
|
|
handle = ndb_mgm_create_handle();
|
|
if (handle == NULL){
|
|
g_err << "Could not create ndb_mgm handle" << endl;
|
|
return -1;
|
|
}
|
|
g_info << "Connecting to mgmsrv at " << _addr << endl;
|
|
if (ndb_mgm_set_connectstring(handle, _addr))
|
|
{
|
|
MGMERR(handle);
|
|
g_err << "Connectstring " << _addr << " invalid" << endl;
|
|
return -1;
|
|
}
|
|
if (ndb_mgm_connect(handle, opt_connect_retries - 1, opt_connect_retry_delay, 1)) {
|
|
MGMERR(handle);
|
|
g_err << "Connection to " << _addr << " failed" << endl;
|
|
return -1;
|
|
}
|
|
|
|
int attempts = 0;
|
|
int resetAttempts = 0;
|
|
const int MAX_RESET_ATTEMPTS = 10;
|
|
bool allInState = false;
|
|
|
|
NDB_TICKS start = NdbTick_getCurrentTicks();
|
|
NDB_TICKS now = start;
|
|
|
|
while (allInState == false){
|
|
if (_timeout > 0 &&
|
|
NdbTick_Elapsed(start,now).seconds() > (Uint64)_timeout){
|
|
/**
|
|
* Timeout has expired waiting for the nodes to enter
|
|
* the state we want
|
|
*/
|
|
bool waitMore = false;
|
|
/**
|
|
* Make special check if we are waiting for
|
|
* cluster to become started
|
|
*/
|
|
if(_status == NDB_MGM_NODE_STATUS_STARTED)
|
|
{
|
|
waitMore = true;
|
|
/**
|
|
* First check if any node is not starting
|
|
* then it's no idea to wait anymore
|
|
*/
|
|
for (unsigned n = 0; n < ndbNodes.size(); n++)
|
|
{
|
|
if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED &&
|
|
ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTING)
|
|
{
|
|
waitMore = false;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){
|
|
g_err << "waitNodeState("
|
|
<< ndb_mgm_get_node_status_string(_status)
|
|
<<", "<<_startphase<<")"
|
|
<< " timeout after " << attempts << " attempts" << endl;
|
|
return -1;
|
|
}
|
|
|
|
g_err << "waitNodeState("
|
|
<< ndb_mgm_get_node_status_string(_status)
|
|
<<", "<<_startphase<<")"
|
|
<< " resetting timeout "
|
|
<< resetAttempts << endl;
|
|
|
|
start = now;
|
|
|
|
resetAttempts++;
|
|
}
|
|
|
|
if (attempts > 0)
|
|
NdbSleep_MilliSleep(100);
|
|
if (getStatus() != 0){
|
|
return -1;
|
|
}
|
|
|
|
/* Assume all nodes are in state(if there is any) */
|
|
allInState = (ndbNodes.size() > 0);
|
|
|
|
/* Loop through all nodes and check their state */
|
|
for (unsigned n = 0; n < ndbNodes.size(); n++) {
|
|
ndb_mgm_node_state* ndbNode = &ndbNodes[n];
|
|
|
|
require(ndbNode != NULL);
|
|
|
|
g_info << "Node " << ndbNode->node_id << ": "
|
|
<< ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl;
|
|
|
|
if (ndbNode->node_status != _status)
|
|
allInState = false;
|
|
}
|
|
|
|
if (!allInState) {
|
|
char timestamp[9];
|
|
g_info << "[" << getTimeAsString(timestamp, sizeof(timestamp)) << "] "
|
|
<< "Waiting for cluster enter state "
|
|
<< ndb_mgm_get_node_status_string(_status) << endl;
|
|
}
|
|
|
|
attempts++;
|
|
|
|
now = NdbTick_getCurrentTicks();
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Explicit instantiation of Vector for the element type used by ndbNodes. */
template class Vector<ndb_mgm_node_state>;
|