/************************************************************************
 *
 * Copyright (c) 2016 Alibaba.com, Inc. All Rights Reserved
 * $Id: paxos.cc,v 1.0 07/31/2016 04:16:32 PM yingqiang.zyq(yingqiang.zyq@alibaba-inc.com) $
 *
 ************************************************************************/

/**
 * @file paxos.cc
 * @author yingqiang.zyq(yingqiang.zyq@alibaba-inc.com)
 * @date 07/31/2016 04:16:32 PM
 * @version 1.0
 * @brief The implementation of the PAXOS algorithm
 *
 **/

#include <sys/time.h>
#include "paxos.h"

namespace alisql {

Paxos::Paxos(uint64_t electionTimeout, std::shared_ptr<PaxosLog> log, uint64_t purgeLogTimeout)
  :debugMaxSendLogIndex(0)
  ,config_(new StableConfiguration()) /*TODO:*/
  ,log_(log)
  ,clusterId_(0)
  ,shutdown_(false)
  ,maxPacketSize_(1000000)
  ,maxDelayIndex_(10000)
  ,minDelayIndex_(100)
  ,largeBatchRatio_(5)
  ,pipeliningTimeout_(3)
  ,electionTimeout_(electionTimeout)
  ,heartbeatTimeout_(electionTimeout/5)
  ,purgeLogTimeout_(purgeLogTimeout)
  ,currentTerm_(1)
  ,leaderStepDowning_(false)
  ,commitIndex_(0)
  ,leaderId_(0)
  ,leaderAddr_("")
  ,votedFor_(0)
  ,forceRequestMode_(false)
  ,currentEpoch_(0)
  ,forceSyncEpochDiff_(0)
  ,state_(FOLLOWER)
  ,subState_(SubNone)
  ,weightElecting_(false)
  ,leaderForceSyncStatus_(true)
  ,consensusAsync_(false)
  ,replicateWithCacheLog_(false)
  ,optimisticHeartbeat_(false)
  //,changeStateWorkers_(0)
  ,autoPurge_(false)
  ,useAppliedIndex_(true)
  ,minMatchIndex_(0)
  ,appliedIndex_(0)
  ,followerMetaNo_(0)
  ,lastSyncMetaNo_(0)
  ,syncMetaInterval_(1)
  ,maxDelayIndex4NewMember_(100)
  ,maxMergeReportTimeout_(2000)
  ,nextEpochCheckStatemachine_(0)
  ,compactOldMode_(true)
  ,enableLogCache_(false)
  ,enableDynamicEasyIndex_(false)
  ,enableLearnerPipelining_(false)
  ,enableAutoResetMatchIndex_(false)
  ,enableLearnerAutoResetMatchIndex_(false)
  ,stateChangeCb_(nullptr)
  ,checksumCb_(nullptr)
  ,checksum_mode_(false)
  ,port_(0)
{
}

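/*
 * Note on the defaults above: heartbeatTimeout_ is derived as electionTimeout/5,
 * so, for example, an election timeout of 5000 ms yields a 1000 ms heartbeat
 * interval, while purgeLogTimeout_ is taken verbatim from the constructor argument.
 */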
Paxos::~Paxos()
{
  if (!shutdown_.load())
    shutdown();
}

void Paxos::shutdown()
{
  /* We should stop all ThreadTimers before closing ThreadTimerService in Service::shutdown */
  lock_.lock();
  if (state_ == LEADER)
  {
    log_->setMetaData(keyLastLeaderTerm, currentTerm_);
    log_->setMetaData(keyLastLeaderLogIndex, commitIndex_);
  }
  shutdown_.store(true);
  if (ccMgr_.prepared)
  {
    ccMgr_.aborted= 1;
    ccMgr_.cond.notify_all();
  }
  ccMgr_.autoChangeAddr = "";
  ccMgr_.condChangeDone.notify_all();
  lock_.unlock();
  cond_.notify_all();
  electionTimer_.reset();
  epochTimer_.reset();
  purgeLogTimer_.reset();
  purgeLogQueue_.stop();
  changeStateQueue_.stop();
  appendLogQueue_.stop();
  commitDepQueue_.stop();
  config_->forEach(&Server::stop, NULL);
  config_->forEachLearners(&Server::stop, NULL);
  srv_->closeThreadPool();
  srv_->shutdown();
  /* When Service::shutdown returns, there are no backend workers left, so we can release config_ now. */
  config_.reset();
}

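/*
 * Shutdown ordering used above: persist the last-leader metadata while still
 * holding lock_, mark shutdown_, wake any waiting configure-change callers,
 * stop the timers and single-process queues, stop every member and learner
 * server, and only release config_ once Service::shutdown() has drained all
 * backend workers.
 */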
void Paxos::stop()
{
  /* We should stop all ThreadTimers before closing ThreadTimerService in Service::shutdown */
  electionTimer_->stop();
  epochTimer_->stop();
  purgeLogTimer_->stop();
  config_.reset();
  srv_->stop();
}

void Paxos::changeState_(enum State newState)
{
  /* We call sendAsyncEvent every time the term or the state changes. */
  /*
  if (state_ == newState)
    return;
  */
  easy_error_log("Server %d : Paxos state change from %s to %s !!\n", localServer_->serverId, stateString[state_], stateString[newState]);
  /* only the leader runs the purge log timer */
  if (state_ == LEADER) {
    purgeLogTimer_->stop();
  }

  if (newState != CANDIDATE)
  {
    forceRequestMode_= false;
  }

  state_.store(newState);
  leaderForceSyncStatus_.store(true);
  if (newState == LEADER)
  {
    if (autoPurge_ == true)
    {
      purgeLogTimer_->restart();
    }
  }
  else
  {
    subState_.store(SubNone);
    weightElecting_ = false;
  }

  if (newState == LEADER)
  {
    leaderId_.store(localServer_->serverId);
    leaderAddr_= localServer_->strAddr;
    option.extraStore->setRemote(option.extraStore->getLocal());
  }

  log_->resetMetaCache();

  if (stateChangeCb_)
  {
    if (changeStateQueue_.push(new ChangeStateArgType(state_, currentTerm_, commitIndex_, this)))
      srv_->sendAsyncEvent(&SingleProcessQueue<ChangeStateArgType>::process, &changeStateQueue_, Paxos::execStateChangeCb);
  }

  cond_.notify_all();
}

void Paxos::membershipChangeHistoryUpdate_(const MembershipChangeType &mc)
{
  if (membershipChangeHistory_.size() >= 10)
    membershipChangeHistory_.erase(membershipChangeHistory_.begin());
  membershipChangeHistory_.push_back(mc);
}

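/*
 * applyConfigureChangeNoLock_ below decodes a kConfigureChange log entry into a
 * ConfigureChangeValue and dispatches on cctype/optype: member operations
 * (add/del/downgrade/configure) and learner operations (add, del, configure the
 * learner source, or sync the whole learner set). It also records the change in
 * membershipChangeHistory_, resets keyScanIndex, and wakes the thread waiting in
 * sendConfigureAndWait_ when needNotify is set.
 */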
int Paxos::applyConfigureChangeNoLock_(uint64_t logIndex)
|
|
{
|
|
LogEntry entry;
|
|
uint64_t index= 0;
|
|
if (logIndex == 0) // for defensive
|
|
return 0;
|
|
if (log_->getEntry(logIndex, entry, false))
|
|
{
|
|
easy_error_log("Server %d: fail to get log entry when apply configure change, index %llu", localServer_->serverId, logIndex);
|
|
return 0;
|
|
}
|
|
assert(entry.optype() == kConfigureChange);
|
|
|
|
ConfigureChangeValue val;
|
|
val.ParseFromString(std::move(entry.value()));
|
|
|
|
MembershipChangeType mc;
|
|
mc.cctype = (CCOpTypeT)val.cctype();
|
|
mc.optype = (CCOpTypeT)val.optype();
|
|
if (val.addrs().size())
|
|
mc.address = *(val.addrs().begin());
|
|
if (val.cctype() == CCMemberOp)
|
|
{
|
|
//for membership change
|
|
const std::string& addr= *(val.addrs().begin());
|
|
if (val.optype() == CCAddNode)
|
|
{
|
|
assert(val.addrs_size() == 1);
|
|
if (state_ != LEARNER)
|
|
config_->addMember(addr, this);
|
|
else if (addr == localServer_->strAddr)
|
|
{
|
|
/* learner change to follower */
|
|
std::vector<std::string> strConfig;
|
|
for (auto& it : val.allservers())
|
|
{
|
|
strConfig.push_back(it);
|
|
if (it == addr)
|
|
index= strConfig.size();
|
|
}
|
|
assert(index != 0);
|
|
/* allservers already includes the newly added server (the local server). */
|
|
//strConfig.push_back(addr);
|
|
|
|
/* The learner's localServer may be AliSQLServer, we should pass it to installConfig. */
|
|
std::dynamic_pointer_cast<StableConfiguration>(config_)->installConfig(strConfig, index, this, localServer_);
|
|
config_->forEach(&Server::connect, (void *)NULL);
|
|
changeState_(FOLLOWER);
|
|
electionTimer_->start();
|
|
|
|
/* Init learners */
|
|
std::vector<std::string> strConfigL;
|
|
for (auto& addr : val.alllearners())
|
|
{
|
|
if (addr != localServer_->strAddr)
|
|
strConfigL.push_back(std::move(addr));
|
|
}
|
|
config_->delAllLearners();
|
|
config_->addLearners(strConfigL, this, true);
|
|
|
|
log_->setMetaData(Paxos::keyLearnerConfigure, config_->learnersToString());
|
|
log_->setMetaData(Paxos::keyMemberConfigure, config_->membersToString(localServer_->strAddr));
|
|
|
|
/* Print Log */
|
|
std::string logBuf;
|
|
for (auto& addr : strConfig)
|
|
{
|
|
logBuf += addr;
|
|
logBuf += " ";
|
|
}
|
|
std::string logBufL;
|
|
for (auto& addr : strConfigL)
|
|
{
|
|
logBufL += addr;
|
|
logBufL += " ";
|
|
}
|
|
easy_error_log("Server %d : Init follower from learner, new members(%s) new learners(%s)\n", localServer_->serverId, logBuf.c_str(), logBufL.c_str());
|
|
}
|
|
else
|
|
{
|
|
std::vector<std::string> strConfig;
|
|
strConfig.push_back(std::move(addr));
|
|
config_->delLearners(strConfig, this);
|
|
}
|
|
if (ccMgr_.autoChangeAddr == addr) {
|
|
ccMgr_.autoChangeAddr = "";
|
|
ccMgr_.autoChangeRet = 0;
|
|
ccMgr_.condChangeDone.notify_all();
|
|
}
|
|
}
|
|
else if (val.optype() == CCDelNode)
|
|
{
|
|
if (state_ != LEARNER)
|
|
{
|
|
if (addr != localServer_->strAddr)
|
|
config_->delMember(addr, this);
|
|
else
|
|
{
|
|
/* This node is removed from the cluster, shutdown myself */
|
|
easy_error_log("Server %d : This node is removed from the cluster, shutdown myself currentTerm(%llu) lli(%llu) ccIndex(%llu)!!\n", localServer_->serverId, currentTerm_.load(), log_->getLastLogIndex(), logIndex);
|
|
localServer_->serverId += 1000;
|
|
stop();
|
|
}
|
|
|
|
}
|
|
}
|
|
else if (val.optype() == CCDowngradeNode)
|
|
{
|
|
std::vector<std::string> strConfig;
|
|
strConfig.push_back(addr);
|
|
if (state_ != LEARNER)
|
|
{
|
|
if (addr != localServer_->strAddr)
|
|
{
|
|
config_->delMember(addr, this);
|
|
config_->addLearners(strConfig, this);
|
|
//if (state_ == LEADER)
|
|
// config_->forEachLearners(&Server::connect, (void *)NULL);
|
|
}
|
|
else
|
|
{
|
|
auto oldId= localServer_->serverId;
|
|
std::shared_ptr<LocalServer> localServer= std::dynamic_pointer_cast<LocalServer>(config_->getServer(oldId));
|
|
assert(localServer != nullptr);
|
|
std::dynamic_pointer_cast<StableConfiguration>(config_)->installConfig(strConfig, 1, this, localServer);
|
|
config_->addLearners(strConfig, this);
|
|
log_->setMetaData(Paxos::keyLearnerConfigure, config_->learnersToString());
|
|
log_->setMetaData(Paxos::keyMemberConfigure, config_->membersToString());
|
|
/* We set the initial serverId to 100 as a temporary value; it will be reassigned later. */
|
|
localServer_->serverId= 100;
|
|
this->state_.store(LEARNER);
|
|
this->electionTimer_->stop();
|
|
easy_error_log("Server %d : This server is downgrade from follower(%llu) to learner(%llu)!!", localServer_->serverId, oldId, localServer_->serverId);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
config_->addLearners(strConfig, this);
|
|
}
|
|
}
|
|
else if (val.optype() == CCConfigureNode)
|
|
{
|
|
mc.forceSync = val.forcesync();
|
|
mc.electionWeight = val.electionweight();
|
|
if (state_ != LEARNER)
|
|
{
|
|
auto server= config_->getServer(val.serverid());
|
|
if (server == nullptr || addr != server->strAddr)
|
|
{
|
|
easy_error_log("Server %d : Can't find the target server(id:%llu, addr:%s) in the configure!! Current member configure:%s\n", localServer_->serverId, val.serverid(), addr.c_str(), config_->membersToString(localServer_->strAddr).c_str());
|
|
}
|
|
else
|
|
{
|
|
config_->configureMember(val.serverid(), val.forcesync(), val.electionweight(), this);
|
|
if (val.serverid() == localServer_->serverId)
|
|
electionTimer_->setRandWeight(val.electionweight());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (val.cctype() == CCLearnerOp)
|
|
{
|
|
if (val.optype() == CCAddNode || val.optype() == CCAddLearnerAutoChange)
|
|
{
|
|
std::vector<std::string> strConfig, strLearners;
|
|
for (auto& addr : val.addrs())
|
|
strConfig.push_back(std::move(addr));
|
|
for (auto& addr : val.alllearners())
|
|
strLearners.push_back(std::move(addr));
|
|
if (StableConfiguration::isServerInVector(localServer_->strAddr, strConfig))
|
|
{
|
|
config_->delAllLearners();
|
|
config_->addLearners(strLearners, this, true);
|
|
}
|
|
config_->addLearners(strConfig, this);
|
|
/* The old learner will skip the connect call. */
|
|
if (state_ == LEADER)
|
|
config_->forEachLearners(&Server::connect, (void *)NULL);
|
|
if (val.optype() == CCAddLearnerAutoChange)
|
|
{
|
|
// strConfig.size is 1 in this case
|
|
ccMgr_.autoChangeAddr = strConfig[0];
|
|
}
|
|
}
|
|
else if (val.optype() == CCDelNode)
|
|
{
|
|
std::vector<std::string> strConfig;
|
|
for (auto& addr : val.addrs())
|
|
strConfig.push_back(std::move(addr));
|
|
config_->delLearners(strConfig, this);
|
|
/* autoChange case, wakeup addFollower if deleted */
|
|
auto findret = std::find(strConfig.begin(), strConfig.end(), ccMgr_.autoChangeAddr);
|
|
if (ccMgr_.autoChangeAddr != "" && findret != strConfig.end())
|
|
{
|
|
ccMgr_.autoChangeAddr = "";
|
|
ccMgr_.autoChangeRet = -2;
|
|
ccMgr_.condChangeDone.notify_all();
|
|
}
|
|
}
|
|
else if (val.optype() == CCConfigureNode)
|
|
{
|
|
auto server= config_->getServer(val.serverid());
|
|
auto source = config_->getServer(val.learnersource());
|
|
const std::string& addr= *(val.addrs().begin());
|
|
mc.learnerSource = (source ? source->strAddr : "");
|
|
mc.sendByAppliedIndex = val.applymode();
|
|
|
|
if (server == nullptr || addr != server->strAddr)
|
|
{
|
|
easy_error_log("Server %d : Can't find the target server(id:%llu, addr:%s) in the configure!! Current learner configure:%s\n", localServer_->serverId, val.serverid(), addr.c_str(), config_->learnersToString().c_str());
|
|
}
|
|
else
|
|
{
|
|
if (server->learnerSource == localServer_->serverId)
|
|
{
|
|
server->stepDown(nullptr);
|
|
}
|
|
config_->configureLearner(val.serverid(), val.learnersource(), this);
|
|
server->sendByAppliedIndex = val.applymode();
|
|
/* We should also init learner if we're leader and learner source is 0. */
|
|
if (server->learnerSource == localServer_->serverId || (state_ == LEADER && server->learnerSource == 0))
|
|
{
|
|
easy_error_log("Server %d : a new learner %d is sourced from me!!\n", localServer_->serverId, server->serverId);
|
|
server->beginLeadership(nullptr);
|
|
server->connect(nullptr);
|
|
}
|
|
}
|
|
}
|
|
else if (val.optype() == CCSyncLearnerAll)
|
|
{
|
|
std::vector<std::string> strConfig;
|
|
std::string strServers;
|
|
for (auto& addr : val.alllearners())
|
|
{
|
|
strServers += addr;
|
|
strServers += ";";
|
|
strConfig.push_back(std::move(addr));
|
|
}
|
|
|
|
if (strServers.size() > 0)
|
|
strServers.resize(strServers.size() - 1);
|
|
|
|
auto strLearners= config_->learnersToString();
|
|
if (strServers != strLearners)
|
|
{
|
|
easy_error_log("Server %d : Error: local learner meta error local:%s leader:%s!!\n", localServer_->serverId, strLearners.c_str(), strServers.c_str());
|
|
easy_error_log("Server %d : SyncLearnerAll: update local learner config from %s to %s\n", localServer_->serverId, strLearners.c_str(), strServers.c_str());
|
|
config_->delAllLearners();
|
|
config_->addLearners(strConfig, this, true);
|
|
|
|
log_->setMetaData(Paxos::keyLearnerConfigure, config_->learnersToString());
|
|
}
|
|
else
|
|
{
|
|
easy_error_log("Server %d : SyncLearnerAll: local learner is match with leader %s\n", localServer_->serverId, strServers.c_str());
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
assert(0);
|
|
}
|
|
|
|
membershipChangeHistoryUpdate_(mc);
|
|
|
|
uint64_t itmp;
|
|
log_->getMetaData(std::string(keyScanIndex), &itmp);
|
|
if (itmp <= logIndex)
|
|
log_->setMetaData(keyScanIndex, 0);
|
|
|
|
if (ccMgr_.needNotify == 1)
|
|
{
|
|
ccMgr_.applied= 1;
|
|
ccMgr_.aborted= 0;
|
|
ccMgr_.cond.notify_all();
|
|
}
|
|
|
|
easy_error_log("Server %d : applyConfigureChange_ done! logIndex(%llu) currentTerm(%ld) val.cctype(%d) val.optype(%d)\n", localServer_->serverId, logIndex, currentTerm_.load(), val.cctype(), val.optype());
|
|
return 0;
|
|
}
|
|
|
|
int Paxos::leaderTransfer_(uint64_t targetId)
|
|
{
|
|
if (state_ != LEADER)
|
|
return PaxosErrorCode::PE_NOTLEADR;
|
|
auto server = config_->getServer(targetId);
|
|
if (nullptr == server || targetId == 0)
|
|
return PaxosErrorCode::PE_NOTFOUND;
|
|
if (subState_ == SubLeaderTransfer)
|
|
{
|
|
easy_error_log("Server %d : leaderTransfer to server(%ld), Now we're in another leader transfer, skip this action!", localServer_->serverId, targetId);
|
|
return PaxosErrorCode::PE_CONFLICTS;
|
|
}
|
|
if (cdrMgr_.inRecovery)
|
|
{
|
|
easy_error_log("Server %d : leaderTransfer to server(%ld), Now we're in commit dependency recovery, skip this action!", localServer_->serverId, targetId);
|
|
return PaxosErrorCode::PE_CONFLICTS;
|
|
}
|
|
if (targetId == localServer_->serverId)
|
|
return PaxosErrorCode::PE_NONE;
|
|
if (std::dynamic_pointer_cast<RemoteServer>(server)->isLearner) // server == nullptr already checked
|
|
{
|
|
easy_error_log("Server %d : leaderTransfer to server(%ld), it is a learner, skip this action!", localServer_->serverId, targetId);
|
|
return PaxosErrorCode::PE_NOTFOLLOWER;
|
|
}
|
|
|
|
++ (stats_.countLeaderTransfer);
|
|
|
|
easy_error_log("Server %d : leaderTransfer to server(%ld), currentTerm(%ld), lli(%ld)\n", localServer_->serverId, targetId, currentTerm_.load(), log_->getLastLogIndex());
|
|
/* Stop new replicateLog */
|
|
subState_.store(SubLeaderTransfer);
|
|
MembershipChangeType mc;
|
|
mc.cctype = CCMemberOp;
|
|
mc.optype = CCLeaderTransfer;
|
|
mc.address = server->strAddr;
|
|
membershipChangeHistoryUpdate_(mc);
|
|
|
|
auto term= currentTerm_.load();
|
|
auto lli= log_->getLastLogIndex();
|
|
|
|
lock_.unlock();
|
|
auto slli= log_->getSafeLastLogIndex();
|
|
// sleep for 500ms to let log sync to disk
|
|
if (lli < slli)
|
|
msleep(500);
|
|
int ret= leaderTransferSend_(targetId, term, slli, 5);
|
|
lock_.lock();
|
|
return ret;
|
|
}
|
|
|
|
int Paxos::leaderTransfer(uint64_t targetId)
|
|
{
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
return leaderTransfer_(targetId);
|
|
}
|
|
|
|
int Paxos::leaderTransfer(const std::string& addr)
|
|
{
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
uint64_t targetId = config_->getServerIdFromAddr(addr);
|
|
return leaderTransfer_(targetId);
|
|
}
|
|
|
|
int Paxos::leaderTransferSend_(uint64_t targetId, uint64_t term, uint64_t logIndex, uint64_t leftCnt)
|
|
{
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
|
|
-- leftCnt;
|
|
|
|
if (checkLeaderTransfer(targetId, term, logIndex, leftCnt) > 0)
|
|
{
|
|
std::shared_ptr<RemoteServer> server= std::dynamic_pointer_cast<RemoteServer>(config_->getServer(targetId));
|
|
if (server == nullptr)
|
|
{
|
|
easy_error_log("Server %d : try transfer leader to id(%d), which is not in the configuration!!", localServer_->serverId, targetId);
|
|
return PaxosErrorCode::PE_NOTFOUND;
|
|
}
|
|
if (commitIndex_ == logIndex && commitIndex_ == server->matchIndex)
|
|
leaderCommand(LeaderTransfer, server);
|
|
else
|
|
{
|
|
easy_error_log("Server %d : skip send cmd LeaderTransfer because the pos is not catch up. commitIndex(%llu), lli(%llu), target matchIndex(%llu)", localServer_->serverId, commitIndex_, log_->getLastLogIndex(), server->matchIndex.load());
|
|
}
|
|
// TODO we also need to call leaderCommand in tryUpdateCommitIndex_
|
|
|
|
/* do not conflict with heartbeat timeout */
|
|
new ThreadTimer(srv_->getThreadTimerService(), srv_, getLeaderTransferInterval_(), ThreadTimer::Oneshot, &Paxos::leaderTransferSend_, this, targetId, term, logIndex, leftCnt);
|
|
}
|
|
|
|
return PaxosErrorCode::PE_NONE;
|
|
}
|
|
|
|
int Paxos::checkLeaderTransfer(uint64_t targetId, uint64_t term, uint64_t& logIndex, uint64_t leftCnt)
|
|
{
|
|
uint64_t lastLogIndex= log_->getLastLogIndex();
|
|
if ( state_ == LEADER && subState_ == SubLeaderTransfer && term == currentTerm_)
|
|
{
|
|
if (lastLogIndex > logIndex)
|
|
{
|
|
easy_error_log("Server %d : checkLeaderTransfer: In transfer to server %ld local lli:%llu is bigger than target lli:%llu, we update target lli to current lli.\n", localServer_->serverId, targetId, lastLogIndex, logIndex);
|
|
logIndex= lastLogIndex;
|
|
}
|
|
|
|
if (leftCnt > 0)
|
|
{
|
|
easy_warn_log("Server %d : checkLeaderTransfer: LeaderTransfer to server %ld not complete, left check time %llu", localServer_->serverId, targetId, leftCnt);
|
|
return 1;
|
|
}
|
|
else
|
|
{
|
|
subState_.store(SubNone);
|
|
weightElecting_ = false;
|
|
easy_error_log("Server %d : checkLeaderTransfer: LeaderTransfer to server %ld fail because of timeout currentTerm(%ld), lli(%ld)\n", localServer_->serverId, targetId, term, logIndex);
|
|
return -1;
|
|
}
|
|
}
|
|
else if (state_ == FOLLOWER && currentTerm_ > term && lastLogIndex > logIndex && leaderId_ == targetId)
|
|
{
|
|
easy_error_log("Server %d : checkLeaderTransfer: LeaderTransfer success target(id:%ld t:%ld lli:%ld) current(t:%ld lli:%ld)\n", localServer_->serverId, targetId, term, logIndex, currentTerm_.load(), lastLogIndex);
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
subState_.store(SubNone);
|
|
weightElecting_ = false;
|
|
easy_error_log("Server %d : checkLeaderTransfer: Nonleader election may happened during the leadertransfer, please check the status! target(id:%ld t:%ld lli:%ld) current(id:%ld t:%ld lli:%ld)\n", localServer_->serverId, targetId, term, logIndex, leaderId_.load(), currentTerm_.load(), lastLogIndex);
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
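/*
 * Leader-transfer flow, as implemented above: leaderTransfer_ marks
 * SubLeaderTransfer (blocking new replicateLog calls), then leaderTransferSend_
 * sends the LeaderTransfer command once the target's matchIndex has caught up
 * with commitIndex, re-arming a one-shot ThreadTimer for up to 5 checks.
 * checkLeaderTransfer reports success once this node has stepped down to
 * FOLLOWER in a higher term, has received newer log, and leaderId_ equals the
 * target; it clears the sub-state on timeout or on an unexpected election.
 */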
|
|
int Paxos::checkConfigure_(CCOpTypeT cctype, CCOpTypeT type, std::vector<std::string>& strConfig, const std::vector<Configuration::ServerRef>& servers)
|
|
{
|
|
for (auto it= strConfig.begin(); it != strConfig.end(); )
|
|
{
|
|
if (type == CCAddNode || type == CCAddLearnerAutoChange)
|
|
{
|
|
bool dup= false;
|
|
for (auto& server : servers)
|
|
{
|
|
if (server && server->strAddr == *it)
|
|
{
|
|
it= strConfig.erase(it);
|
|
dup= true;
|
|
break;
|
|
}
|
|
}
|
|
/* In the add-learner case we should also check the member list */
|
|
if (!dup && cctype == CCLearnerOp)
|
|
{
|
|
auto& members= config_->getServers();
|
|
for (auto& server : members)
|
|
{
|
|
if (server && server->strAddr == *it)
|
|
{
|
|
it= strConfig.erase(it);
|
|
dup= true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if (!dup)
|
|
++ it;
|
|
}
|
|
else if (type == CCDelNode)
|
|
{
|
|
bool found= false;
|
|
uint64_t i= 0;
|
|
for (auto& server : servers)
|
|
{
|
|
if (server && server->strAddr == *it)
|
|
{
|
|
found= true;
|
|
break;
|
|
}
|
|
++ i;
|
|
}
|
|
if (found)
|
|
++ it;
|
|
else
|
|
it= strConfig.erase(it);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
inline void Paxos::prepareConfigureChangeEntry_(const LogEntry& entry, PaxosMsg *msg, bool fromCache)
|
|
{
|
|
if (ccMgr_.prepared == 0)
|
|
{
|
|
ccMgr_.prepared= 1;
|
|
ccMgr_.preparedIndex= entry.index();
|
|
}
|
|
else
|
|
{
|
|
// uint64_t leaderCommitIndex = fromCache? logRecvCache_.getCommitIndex(): msg->commitindex();
|
|
// potential bug here
|
|
// assert(ccMgr_.preparedIndex <= leaderCommitIndex || ccMgr_.preparedIndex <= commitIndex_);
|
|
applyConfigureChangeNoLock_(ccMgr_.preparedIndex);
|
|
ccMgr_.prepared= 1;
|
|
ccMgr_.preparedIndex= entry.index();
|
|
}
|
|
log_->setMetaData(keyScanIndex, ccMgr_.preparedIndex);
|
|
}
|
|
|
|
int Paxos::sendConfigureAndWait_(const ConfigureChangeValue& val, std::unique_lock<std::mutex>& ul)
|
|
{
|
|
int ret= 0;
|
|
std::string buf;
|
|
val.SerializeToString(&buf);
|
|
|
|
LogEntry entry;
|
|
entry.set_optype(kConfigureChange);
|
|
entry.set_value(buf);
|
|
|
|
// Step 3: send log entry, and wait for applied
|
|
uint64_t index;
|
|
if (ccMgr_.prepared == 0)
|
|
{
|
|
log_->setMetaData(keyScanIndex, log_->getLastLogIndex());
|
|
if ((index= replicateLog_(entry, false)) > 0)
|
|
{
|
|
if (entry.index() > commitIndex_)
|
|
{
|
|
ccMgr_.prepared= 1;
|
|
ccMgr_.preparedIndex= entry.index();
|
|
ccMgr_.needNotify= 1;
|
|
if (ccMgr_.waitTimeout.load() == 0)
|
|
{
|
|
while (ccMgr_.applied == 0 && ccMgr_.aborted == 0)
|
|
ccMgr_.cond.wait(ul);
|
|
}
|
|
else
|
|
{
|
|
bool waitRet = ccMgr_.cond.wait_for(ul,
|
|
std::chrono::milliseconds(ccMgr_.waitTimeout.load()), [this]() {
|
|
return (ccMgr_.applied != 0 || ccMgr_.aborted != 0);
|
|
});
|
|
if (waitRet == false)
|
|
{
|
|
ccMgr_.needNotify= 0;
|
|
easy_error_log("Server %d : configureChange wait timeout, preparedIndex(%d).\n", localServer_->serverId, ccMgr_.preparedIndex);
|
|
return PaxosErrorCode::PE_TIMEOUT;
|
|
}
|
|
}
|
|
if (ccMgr_.aborted == 1)
|
|
{
|
|
easy_error_log("Server %d : configureChange aborted, preparedIndex(%d).\n", localServer_->serverId, ccMgr_.preparedIndex);
|
|
ret= PaxosErrorCode::PE_DEFAULT;
|
|
}
|
|
else
|
|
{
|
|
if (state_ == FOLLOWER && ccMgr_.preparedIndex != entry.index())
|
|
{
|
|
/*
 * Referring to function prepareConfigureChangeEntry_:
 * the leader changed during configureChange,
 * so just return timeout as the result and let the client check and retry.
 */
|
|
ccMgr_.aborted= ccMgr_.applied= ccMgr_.needNotify= 0;
|
|
easy_error_log("Server %d : configureChange timeout after leader transfer, old preparedIndex(%d), current preparedIndex(%d).\n", localServer_->serverId, entry.index(), ccMgr_.preparedIndex);
|
|
return PaxosErrorCode::PE_TIMEOUT;
|
|
}
|
|
assert(ccMgr_.preparedIndex == entry.index());
|
|
// success, ret is 0
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* one node case */
|
|
applyConfigureChangeNoLock_(entry.index());
|
|
if (ccMgr_.needNotify != 1)
|
|
ccMgr_.clear();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
log_->setMetaData(keyScanIndex, 0);
|
|
ret= PaxosErrorCode::PE_REPLICATEFAIL;
|
|
}
|
|
}
|
|
else
|
|
ret= PaxosErrorCode::PE_CONFLICTS;
|
|
return ret;
|
|
}
|
|
|
|
void Paxos::setConfigureChangeTimeout(uint64_t t)
|
|
{
|
|
ccMgr_.waitTimeout.store(t);
|
|
}
|
|
|
|
int Paxos::configureChange_(CCOpTypeT cctype, CCOpTypeT optype, std::vector<std::string>& strConfigArg, const std::vector<Configuration::ServerRef>& servers)
|
|
{
|
|
if (cctype != CCMemberOp && cctype != CCLearnerOp)
|
|
return PaxosErrorCode::PE_INVALIDARGUMENT;
|
|
if (optype != CCAddNode && optype != CCDelNode && optype != CCSyncLearnerAll && optype != CCAddLearnerAutoChange)
|
|
return PaxosErrorCode::PE_INVALIDARGUMENT;
|
|
|
|
int ret= PaxosErrorCode::PE_NONE;
|
|
std::vector<std::string> strConfig= strConfigArg;
|
|
|
|
std::unique_lock<std::mutex> ul(lock_);
|
|
|
|
if (optype == CCAddLearnerAutoChange)
|
|
{
|
|
if (strConfig.size() > 1)
|
|
{
|
|
easy_error_log("Server %d : Learner auto change to follower only support one learner at a time.\n", localServer_->serverId);
|
|
return PaxosErrorCode::PE_INVALIDARGUMENT;
|
|
}
|
|
if (ccMgr_.autoChangeAddr != "")
|
|
{
|
|
easy_error_log("Server %d : Previous learner auto change to follower not finish.\n", localServer_->serverId);
|
|
return PaxosErrorCode::PE_DEFAULT;
|
|
}
|
|
}
|
|
|
|
if (optype == CCAddNode || optype == CCAddLearnerAutoChange || optype == CCDelNode)
|
|
{
|
|
/* Step 1: remove dup servers */
|
|
checkConfigure_(cctype, optype, strConfig, servers);
|
|
if (strConfig.size() == 0)
|
|
{
|
|
easy_error_log("Server %d : New add member already exist or delete member not found!!\n", localServer_->serverId);
|
|
if (optype == CCDelNode)
|
|
return PaxosErrorCode::PE_NOTFOUND;
|
|
else
|
|
return PaxosErrorCode::PE_EXISTS;
|
|
}
|
|
}
|
|
|
|
/* Step 2: build log entry */
|
|
std::string logBuf;
|
|
ConfigureChangeValue val;
|
|
for (auto& addr : strConfig)
|
|
{
|
|
val.add_addrs(addr);
|
|
logBuf += addr;
|
|
logBuf += " ";
|
|
}
|
|
easy_error_log("Server %d : configureChange begin: cctype(%d) optype(%d) term(%llu) lli(%llu) addrs(%s)\n", localServer_->serverId, cctype, optype, currentTerm_.load(), log_->getLastLogIndex(), logBuf.c_str());
|
|
val.set_cctype(cctype);
|
|
val.set_optype(optype);
|
|
if (cctype == CCMemberOp && optype == CCAddNode)
|
|
{
|
|
auto& newServerAddr= *(strConfig.begin());
|
|
bool addNew= false;
|
|
auto newServer= config_->getLearnerByAddr(newServerAddr);
|
|
if (newServer == nullptr)
|
|
{
|
|
easy_error_log("Server %d : Try to add member from learner %s which is not exist!!\n", localServer_->serverId, newServerAddr.c_str());
|
|
return PaxosErrorCode::PE_NOTFOUND;
|
|
}
|
|
else if(newServer->getMatchIndex() + maxDelayIndex4NewMember_ < log_->getLastLogIndex())
|
|
{
|
|
easy_error_log("Server %d : Try to add member from learner %d, which is delay too much, matchIndex(%llu), lli(%llu)!!\n", localServer_->serverId, newServer->serverId, newServer->getMatchIndex(), log_->getLastLogIndex());
|
|
return PaxosErrorCode::PE_DELAY;
|
|
}
|
|
for (auto& server : servers)
|
|
{
|
|
if (server)
|
|
val.add_allservers(StableConfiguration::memberToString(server));
|
|
else
|
|
{
|
|
if (! addNew)
|
|
{
|
|
val.add_allservers(newServerAddr);
|
|
addNew= true;
|
|
}
|
|
else
|
|
val.add_allservers("0");
|
|
}
|
|
}
|
|
if (! addNew)
|
|
val.add_allservers(newServerAddr);
|
|
for (auto& learner : config_->getLearners())
|
|
{
|
|
if (learner && newServerAddr != learner->strAddr)
|
|
val.add_alllearners(StableConfiguration::learnerToString(learner));
|
|
else
|
|
val.add_alllearners("0");
|
|
}
|
|
}
|
|
else if (cctype == CCLearnerOp && (optype == CCAddNode || optype == CCAddLearnerAutoChange || optype == CCSyncLearnerAll))
|
|
{
|
|
for (auto& learner : config_->getLearners())
|
|
{
|
|
if (learner)
|
|
val.add_alllearners(StableConfiguration::learnerToString(learner));
|
|
else
|
|
val.add_alllearners("0");
|
|
}
|
|
}
|
|
|
|
ret= sendConfigureAndWait_(val, ul);
|
|
easy_error_log("Server %d : configureChange return: cctype(%d) optype(%d) addrs(%s) return(%d) success(%d) preparedIndex(%llu) lli(%llu)\n", localServer_->serverId, cctype, optype, logBuf.c_str(), ret, ccMgr_.applied, ccMgr_.preparedIndex, log_->getLastLogIndex());
|
|
/*
 * In some cases we cannot clear the ccMgr flags:
 * 1. an old configureChange exists and sendConfigureAndWait_ returns an error directly
 * 2. replicateLog fails for some reason (leader change...)
 * 3. configureChange times out (needNotify is set to 0 in sendConfigureAndWait_)
 */
|
|
if (ret != PaxosErrorCode::PE_REPLICATEFAIL && ret != PaxosErrorCode::PE_CONFLICTS && ret != PaxosErrorCode::PE_TIMEOUT)
|
|
ccMgr_.clear();
|
|
return ret;
|
|
}
|
|
|
|
int Paxos::changeLearners(CCOpTypeT type, std::vector<std::string>& strConfig)
|
|
{
|
|
return configureChange_(CCLearnerOp, type, strConfig, config_->getLearners());
|
|
}
|
|
|
|
int Paxos::changeMember(CCOpTypeT type, std::string& strAddr)
|
|
{
|
|
std::vector<std::string> tmpConfig{strAddr};
|
|
if (type == CCAddLearnerAutoChange)
|
|
{
|
|
/* addFollower procedure: add as learner -> wait until it has received the log -> change to follower */
|
|
int ret= configureChange_(CCLearnerOp, CCAddLearnerAutoChange, tmpConfig, config_->getLearners());
|
|
if (ret != 0)
|
|
{
|
|
easy_error_log("Server %d : addFollower configChange stage 1 fail, error code %d.\n", localServer_->serverId, ret);
|
|
return ret;
|
|
}
|
|
/* wait learner become follower */
|
|
std::unique_lock<std::mutex> ul(lock_);
|
|
if (ccMgr_.waitTimeout.load() == 0)
|
|
ccMgr_.condChangeDone.wait(ul, [this](){ return ccMgr_.autoChangeAddr == ""; });
|
|
else
|
|
{
|
|
ret = ccMgr_.condChangeDone.wait_for(ul, std::chrono::milliseconds(ccMgr_.waitTimeout.load()),
|
|
[this](){ return ccMgr_.autoChangeAddr == ""; });
|
|
if (!ret)
|
|
{
|
|
easy_error_log("Server %d : addFollower wait timeout (%d ms).\n", localServer_->serverId, ccMgr_.waitTimeout.load());
|
|
return PaxosErrorCode::PE_TIMEOUT;
|
|
}
|
|
}
|
|
return ccMgr_.autoChangeRet;
|
|
}
|
|
else
|
|
{
|
|
return configureChange_(CCMemberOp, type, tmpConfig, config_->getServers());
|
|
}
|
|
}
|
|
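/*
 * A rough usage sketch of the membership-change API above (not part of the
 * original source; the exact scoping of the CC* enum constants, the sample
 * address, and the error handling are assumptions):
 *
 *   std::vector<std::string> learners{"127.0.0.1:11003"};   // hypothetical address
 *   paxos->changeLearners(CCAddNode, learners);             // add a pure learner
 *
 *   std::string addr("127.0.0.1:11003");
 *   paxos->changeMember(CCAddLearnerAutoChange, addr);      // add learner, auto-upgrade to follower
 *   paxos->changeMember(CCAddNode, addr);                   // or upgrade an existing learner explicitly
 */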
|
|
int Paxos::autoChangeLearnerAction()
|
|
{
|
|
std::vector<std::string> tmpConfig;
|
|
{
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
if (ccMgr_.autoChangeAddr == "")
|
|
return PaxosErrorCode::PE_DEFAULT;
|
|
tmpConfig.push_back(ccMgr_.autoChangeAddr);
|
|
}
|
|
int ret= configureChange_(CCMemberOp, CCAddNode, tmpConfig, config_->getServers());
|
|
if (ccMgr_.autoChangeAddr != "") {
|
|
ccMgr_.autoChangeAddr = "";
|
|
ccMgr_.autoChangeRet = ret;
|
|
ccMgr_.condChangeDone.notify_all();
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
int Paxos::configureLearner_(uint64_t serverId, uint64_t source, bool applyMode, std::unique_lock<std::mutex> &ul)
|
|
{
|
|
auto server= config_->getServer(serverId);
|
|
int ret= PaxosErrorCode::PE_NONE;
|
|
|
|
if (!server)
|
|
{
|
|
easy_error_log("Server %d : can't find server %llu in configureLearner\n", localServer_->serverId, serverId);
|
|
return PaxosErrorCode::PE_NOTFOUND;
|
|
}
|
|
if (server->learnerSource == source && server->sendByAppliedIndex == applyMode)
|
|
{
|
|
easy_warn_log("Server %d : nothing changed in configureLearner server %llu, learnerSource:%llu\n", localServer_->serverId, serverId, source);
|
|
return PaxosErrorCode::PE_NONE;
|
|
}
|
|
|
|
easy_error_log("Server %d : configureLearner: change learnerSource from %llu to %llu\n", localServer_->serverId, server->learnerSource, source);
|
|
|
|
ConfigureChangeValue val;
|
|
val.set_cctype(CCLearnerOp);
|
|
val.set_optype(CCConfigureNode);
|
|
/* For check. */
|
|
val.add_addrs(server->strAddr);
|
|
val.set_serverid(serverId);
|
|
val.set_learnersource(source);
|
|
val.set_applymode(applyMode);
|
|
|
|
ret= sendConfigureAndWait_(val, ul);
|
|
easy_error_log("Server %d : configureLearner return: serverid(%d) return(%d) success(%d) preparedIndex(%llu) lli(%llu)\n", localServer_->serverId, serverId, ret, ccMgr_.applied, ccMgr_.preparedIndex, log_->getLastLogIndex());
|
|
if (ret != PaxosErrorCode::PE_REPLICATEFAIL && ret != PaxosErrorCode::PE_CONFLICTS && ret != PaxosErrorCode::PE_TIMEOUT)
|
|
ccMgr_.clear();
|
|
return ret;
|
|
}
|
|
|
|
int Paxos::configureLearner(uint64_t serverId, uint64_t source, bool applyMode)
|
|
{
|
|
std::unique_lock<std::mutex> ul(lock_);
|
|
return configureLearner_(serverId, source, applyMode, ul);
|
|
}
|
|
|
|
int Paxos::configureLearner(const std::string& addr, const std::string& sourceAddr, bool applyMode)
|
|
{
|
|
std::unique_lock<std::mutex> ul(lock_);
|
|
uint64_t serverId = config_->getServerIdFromAddr(addr);
|
|
uint64_t source = config_->getServerIdFromAddr(sourceAddr);
|
|
if (serverId < 100 || source == 0)
|
|
return PaxosErrorCode::PE_NOTFOUND;
|
|
/* Trick: to clear the current learner source, simply set the source address to the learner itself. */
|
|
if (serverId == source)
|
|
source = 0;
|
|
return configureLearner_(serverId, source, applyMode, ul);
|
|
}
|
|
|
|
int Paxos::configureMember_(uint64_t serverId, bool forceSync, uint electionWeight, std::unique_lock<std::mutex> &ul)
|
|
{
|
|
if (electionWeight > 9)
|
|
{
|
|
easy_error_log("Server %d : Fail to change electionWeight. Max electionWeight is 9.", localServer_->serverId);
|
|
return PaxosErrorCode::PE_INVALIDARGUMENT;
|
|
}
|
|
auto server= config_->getServer(serverId);
|
|
int ret= PaxosErrorCode::PE_NONE;
|
|
|
|
if (!server)
|
|
{
|
|
easy_error_log("Server %d : can't find server %llu in configureMember\n", localServer_->serverId, serverId);
|
|
return PaxosErrorCode::PE_NOTFOUND;
|
|
}
|
|
|
|
if (serverId >= 100)
|
|
{
|
|
easy_error_log("Server %d : can't configure learner %llu in configureMember\n", localServer_->serverId, serverId);
|
|
return PaxosErrorCode::PE_WEIGHTLEARNER;
|
|
}
|
|
|
|
if (server->forceSync == forceSync && server->electionWeight == electionWeight)
|
|
{
|
|
easy_warn_log("Server %d : nothing changed in configureMember server %llu, forceSync:%u electionWeight:%u\n", localServer_->serverId, serverId, forceSync, electionWeight);
|
|
return PaxosErrorCode::PE_NONE;
|
|
}
|
|
|
|
ConfigureChangeValue val;
|
|
val.set_cctype(CCMemberOp);
|
|
val.set_optype(CCConfigureNode);
|
|
/* For check. */
|
|
val.add_addrs(server->strAddr);
|
|
val.set_serverid(serverId);
|
|
val.set_forcesync(forceSync);
|
|
val.set_electionweight(electionWeight);
|
|
|
|
ret= sendConfigureAndWait_(val, ul);
|
|
easy_error_log("Server %d : configureMember return: serverid(%d) return(%d) success(%d) preparedIndex(%llu) lli(%llu)\n", localServer_->serverId, serverId, ret, ccMgr_.applied, ccMgr_.preparedIndex, log_->getLastLogIndex());
|
|
if (ret != PaxosErrorCode::PE_REPLICATEFAIL && ret != PaxosErrorCode::PE_CONFLICTS && ret != PaxosErrorCode::PE_TIMEOUT)
|
|
ccMgr_.clear();
|
|
return ret;
|
|
}
|
|
|
|
int Paxos::configureMember(uint64_t serverId, bool forceSync, uint electionWeight)
|
|
{
|
|
std::unique_lock<std::mutex> ul(lock_);
|
|
return configureMember_(serverId, forceSync, electionWeight, ul);
|
|
}
|
|
|
|
int Paxos::configureMember(const std::string& addr, bool forceSync, uint electionWeight)
|
|
{
|
|
std::unique_lock<std::mutex> ul(lock_);
|
|
uint64_t serverId = config_->getServerIdFromAddr(addr);
|
|
return configureMember_(serverId, forceSync, electionWeight, ul);
|
|
}
|
|
|
|
int Paxos::downgradeMember_(uint64_t serverId, std::unique_lock<std::mutex> &ul)
|
|
{
|
|
auto server= config_->getServer(serverId);
|
|
int ret= 0;
|
|
|
|
if (serverId >= 100)
|
|
{
|
|
easy_error_log("Server %d : try to downgrade server %d which is already a learner!!\n", localServer_->serverId, serverId);
|
|
return PaxosErrorCode::PE_DOWNGRADLEARNER;
|
|
}
|
|
|
|
if (!server)
|
|
{
|
|
easy_error_log("Server %d : can't find server %llu in configureMember!!\n", localServer_->serverId, serverId);
|
|
return PaxosErrorCode::PE_NOTFOUND;
|
|
}
|
|
|
|
if (localServer_->serverId == serverId && state_ == LEADER)
|
|
{
|
|
easy_error_log("Server %d : can't downgrade leader(%llu) to learner!!\n", localServer_->serverId, serverId);
|
|
return PaxosErrorCode::PE_DOWNGRADELEADER;
|
|
}
|
|
|
|
ConfigureChangeValue val;
|
|
val.set_cctype(CCMemberOp);
|
|
val.set_optype(CCDowngradeNode);
|
|
/* For check. */
|
|
val.add_addrs(server->strAddr);
|
|
|
|
ret= sendConfigureAndWait_(val, ul);
|
|
easy_error_log("Server %d : downgradeMember return: serverid(%d) return(%d) success(%d) preparedIndex(%llu) lli(%llu)\n", localServer_->serverId, serverId, ret, ccMgr_.applied, ccMgr_.preparedIndex, log_->getLastLogIndex());
|
|
if (ret != PaxosErrorCode::PE_REPLICATEFAIL && ret != PaxosErrorCode::PE_CONFLICTS && ret != PaxosErrorCode::PE_TIMEOUT)
|
|
ccMgr_.clear();
|
|
return ret;
|
|
}
|
|
|
|
int Paxos::downgradeMember(uint64_t serverId)
|
|
{
|
|
std::unique_lock<std::mutex> ul(lock_);
|
|
return downgradeMember_(serverId, ul);
|
|
}
|
|
|
|
int Paxos::downgradeMember(const std::string& addr)
|
|
{
|
|
std::unique_lock<std::mutex> ul(lock_);
|
|
uint64_t serverId = config_->getServerIdFromAddr(addr);
|
|
return downgradeMember_(serverId, ul);
|
|
}
|
|
|
|
void Paxos::becameLeader_()
|
|
{
|
|
if (state_ != LEADER)
|
|
{
|
|
/* Deal with commit dependency case before set to LEADER */
|
|
LogEntry tmpEntry;
|
|
uint64_t tmpIndex = log_->getLastLogIndex();
|
|
if (log_->getEntry(tmpIndex, tmpEntry, false))
|
|
{
|
|
easy_error_log("Server %d: fail to get log entry when become leader to find out commit dependency, index %llu",
|
|
localServer_->serverId, tmpIndex);
|
|
exit(1);
|
|
}
|
|
if (tmpEntry.optype() == kCommitDep)
|
|
{
|
|
cdrMgr_.inRecovery = true;
|
|
cdrMgr_.lastLogIndex = tmpIndex;
|
|
cdrMgr_.lastNonCommitDepIndex = 0;
|
|
easy_error_log("Server %d : Last log optype is kCommitDep, will reset the log.\n", localServer_->serverId);
|
|
}
|
|
|
|
nextEpochCheckStatemachine_= getNextEpochCheckStatemachine_(currentEpoch_.load());
|
|
|
|
/* Deal with the election weight things. */
|
|
if (config_->needWeightElection(localServer_->electionWeight))
|
|
{
|
|
easy_error_log("Server %d : Try weight election for this election term(%llu)!!\n", localServer_->serverId, currentTerm_.load());
|
|
subState_.store(SubLeaderTransfer);
|
|
weightElecting_ = true;
|
|
new ThreadTimer(srv_->getThreadTimerService(), srv_, electionTimeout_, ThreadTimer::Oneshot, &Paxos::electionWeightAction, this, currentTerm_.load(), currentEpoch_.fetch_add(1));
|
|
}
|
|
/* become leader. */
|
|
changeState_(LEADER);
|
|
|
|
/* We change the timer from election type to heartbeat type. */
|
|
electionTimer_->stop();
|
|
config_->forEach(&Server::beginLeadership, NULL);
|
|
config_->forEachLearners(&Server::beginLeadership, NULL);
|
|
/* We start epochTimer_ when we become candidate so that every node has been probed by the time we become leader. */
/* epochTimer_ will be stopped when we become follower. */
|
|
//epochTimer_->restart();
|
|
|
|
if (!cdrMgr_.inRecovery)
|
|
{
|
|
/* Send an empty log entry to implicitly commit old entries */
|
|
LogEntry entry1;
|
|
log_->getEmptyEntry(entry1);
|
|
replicateLog_(entry1, false);
|
|
}
|
|
else
|
|
{
|
|
/* in commit dependency recovery */
|
|
if (commitDepQueue_.push(new commitDepArgType(cdrMgr_.lastLogIndex, currentTerm_.load(), this)))
|
|
srv_->sendAsyncEvent(&SingleProcessQueue<commitDepArgType>::process, &commitDepQueue_, Paxos::commitDepResetLog);
|
|
}
|
|
|
|
uint64_t lastLogIndex= log_->getLastLogIndex();
|
|
LogEntry entry;
|
|
log_->getEntry(lastLogIndex, entry, false); // ignore error
|
|
uint64_t lastLogTerm= entry.term();
|
|
|
|
easy_error_log("Server %d : become Leader (currentTerm %ld, lli:%ld, llt:%ld)!!\n", localServer_->serverId, currentTerm_.load(), lastLogIndex, lastLogTerm);
|
|
}
|
|
}
|
|
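/*
 * becameLeader_ above: check whether the tail of the log is a kCommitDep entry
 * (if so, schedule commitDepResetLog through commitDepQueue_); if the
 * configuration calls for weight-based election, mark SubLeaderTransfer and
 * schedule electionWeightAction; switch to LEADER, stop the election timer,
 * call beginLeadership on every member and learner, and finally replicate an
 * empty entry to implicitly commit the previous term's entries.
 */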
|
|
bool Paxos::cdrIsValid(commitDepArgType* arg)
|
|
{
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
if (cdrMgr_.inRecovery && currentTerm_ == arg->term)
|
|
return true;
|
|
else
|
|
return false;
|
|
}
|
|
|
|
void Paxos::cdrClear(commitDepArgType* arg)
|
|
{
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
if (currentTerm_ == arg->term)
|
|
cdrMgr_.clear();
|
|
}
|
|
|
|
void Paxos::commitDepResetLog(commitDepArgType* arg)
|
|
{
|
|
if (!arg->paxos->cdrIsValid(arg))
|
|
return;
|
|
std::shared_ptr<PaxosLog> log = arg->paxos->getLog();
|
|
std::shared_ptr<LocalServer> localServer = arg->paxos->getLocalServer();
|
|
easy_error_log("Server %d : start reset log because of commit dependency.\n", localServer->serverId);
|
|
LogEntry tmpEntry;
|
|
uint64_t tmpIndex = arg->lastLogIndex;
|
|
while(--tmpIndex > 0)
|
|
{
|
|
if (log->getEntry(tmpIndex, tmpEntry, false))
|
|
{
|
|
easy_error_log("Server %d: fail to get log entry when reset commit dependency, index %llu", localServer->serverId, tmpIndex);
|
|
exit(1);
|
|
}
|
|
if (tmpEntry.optype() != kCommitDep)
|
|
break;
|
|
}
|
|
easy_error_log("Server %d : commitDepResetLog reset from index %ld to %ld.\n", localServer->serverId, tmpIndex + 1, arg->lastLogIndex);
|
|
arg->paxos->truncateBackward_(tmpIndex + 1);
|
|
if (arg->paxos->debugResetLogSlow)
|
|
sleep(1);
|
|
tmpEntry.Clear();
|
|
log->getEmptyEntry(tmpEntry);
|
|
tmpEntry.set_term(arg->term);
|
|
while (log->getLastLogIndex() < arg->lastLogIndex)
|
|
{
|
|
if (debugResetLogSlow)
|
|
sleep(1);
|
|
tmpEntry.set_index(0);
|
|
tmpEntry.set_checksum(0);
|
|
/* do not use writeLog, lastSyncedIndex is larger than logindex */
|
|
/* no lock protection: the PolarDB-X Engine log module ensures that you cannot append a log if you are a follower */
|
|
uint64_t reti = localServer->appendLog(tmpEntry);
|
|
/* avoid dead loop */
|
|
if (reti == 0)
|
|
{
|
|
/* fail term check */
|
|
easy_error_log("Server %d : fail to do log reset for index %llu, which means I am not the real leader.\n", log->getLastLogIndex() + 1);
|
|
break;
|
|
}
|
|
}
|
|
arg->paxos->cdrClear(arg);
|
|
easy_error_log("Server %d : finish commitDepResetLog.\n", localServer->serverId);
|
|
/* still Send an extra empty log entry to implicitly commit old entries */
|
|
arg->paxos->replicateLog_(tmpEntry, false);
|
|
}
|
|
|
|
uint64_t Paxos::replicateLog_(LogEntry &entry, const bool needLock)
|
|
{
|
|
uint64_t term= currentTerm_.load();
|
|
auto state= state_.load();
|
|
auto subState= subState_.load();
|
|
if (leaderStepDowning_.load() || state != LEADER || (subState == SubLeaderTransfer && needLock) || term != currentTerm_.load())
|
|
{
|
|
if (state != LEADER)
|
|
{
|
|
easy_error_log("Server %d : replicateLog fail because we're not leader!\n", localServer_->serverId);
|
|
}
|
|
else if (subState == SubLeaderTransfer)
|
|
{
|
|
easy_error_log("Server %d : replicateLog fail because we're in LeaderTransfer!\n", localServer_->serverId);
|
|
}
|
|
else
|
|
{
|
|
easy_error_log("Server %d : replicateLog fail because we're in LeaderTransfer!\n", localServer_->serverId);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
if (cdrMgr_.inRecovery)
|
|
{
|
|
entry.set_term(0);
|
|
easy_error_log("Server %d : replicateLog fail because we're in commit dependency recovery!\n", localServer_->serverId);
|
|
return 0;
|
|
}
|
|
|
|
entry.set_term(term);
|
|
/*
|
|
if (needLock)
|
|
lock_.unlock();
|
|
*/
|
|
|
|
++ (stats_.countReplicateLog);
|
|
|
|
easy_info_log("Server %d : replicateLog write start logTerm(%ld)\n", localServer_->serverId, term);
|
|
|
|
/* Traditional path: write the checksum before writeLog, which is different from PolarDB-X Engine */
|
|
/* if checksum not 0, use the checksum from outside (for ut now) */
|
|
if (checksumCb_ && checksum_mode_ && entry.checksum() == 0)
|
|
{
|
|
const unsigned char* buf = reinterpret_cast<const unsigned char*>(entry.value().c_str());
|
|
entry.set_checksum((uint64_t)checksumCb_(0, buf, entry.value().size()));
|
|
}
|
|
|
|
auto logIndex= localServer_->writeLog(entry);
|
|
entry.set_index(logIndex);
|
|
if (entry.optype() != kCommitDep && logIndex > 0)
|
|
cdrMgr_.setLastNonCommitDepIndex(logIndex);
|
|
|
|
easy_info_log("Server %d : replicateLog write done logTerm(%ld), logIndex(%ld)\n", localServer_->serverId, term, logIndex);
|
|
|
|
/* TODO
 * If we use sendAsyncEvent to append log here, too many workers will be used to
 * appendLog and be blocked by the mutex. So we use sendAsyncEvent only after we have
 * the group appendLog function !!
 */
|
|
if (logIndex > 0)
|
|
//appendLog(false);
|
|
//srv_->sendAsyncEvent(AEAppendLog, NULL);
|
|
//srv_->sendAsyncEvent(&Paxos::appendLog, this, true);
|
|
if (appendLogQueue_.push(new (Paxos *)(this)))
|
|
srv_->sendAsyncEvent(&SingleProcessQueue<Paxos *>::mergeableSameProcess, &appendLogQueue_, Paxos::appendLogCb);
|
|
|
|
|
|
if (!shutdown_.load() && config_->getServerNumLockFree() == 1)
|
|
{
|
|
if (needLock)
|
|
tryUpdateCommitIndex();
|
|
else
|
|
tryUpdateCommitIndex_();
|
|
}
|
|
|
|
return logIndex;
|
|
}
|
|
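/*
 * Replication fast path implemented in replicateLog_ above: the leader assigns
 * the current term, optionally computes the value checksum, appends the entry
 * locally through localServer_->writeLog, and then pushes itself onto
 * appendLogQueue_ so an async worker runs Paxos::appendLogCb, which drives the
 * AppendLog broadcast to the other servers (cf. appendLog()); a single-node
 * cluster advances commitIndex_ immediately through tryUpdateCommitIndex.
 */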
|
|
int Paxos::requestVote(bool force)
|
|
{
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
if (shutdown_.load())
|
|
return -1;
|
|
if (state_ == LEADER)
|
|
{
|
|
/* TODO: should stepdown ? */
|
|
/* only connect to learner when i am leader */
|
|
if (debugWitnessTest)
|
|
{
|
|
PaxosMsg msg;
|
|
msg.set_term(currentTerm_);
|
|
msg.set_msgtype(RequestVote);
|
|
msg.set_candidateid(localServer_->serverId);
|
|
msg.set_addr(localServer_->strAddr);
|
|
msg.set_force((uint64_t)force);
|
|
uint64_t lastLogIndex;
|
|
msg.set_lastlogindex(lastLogIndex= log_->getLastLogIndex());
|
|
LogEntry entry;
|
|
log_->getEntry(lastLogIndex, entry, false);
|
|
msg.set_lastlogterm(entry.term());
|
|
config_->forEachLearners(&Server::sendMsg, (void *)&msg);
|
|
}
|
|
return -1;
|
|
}
|
|
if (state_ == LEARNER)
|
|
{
|
|
easy_error_log("Server %d : Skip requestVote because I am learner.", localServer_->serverId);
|
|
return -1;
|
|
}
|
|
|
|
if (localServer_->electionWeight == 0)
|
|
{
|
|
easy_error_log("Server %d : Skip requestVote because electionWeight is 0 currentTerm(%ld)\n", localServer_->serverId, currentTerm_.load());
|
|
return -1;
|
|
}
|
|
|
|
/* For debug */
|
|
if (debugDisableElection)
|
|
{
|
|
easy_error_log("Server %d : Skip requestVote because of debugDisableElection currentTerm(%ld)\n", localServer_->serverId, currentTerm_.load());
|
|
return -2;
|
|
}
|
|
|
|
uint64_t lastLogIndex = log_->getLastLogIndex();
|
|
LogEntry entry;
|
|
if (log_->getEntry(lastLogIndex, entry, false))
|
|
{
|
|
easy_error_log("Server %d: fail to get log entry when request vote, index %llu", localServer_->serverId, lastLogIndex);
|
|
}
|
|
else
|
|
{
|
|
++currentTerm_;
|
|
log_->setTerm(currentTerm_);
|
|
log_->setMetaData(keyCurrentTerm, currentTerm_);
|
|
leaderId_.store(0);
|
|
leaderAddr_= std::string("");
|
|
option.extraStore->setRemote("");
|
|
config_->forEach(&Server::beginRequestVote, NULL);
|
|
forceRequestMode_= force;
|
|
changeState_(CANDIDATE);
|
|
easy_warn_log("Server %d : Epoch task currentEpoch(%llu) during requestVote\n", localServer_->serverId, currentEpoch_.load());
|
|
currentEpoch_.fetch_add(1);
|
|
epochTimer_->restart();
|
|
votedFor_= localServer_->serverId;
|
|
log_->setMetaData(keyVoteFor, votedFor_);
|
|
easy_error_log("Server %d : Start new requestVote: new term(%ld)\n", localServer_->serverId, currentTerm_.load());
|
|
|
|
PaxosMsg msg;
|
|
msg.set_term(currentTerm_);
|
|
msg.set_msgtype(RequestVote);
|
|
msg.set_candidateid(localServer_->serverId);
|
|
msg.set_addr(localServer_->strAddr);
|
|
msg.set_force((uint64_t)force);
|
|
msg.set_lastlogindex(lastLogIndex);
|
|
msg.set_lastlogterm(entry.term());
|
|
|
|
config_->forEach(&Server::sendMsg, (void *)&msg);
|
|
}
|
|
|
|
electionTimer_->restart(electionTimeout_, true);
|
|
|
|
if (config_->getServerNum() == 1)
|
|
{
|
|
/* Only me in the cluster, became leader immediately. */
|
|
becameLeader_();
|
|
}
|
|
return 0;
|
|
}
|
|
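/*
 * One election round in requestVote above: bump currentTerm_, persist
 * keyCurrentTerm and keyVoteFor, clear leaderId_/leaderAddr_, switch to
 * CANDIDATE, restart the election and epoch timers, and broadcast a RequestVote
 * message carrying lastLogIndex/lastLogTerm. A single-member cluster becomes
 * leader immediately without waiting for responses.
 */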
|
|
int Paxos::onRequestVote(PaxosMsg *msg, PaxosMsg *rsp)
|
|
{
|
|
++ (stats_.countOnMsgRequestVote);
|
|
|
|
rsp->set_msgid(msg->msgid());
|
|
rsp->set_msgtype(RequestVoteResponce);
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
if (shutdown_.load())
|
|
return -1;
|
|
rsp->set_serverid(localServer_->serverId);
|
|
|
|
if (state_ == LEARNER)
|
|
{
|
|
rsp->set_term(msg->term());
|
|
rsp->set_votegranted(0);
|
|
easy_error_log("Server %d : Receive a RequestVote from server %d, term(%llu) when I'm LEARNER!! Just reject!!\n", localServer_->serverId, msg->candidateid(), msg->term());
|
|
return 0;
|
|
}
|
|
|
|
auto server= std::dynamic_pointer_cast<RemoteServer>(config_->getServer(msg->candidateid()));
|
|
if (server == nullptr || server->strAddr != msg->addr())
|
|
{
|
|
rsp->set_term(currentTerm_);
|
|
rsp->set_votegranted(0);
|
|
easy_error_log("Server %d : reject RequestVote because this server is not in the current configure, server(id:%llu, addr:%s).\n", localServer_->serverId, msg->candidateid(), msg->addr().c_str());
|
|
return 0;
|
|
}
|
|
|
|
if (msg->term() < currentTerm_)
|
|
{
|
|
rsp->set_term(currentTerm_);
|
|
rsp->set_votegranted(0);
|
|
easy_error_log("Server %d : Receive an old RequestVote from server %d msg term(%d) current term(%d) reject!!\n", localServer_->serverId, msg->serverid(), msg->term(), currentTerm_.load());
|
|
return 0;
|
|
}
|
|
|
|
uint64_t lastLogIndex= log_->getLastLogIndex();
|
|
LogEntry entry;
|
|
if (log_->getEntry(lastLogIndex, entry, false))
|
|
{
|
|
rsp->set_term(currentTerm_);
|
|
rsp->set_votegranted(0);
|
|
easy_error_log("Server %d: fail to get log entry when on request vote, index %llu", localServer_->serverId, lastLogIndex);
|
|
return 0;
|
|
}
|
|
uint64_t lastLogTerm= entry.term();
|
|
|
|
bool logCheck= (msg->lastlogterm() > lastLogTerm ||
|
|
(msg->lastlogterm() == lastLogTerm && msg->lastlogindex() >= lastLogIndex));
|
|
|
|
easy_error_log("Server %d : leaderStickiness check: msg::force(%d) state_:%d electionTimer_::Stage:%d leaderId_:%llu .\n", localServer_->serverId, msg->force(), state_.load(), electionTimer_->getCurrentStage(), leaderId_.load());
|
|
//if (state_ == LEADER || (state_ == FOLLOWER && electionTimer_->getCurrentStage() == 0 && !msg->force()))
|
|
if (!msg->force() && (state_ == LEADER || (state_ == FOLLOWER && (electionTimer_->getCurrentStage() == 0 && leaderId_ != 0) && !Paxos::debugDisableElection)))
|
|
{
|
|
rsp->set_term(currentTerm_);
|
|
rsp->set_votegranted(0);
|
|
easy_error_log("Server %d : reject RequestVote because of leaderStickiness, local(lli:%ld, llt:%ld); msg(candidateid: %d, term: %ld lli:%ld, llt:%ld) .\n", localServer_->serverId, lastLogIndex, lastLogTerm, msg->candidateid(), msg->term(), msg->lastlogindex(), msg->lastlogterm());
|
|
|
|
if (state_ == LEADER)
|
|
rsp->set_force(1);
|
|
return 0;
|
|
}
|
|
|
|
if (msg->term() > currentTerm_)
|
|
{
|
|
/* Enter New Term */
|
|
easy_error_log("Server %d : New Term in onRequestVote !! server %d 's term(%d) is bigger than me(%d).\n", localServer_->serverId, msg->candidateid(), msg->term(), currentTerm_.load());
|
|
newTerm(msg->term());
|
|
|
|
//TODO handle leader case. need stepDown ?
|
|
if (state_ == LEADER)
|
|
;
|
|
}
|
|
|
|
rsp->set_term(currentTerm_);
|
|
rsp->set_votegranted(logCheck && votedFor_ == 0);
|
|
if (rsp->votegranted())
|
|
{
|
|
votedFor_= msg->candidateid();
|
|
log_->setMetaData(keyVoteFor, votedFor_);
|
|
electionTimer_->restart();
|
|
}
|
|
easy_error_log("Server %d : isVote: %d, local(lli:%llu, llt:%d); msg(candidateid: %d, term: %d lli:%llu, llt:%d) .\n", localServer_->serverId, rsp->votegranted(), lastLogIndex, lastLogTerm, msg->candidateid(), msg->term(), msg->lastlogindex(), msg->lastlogterm());
|
|
return 0;
|
|
}
|
|
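/*
 * Vote-granting rule used in onRequestVote above: the vote is granted only if
 * the candidate's log is at least as up-to-date (compare lastlogterm first,
 * then lastlogindex) and votedFor_ is still 0 for this term. Leader stickiness
 * rejects non-forced requests while a live leader is known, and a current
 * leader answers with force=1 so the candidate can roll its term back in
 * onRequestVoteResponce.
 */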
|
|
int Paxos::onClusterIdNotMatch(PaxosMsg *msg)
|
|
{
|
|
assert(msg->msgtype() == ClusterIdNotMatch);
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
if (shutdown_.load())
|
|
return -1;
|
|
auto server= std::dynamic_pointer_cast<RemoteServer>(config_->getServer(msg->serverid()));
|
|
if (server == nullptr)
|
|
{
|
|
easy_error_log("Server %d : onClusterIdNotMatch receive a msg msgId(%llu) from server %llu which has been deleted already!\n", localServer_->serverId, msg->msgid(), msg->serverid());
|
|
return -1;
|
|
}
|
|
|
|
server->disconnect(nullptr);
|
|
easy_error_log("Server %d : server %llu has different cluster id(%llu), local cluster id(%llu). we should remove this server from the current onfiguration!!\n", localServer_->serverId, msg->serverid(), msg->newclusterid(), clusterId_.load());
|
|
|
|
return 0;
|
|
}
|
|
|
|
int Paxos::onMsgPreCheck(PaxosMsg *msg, PaxosMsg *rsp)
|
|
{
|
|
if (msg->clusterid() != clusterId_.load())
|
|
{
|
|
easy_error_log("Server %d: Recieve a msg from cluster(%llu), current cluster(%llu), msg type(%d), these nodes belong to different clusters.\n", localServer_->serverId, msg->clusterid(), clusterId_.load(), msg->msgtype());
|
|
rsp->set_msgtype(Consensus::ClusterIdNotMatch);
|
|
rsp->set_serverid(msg->serverid());
|
|
rsp->set_term(msg->term());
|
|
rsp->set_msgid(msg->msgid());
|
|
rsp->set_clusterid(msg->clusterid());
|
|
rsp->set_newclusterid(clusterId_.load());
|
|
return 1;
|
|
}
|
|
else if (msg->entries_size() && log_->entriesPreCheck(msg->entries()))
|
|
{
|
|
easy_error_log("Server %d: msgId(%llu) onMsgPreCheck, entries pre-check is failed, msg type(%d), skip AppendLog.\n", localServer_->serverId, msg->msgid(), msg->msgtype());
|
|
rsp->set_msgtype(Consensus::PreCheckFailedResponce);
|
|
rsp->set_serverid(msg->serverid());
|
|
rsp->set_term(msg->term());
|
|
rsp->set_msgid(msg->msgid());
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int Paxos::onMsgPreCheckFailed(PaxosMsg *msg)
|
|
{
|
|
assert(msg->msgtype() == PreCheckFailedResponce || msg->msgtype() == ClusterIdNotMatch);
|
|
|
|
// ClusterIdNotMatch
|
|
if (msg->msgtype() == ClusterIdNotMatch)
|
|
return onClusterIdNotMatch(msg);
|
|
|
|
// PreCheckFailedResponce
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
if (shutdown_.load())
|
|
return -1;
|
|
|
|
auto server= std::dynamic_pointer_cast<RemoteServer>(config_->getServer(msg->serverid()));
|
|
if (server != nullptr)
|
|
server->disconnect(nullptr);
|
|
|
|
easy_error_log("Server %d: msgId(%llu) onMsgPreCheckFailed, entries pre-check is failed, msg type(%d).", localServer_->serverId, msg->msgid(), msg->msgtype());
|
|
return 0;
|
|
}
|
|
|
|
int Paxos::onRequestVoteResponce(PaxosMsg *msg)
|
|
{
|
|
assert(msg->msgtype() == RequestVoteResponce);
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
if (shutdown_.load())
|
|
return -1;
|
|
auto server= std::dynamic_pointer_cast<RemoteServer>(config_->getServer(msg->serverid()));
|
|
if (server == nullptr)
|
|
{
|
|
easy_error_log("Server %d : onRequestVoteResponce receive a msg msgId(%llu) from server %llu which has been deleted already!\n", localServer_->serverId, msg->msgid(), msg->serverid());
|
|
return -2;
|
|
}
|
|
|
|
if (static_cast<bool>(server) == false) //for unittest consensus.Paxos_requestVote1
|
|
return 0;
|
|
|
|
server->setLastAckEpoch(currentEpoch_);
|
|
|
|
if (msg->term() > currentTerm_)
|
|
{
|
|
easy_error_log("Server %d : New Term in onRequestVoteResponce !! server %d 's term(%d) is bigger than me(%d).\n", localServer_->serverId, msg->serverid(), msg->term(), currentTerm_.load());
|
|
newTerm(msg->term());
|
|
}
|
|
else if (msg->term() < currentTerm_)
|
|
{
|
|
easy_error_log("Server %d : Receive an old RequestVoteResponce from server %d msg term(%d) current term(%d) skip!!\n", localServer_->serverId, msg->serverid(), msg->term(), currentTerm_.load());
|
|
if (msg->force())
|
|
{
|
|
/* We reset the term when we're rejected because of leaderStickiness. */
|
|
if (msg->term() >= log_->getLastLogTerm())
|
|
{
|
|
easy_error_log("Server %d : Downgrade term from %llu to %llu when onRequestVoteResponce, because there are leaderStickiness leader(%ld) exist!!\n", localServer_->serverId, currentTerm_.load(), msg->term(), msg->serverid());
|
|
newTerm(msg->term());
|
|
}
|
|
}
|
|
}
|
|
else if (msg->votegranted())
|
|
{
|
|
assert(msg->term() == currentTerm_);
|
|
server->hasVote= true;
|
|
|
|
easy_error_log("Server %d : server %d (term:%ld) vote me to became leader.\n", localServer_->serverId, msg->serverid(), msg->term());
|
|
|
|
if (config_->quorumAll(&Server::haveVote))
|
|
{
|
|
becameLeader_();
|
|
}
|
|
}
|
|
else
|
|
easy_error_log("Server %d : server %d refuse to let me became leader.\n", localServer_->serverId, msg->serverid());
|
|
|
|
return 0;
|
|
}
|
|
|
|
int Paxos::appendLog(const bool needLock)
|
|
{
|
|
if (shutdown_.load())
|
|
return -1;
|
|
|
|
if (needLock)
|
|
lock_.lock();
|
|
if (state_ != LEADER)
|
|
{
|
|
if (needLock)
|
|
lock_.unlock();
|
|
return -1;
|
|
}
|
|
|
|
LogEntry entry;
|
|
|
|
PaxosMsg msg;
|
|
msg.set_term(currentTerm_);
|
|
msg.set_msgtype(AppendLog);
|
|
|
|
msg.set_leaderid(localServer_->serverId);
|
|
msg.set_commitindex(commitIndex_);
|
|
|
|
/*
|
|
* Some fields of msg are filled by appendLogFillForEach,
|
|
* called by RemoteServer::sendMsg.
|
|
*/
|
|
|
|
config_->forEach(&Server::sendMsg, (void *)&msg);
|
|
|
|
if (needLock)
|
|
lock_.unlock();
|
|
return 0;
|
|
}
|
|
|
|
int Paxos::appendLogToLearner(std::shared_ptr<RemoteServer> server, bool needLock)
|
|
{
|
|
|
|
if (needLock)
|
|
lock_.lock();
|
|
|
|
/* XXX Now we support learner source to another learner */
|
|
if (state_ != LEADER && state_!= FOLLOWER && state_!= LEARNER)
|
|
{
|
|
if (needLock)
|
|
lock_.unlock();
|
|
return -1;
|
|
}
|
|
|
|
LogEntry entry;
|
|
|
|
PaxosMsg msg;
|
|
msg.set_term(currentTerm_);
|
|
msg.set_msgtype(AppendLog);
|
|
|
|
msg.set_leaderid(localServer_->serverId);
|
|
msg.set_commitindex(commitIndex_);
|
|
|
|
if (server == nullptr)
|
|
config_->forEachLearners(&Server::sendMsg, (void *)&msg);
|
|
else
|
|
{
|
|
server->sendMsg((void *)&msg);
|
|
}
|
|
|
|
if (needLock)
|
|
lock_.unlock();
|
|
|
|
return 0;
|
|
}
|
|
|
|
int Paxos::appendLogToServer(std::weak_ptr<RemoteServer> wserver, bool needLock, bool force)
|
|
{
|
|
std::shared_ptr<RemoteServer> server;
|
|
|
|
if (!(server = wserver.lock()))
|
|
return -1;
|
|
|
|
return appendLogToServerByPtr(server, needLock, force);
|
|
}
|
|
|
|
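/*
 * Send an AppendLog message to a single remote server.
 * In the non-force (normal) path the call is skipped when the server is
 * already up to date (nextIndex beyond the last log index, or beyond
 * commitIndex for a learner). In the force (heartbeat) path, if the mutex
 * cannot be acquired immediately the message is sent locklessly with a
 * term snapshot, so heartbeats are not blocked by a busy leader.
 */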
int Paxos::appendLogToServerByPtr(std::shared_ptr<RemoteServer> server, bool needLock, bool force)
|
|
{
|
|
bool lockless4force= false;
|
|
|
|
if (!force)
|
|
{
|
|
if (needLock)
|
|
lock_.lock();
|
|
uint64_t lastLogIndex= replicateWithCacheLog_.load() ? log_->getLastCachedLogIndex() : log_->getLastLogIndex();
|
|
if ((server->nextIndex > lastLogIndex)
|
|
|| (server->isLearner && server->nextIndex > commitIndex_))
|
|
{
|
|
if (needLock)
|
|
lock_.unlock();
|
|
return -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (state_ != LEADER)
|
|
return -1;
|
|
assert(needLock);
|
|
if (!lock_.try_lock())
|
|
{
|
|
lockless4force= true;
|
|
}
|
|
}
|
|
|
|
LogEntry entry;
|
|
|
|
PaxosMsg msg;
|
|
if (lockless4force)
|
|
{
|
|
uint64_t savedTerm= currentTerm_.load();
|
|
if (leaderStepDowning_.load() || state_.load() != LEADER || savedTerm != currentTerm_.load())
|
|
return -1;
|
|
msg.set_term(savedTerm);
|
|
}
|
|
else
|
|
{
|
|
msg.set_term(currentTerm_);
|
|
}
|
|
msg.set_msgtype(AppendLog);
|
|
|
|
msg.set_leaderid(localServer_->serverId);
|
|
msg.set_commitindex(commitIndex_);
|
|
|
|
/*
|
|
* Some fields of msg are filled by appendLogFillForEach,
|
|
* called by RemoteServer::sendMsg.
|
|
*/
|
|
|
|
/* TODO is force necessary ! */
|
|
if (force)
|
|
{
|
|
if (server->waitForReply)
|
|
{
|
|
easy_warn_log("Server %d : server %d do not response in the last heartbeat period, force to send heartbeat msg.\n", localServer_->serverId, server->serverId);
|
|
server->waitForReply= 0;
|
|
}
|
|
|
|
}
|
|
server->sendMsgFunc(lockless4force, force, (void *)&msg);
|
|
|
|
if (needLock && !lockless4force)
|
|
lock_.unlock();
|
|
return 0;
|
|
}
|
|
|
|
// try to deal with a heartbeat optimistically (without the mutex),
|
|
// return true if we successfully processed this heartbeat,
|
|
// return false otherwise
|
|
bool Paxos::onHeartbeatOptimistically_(PaxosMsg *msg, PaxosMsg *rsp)
|
|
{
|
|
// the next 2 loads are not safe without the mutex, but we just assume the values remain unchanged
|
|
StateType state = state_.load();
|
|
uint64_t currentTerm = currentTerm_.load();
|
|
|
|
// wrong state or a different term: we must process this heartbeat the traditional way (with the mutex)
|
|
if (state != FOLLOWER || msg->term() != currentTerm)
|
|
return false;
|
|
|
|
easy_error_log("msgId(%llu) received from leader(%d), term(%d), it is heartbeat and deal it optimistically!\n",
|
|
msg->msgid(), msg->leaderid(), msg->term());
|
|
|
|
electionTimer_->restart();
|
|
|
|
rsp->set_msgtype(AppendLogResponce);
|
|
rsp->set_msgid(msg->msgid());
|
|
// if `msg->serverid()` does not match the local server id, the leader will fail to process this response
|
|
rsp->set_serverid(msg->serverid());
|
|
rsp->set_issuccess(false);
|
|
rsp->set_ignorecheck(true);
|
|
rsp->set_term(currentTerm);
|
|
rsp->set_appliedindex(0);
|
|
|
|
return true;
|
|
}
|
|
|
|
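/*
 * Follower/learner side handler for AppendLog (log replication and heartbeat).
 * The main steps are:
 *   1. optionally handle a pure heartbeat without taking the mutex;
 *   2. term checks: step down on a larger term, reject or downgrade on a smaller one;
 *   3. record the leader id/address and restart the election timer;
 *   4. consistency check on prevLogIndex/prevLogTerm, caching out-of-order
 *      batches in logRecvCache_ when the log cache is enabled;
 *   5. append (or truncate-then-append) the entries, preparing any configure
 *      change entries found in the batch;
 *   6. advance the local commitIndex from the leader's commitIndex.
 */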
int Paxos::onAppendLog(PaxosMsg *msg, PaxosMsg *rsp)
|
|
{
|
|
++ (stats_.countOnMsgAppendLog);
|
|
assert(msg->msgtype() == AppendLog);
|
|
|
|
std::unique_lock<std::mutex> lg(lock_, std::defer_lock);
|
|
if (msg->entries_size() == 0 && optimisticHeartbeat_.load() == true) {
|
|
if (lg.try_lock() == false) {
|
|
if (onHeartbeatOptimistically_(msg, rsp) == true) {
|
|
return 0;
|
|
}
|
|
lg.lock();
|
|
}
|
|
// lock is already held if we reach here
|
|
} else {
|
|
lg.lock();
|
|
}
|
|
if (shutdown_.load())
|
|
return -1;
|
|
|
|
uint64_t lastLogIndex= log_->getLastLogIndex();
|
|
uint64_t prevLogIndex= msg->prevlogindex();
|
|
bool newTermFlag= false;
|
|
|
|
if (1 == config_->getServerNumLockFree() && state_.load() != LEARNER)
|
|
{
|
|
easy_error_log("Server %d : reject onAppendLog because this server is not in the current configure, server %llu\n", localServer_->serverId, msg->leaderid());
|
|
rsp->set_msgid(msg->msgid());
|
|
rsp->set_msgtype(AppendLogResponce);
|
|
rsp->set_serverid(msg->serverid());
|
|
rsp->set_issuccess(false);
|
|
rsp->set_lastlogindex(lastLogIndex);
|
|
rsp->set_ignorecheck(true);
|
|
rsp->set_term(currentTerm_);
|
|
rsp->set_appliedindex(0);
|
|
return 0;
|
|
}
|
|
|
|
rsp->set_msgid(msg->msgid());
|
|
rsp->set_msgtype(AppendLogResponce);
|
|
rsp->set_serverid(msg->serverid());
|
|
/* when add node and the node does not complete the initialization */
|
|
if (NULL == localServer_) {
|
|
int i = 0;
|
|
while (NULL == localServer_) {
|
|
/* avoid looping indefinitely */
if (i > 60) break;
easy_warn_log("Local server has not been initialized, sleep 1 second!\n");
|
|
sleep(1);
|
|
i++;
|
|
}
|
|
}
|
|
assert(localServer_ != NULL);
|
|
if (localServer_->serverId != msg->serverid())
|
|
{
|
|
if (state_ != LEARNER)
|
|
{
|
|
easy_error_log("Server %d : the server id in the msg(%llu) is not match with local server id for a follower, this may happen during the configure change or hit a bug!!\n", localServer_->serverId, msg->serverid());
|
|
if (compactOldMode_ && msg->serverid() == leaderId_)
|
|
{
|
|
rsp->set_serverid(localServer_->serverId);
|
|
easy_warn_log("Server %d : receive a msg from old version leader, in compact mode we use %llu instead of %llu as server id \n", localServer_->serverId, localServer_->serverId, msg->serverid());
|
|
}
|
|
}
|
|
else
|
|
{
|
|
easy_error_log("Server %d : the server id in the msg(%llu) is not match with local server id for a learner, we change the local server id to %llu!!\n", localServer_->serverId, msg->serverid(), msg->serverid());
|
|
localServer_->serverId= msg->serverid();
|
|
}
|
|
}
|
|
|
|
easy_warn_log("Server %d : msgId(%llu) onAppendLog start, receive logs from leader(%d), msg.term(%d) lli(%llu)\n", localServer_->serverId, msg->msgid(), msg->leaderid(), msg->term(), lastLogIndex);
|
|
|
|
/*
 * XXX About msg->lastlogindex
 * when appendLog succeeds: msg->lastlogindex is the last log index in the msg (prevLogIndex + numEntries)
 * when appendLog fails: msg->lastlogindex is the last log index in the follower's local log_
 * when appendLog succeeds but in cached mode: msg->lastlogindex is the last log index in the follower's local log_ (not including the cache)
 */
|
|
rsp->set_issuccess(false);
|
|
rsp->set_lastlogindex(lastLogIndex);
|
|
rsp->set_ignorecheck(false);
|
|
rsp->set_appliedindex(appliedIndex_.load());
|
|
|
|
/* in some case we should step down */
|
|
if (msg->term() > currentTerm_)
|
|
{
|
|
easy_warn_log("Server %d : New Term in onAppendLog !! server %d 's term(%d) is bigger than me(%d).\n", localServer_->serverId, msg->leaderid(), msg->term(), currentTerm_.load());
|
|
if (state_.load() != LEADER)
|
|
{
|
|
newTerm(msg->term());
|
|
newTermFlag= true;
|
|
}
|
|
else
|
|
{
|
|
rsp->set_term(currentTerm_);
|
|
return -1;
|
|
}
|
|
}
|
|
else if (msg->term() < currentTerm_)
|
|
{
|
|
if (!forceRequestMode_ && leaderId_.load() == 0 && msg->term() >= log_->getLastLogTerm() && state_ == CANDIDATE)
|
|
{
|
|
easy_error_log("Server %d : Downgrade term from %llu to %llu when onAppendLog, because there are leaderStickiness leader(%ld) exist!!\n", localServer_->serverId, currentTerm_.load(), msg->term(), msg->serverid());
|
|
newTerm(msg->term());
|
|
}
|
|
else if (state_ == LEARNER && (enableLearnerAutoResetMatchIndex_ || msg->term() >= log_->getLastLogTerm()))
|
|
{
|
|
easy_error_log("Server %d : Downgrade term from %llu to %llu when onAppendLog, because I am learner!!\n", localServer_->serverId, currentTerm_.load(), msg->term());
|
|
newTerm(msg->term());
|
|
}
|
|
else
|
|
{
|
|
easy_warn_log("Server %d : msgId(%llu) receive logs from old leader(%ld) current leader(%ld). localTerm(%ld),msg.term(%d) \n", localServer_->serverId, msg->msgid(), msg->leaderid(), leaderId_.load(), currentTerm_.load(), msg->term());
|
|
rsp->set_term(currentTerm_);
|
|
return -1;
|
|
}
|
|
}
|
|
else if (state_ != FOLLOWER && state_ != LEARNER)
|
|
{
|
|
changeState_(FOLLOWER);
|
|
}
|
|
rsp->set_term(currentTerm_);
|
|
|
|
if (leaderId_ == 0)
|
|
{
|
|
leaderId_.store(msg->leaderid());
|
|
leaderAddr_= "";
|
|
option.extraStore->setRemote("");
|
|
rsp->set_force(1);
|
|
}
|
|
else if (leaderId_ != msg->leaderid())
|
|
{
|
|
/* TODO is this possible? */
|
|
easy_warn_log("Server %d : receive logs from different leader. old(%d),new(%d), term(%ld),msg.term(%d) \n", localServer_->serverId, leaderId_.load(), msg->leaderid(), currentTerm_.load(), msg->term());
|
|
leaderId_.store(msg->leaderid());
|
|
leaderAddr_= "";
|
|
option.extraStore->setRemote("");
|
|
rsp->set_force(1);
|
|
}
|
|
|
|
if (msg->has_addr())
|
|
{
|
|
leaderAddr_= msg->addr();
|
|
if (msg->has_extra())
|
|
option.extraStore->setRemote(msg->extra());
|
|
}
|
|
if (leaderAddr_ == "")
|
|
rsp->set_force(1);
|
|
|
|
if (state_ != LEARNER)
|
|
electionTimer_->restart();
|
|
|
|
if (!msg->has_prevlogterm())
|
|
{
|
|
rsp->set_ignorecheck(true);
|
|
easy_warn_log("Server %d : msgId(%llu) receive logs without prevlogterm. from server %ld, localTerm(%ld),msg.term(%d) lli:%ld\n", localServer_->serverId, msg->msgid(), msg->leaderid(), currentTerm_.load(), msg->term(), lastLogIndex);
|
|
return 0;
|
|
}
|
|
|
|
if (prevLogIndex > lastLogIndex)
|
|
{
|
|
uint64_t msgEntrieSize= msg->entries_size();
|
|
uint64_t msgLastIndex;
|
|
uint64_t beginTerm;
|
|
uint64_t beginIndex;
|
|
if (msgEntrieSize != 0)
|
|
{
|
|
msgLastIndex= prevLogIndex + msg->entries_size();
|
|
beginTerm= msg->entries().begin()->term();
|
|
beginIndex= msg->entries().begin()->index();
|
|
}
|
|
|
|
/* Now we allow holes in the log; we put the non-contiguous entries in the cache. */
|
|
if (enableLogCache_ && !msg->nocache() && msgEntrieSize != 0 && beginTerm == (msg->entries().end()-1)->term() && beginTerm == currentTerm_)
|
|
{
|
|
logRecvCache_.put(beginIndex, msgLastIndex, *(msg->mutable_entries()));
|
|
logRecvCache_.setCommitIndex(msg->commitindex());
|
|
rsp->set_issuccess(true);
|
|
rsp->set_lastlogindex(log_->getLastLogIndex());
|
|
easy_warn_log("Server %d : receive uncontinue log local lastLogIndex(%ld, term:%ld); msgId(%llu) msg prevlogindex(%ld, term:%ld) has %llu entries firstIndex(%llu) lastIndex(%llu); put it in cache.\n", localServer_->serverId, lastLogIndex, currentTerm_.load(), msg->msgid(), msg->prevlogindex(), msg->prevlogterm(), msgEntrieSize, beginIndex, msgLastIndex);
|
|
}
|
|
else
|
|
{
|
|
/*
 * This is possible. It happens when the new leader sends its first appendlog msg.
 * We return a hint to let the leader know our last log index.
 */
|
|
easy_warn_log("Server %d : msgId(%llu) receive log's prevlogindex(%ld, term:%ld) is bigger than lastLogIndex(%ld, term:%ld) reject.\n", localServer_->serverId, msg->msgid(), msg->prevlogindex(), msg->prevlogterm(), lastLogIndex, currentTerm_.load());
|
|
rsp->set_lastlogindex(log_->getLastLogIndex());
|
|
|
|
/* We clear cache here. */
|
|
//logRecvCache_.clear();
|
|
return -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
LogEntry prevLogEntry;
|
|
int error;
|
|
if ((error= log_->getEntry(prevLogIndex, prevLogEntry, false)) ||
|
|
(prevLogEntry.term() != msg->prevlogterm() && prevLogEntry.optype() != kMock)) //TODO should check start index here.
|
|
{
|
|
if (error)
|
|
{
|
|
easy_warn_log("Server %d: fail to get log entry on append log, index %llu", localServer_->serverId, prevLogIndex);
|
|
}
|
|
else
|
|
{
|
|
/* log does not match, reject it; the leader will send the correct log again. */
|
|
easy_warn_log("Server %d : msgId(%llu) msg's prevlogterm(%llu) is not match with local log's prevlogterm(%llu) with index(%llu) reject!", localServer_->serverId, msg->msgid(), msg->prevlogterm(), prevLogEntry.term(), prevLogIndex);
|
|
}
|
|
if (state_ == FOLLOWER)
|
|
{
|
|
/*
 * In some rare cases the leader thinks this node is a learner while it is still in follower state,
 * because it has not yet received the downgrade configure change logEntry.
 * Just set the role field in the response msg so the leader becomes aware of this situation.
 */
|
|
rsp->set_role(state_);
|
|
}
|
|
return 0;
|
|
}
|
|
/*
|
|
else
|
|
easy_warn_log("Server %d : msgId(%llu) msg's prevlogterm(%llu) is match with local log's prevlogterm(%llu) pass.", localServer_->serverId, msg->msgid(), msg->prevlogterm(), prevLogEntry.term());
|
|
*/
|
|
|
|
// checksum test
|
|
if (checksum_mode_)
|
|
{
|
|
for (auto& entry : msg->entries())
|
|
{
|
|
if (log_checksum_test(entry))
|
|
{
|
|
easy_error_log("Server %d: msgId(%llu) log index %llu checksum fail.", localServer_->serverId, msg->msgid(), entry.index());
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
rsp->set_issuccess(true);
|
|
enableLogCache_ = true;
|
|
|
|
//assert(msg->entries_size() <= 1);
|
|
if (msg->entries_size() > 0)
|
|
{
|
|
easy_warn_log("Server %d : msgId(%llu) receive log has %ld entries, plt:%ld, pli:%ld, commitIndex:%ld\n", localServer_->serverId, msg->msgid(), msg->entries_size(), msg->prevlogterm(), msg->prevlogindex(), msg->commitindex());
|
|
bool appendDone= false;
|
|
if (msg->entries_size() != 0 && log_->getLastLogIndex() == prevLogIndex)
|
|
{
|
|
/* no need truncate */
|
|
uint64_t lli= log_->append(msg->entries());
|
|
if (lli == 0)
|
|
{
|
|
rsp->set_lastlogindex(log_->getLastLogIndex());
|
|
rsp->set_issuccess(false);
|
|
rsp->set_msgerror(PaxosMsg::APPEND);
|
|
easy_warn_log("Server %d : msgId(%llu) write log entries from index %llu, size %lu failed\n", localServer_->serverId, msg->msgid(), prevLogIndex + 1, msg->entries_size());
|
|
return 0;
|
|
}
|
|
appendDone= true;
|
|
rsp->set_lastlogindex(log_->getLastLogIndex());
|
|
assert(lli == (prevLogIndex + msg->entries_size()));
|
|
assert(lli == log_->getLastLogIndex());
|
|
if (true)//(state_ != LEARNER)
|
|
{
|
|
for (auto& entry : msg->entries())
|
|
if (entry.optype() == kConfigureChange)
|
|
{
|
|
prepareConfigureChangeEntry_(entry, msg);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
uint64_t msgLastIndex= prevLogIndex + msg->entries_size();
|
|
uint64_t lastLogIndex= log_->getLastLogIndex();
|
|
if (!newTermFlag && lastLogIndex >= msgLastIndex)
|
|
{
|
|
/* In a batch of contiguous log entries, if the first and the last entry have the same term, all entries in between have that term as well. */
|
|
uint64_t beginTerm= msg->entries().begin()->term();
|
|
if (beginTerm == (msg->entries().end()-1)->term() && beginTerm == currentTerm_ && prevLogEntry.term() == currentTerm_)
|
|
{
|
|
easy_warn_log("Server %d : ignore %ld entries, plt:%ld, pli:%ld, commitIndex:%ld lliInMsg:%llu lli:%llu\n", localServer_->serverId, msg->entries_size(), msg->prevlogterm(), msg->prevlogindex(), msg->commitindex(), msgLastIndex, lastLogIndex);
|
|
appendDone= true;
|
|
rsp->set_lastlogindex(msgLastIndex);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!appendDone)
|
|
{
|
|
uint64_t index= prevLogIndex;
|
|
rsp->set_lastlogindex(prevLogIndex + msg->entries_size());
|
|
int dupcnt = 0;
|
|
for (auto it= msg->entries().begin(); it != msg->entries().end(); ++it)
|
|
{
|
|
++index;
|
|
const LogEntry &entry= *it;
|
|
|
|
easy_warn_log("Server %d : parse entries index:%ld, entry.term:%ld, entry.index:%ld\n", localServer_->serverId, index, entry.term(), entry.index());
|
|
assert(entry.index() == index);
|
|
|
|
if (log_->getLastLogIndex() >= index)
|
|
{
|
|
/* need truncate */
|
|
LogEntry en;
|
|
if (!log_->getEntry(index, en, false) && (en.term() == entry.term() || en.optype() == kMock))
|
|
{
|
|
/* A duplicate log entry that has already been received. */
|
|
dupcnt++;
|
|
easy_warn_log("Server %d : duplicate log entry, ignore, entry.term:%ld, entry.index:%ld\n", localServer_->serverId, entry.term(), entry.index());
|
|
continue;
|
|
}
|
|
// commit index might be bigger than last log index if we set learner auto reset match index on
|
|
if (enableLearnerAutoResetMatchIndex_ && commitIndex_ >= index && index > 1)
|
|
commitIndex_ = index - 1;
|
|
/* Truncate the log start from the index. */
|
|
truncateBackward_(index);
|
|
++ (stats_.countTruncateBackward);
|
|
easy_error_log("Server %d : truncate paxos log from(include) %ld in appendLog msg, lli:%ld\n", localServer_->serverId, index, log_->getLastLogIndex());
|
|
break;
|
|
}
|
|
else
|
|
break;
|
|
}
|
|
int msgEntrieSize= msg->entries_size();
|
|
msg->mutable_entries()->DeleteSubrange(0, dupcnt);
|
|
assert(msg->entries_size() == (msgEntrieSize - dupcnt));
|
|
easy_warn_log("Server %d : Duplicate entrys count %d, remaining entries count %d", localServer_->serverId, dupcnt, msg->entries_size());
|
|
if (msg->entries_size() > 0)
|
|
{
|
|
assert((log_->getLastLogIndex()+1) == msg->entries(0).index());
|
|
if (log_->append(msg->entries()) == 0)
|
|
{
|
|
rsp->set_lastlogindex(log_->getLastLogIndex());
|
|
rsp->set_issuccess(false);
|
|
rsp->set_msgerror(PaxosMsg::APPEND);
|
|
easy_warn_log("Server %d : msgId(%llu) write log entries from index %llu, size %lu failed\n", localServer_->serverId, msg->msgid(), msg->entries(0).index(), msg->entries_size());
|
|
return 0;
|
|
}
|
|
assert(log_->getLastLogIndex() == msg->entries(msg->entries_size() - 1).index());
|
|
}
|
|
// deal with ConfigureChange
|
|
for (auto& entry : msg->entries())
|
|
if (entry.optype() == kConfigureChange)
|
|
{
|
|
prepareConfigureChangeEntry_(entry, msg);
|
|
}
|
|
}
|
|
PaxosLogCacheNode *node= logRecvCache_.get(log_->getLastLogIndex() + 1);
|
|
if (node != NULL)
|
|
{
|
|
if (log_->append(node->entries) == 0)
|
|
{
|
|
rsp->set_lastlogindex(log_->getLastLogIndex());
|
|
rsp->set_issuccess(false);
|
|
rsp->set_msgerror(PaxosMsg::APPEND);
|
|
easy_warn_log("Server %d : msgId(%llu) write cached log entries from index %llu, size %lu failed\n", localServer_->serverId, msg->msgid(), log_->getLastLogIndex() + 1, node->entries.size());
|
|
delete node;
|
|
return 0;
|
|
}
|
|
rsp->set_lastlogindex(log_->getLastLogIndex());
|
|
for (auto& entry : node->entries)
|
|
if (entry.optype() == kConfigureChange)
|
|
{
|
|
prepareConfigureChangeEntry_(entry, msg, true);
|
|
}
|
|
easy_warn_log("Server %d : Get log from cache, beginIndex(%llu) endIndex(%llu) term(%llu)\n", localServer_->serverId, node->beginIndex, node->endIndex, node->entries.begin()->term());
|
|
}
|
|
delete node;
|
|
}
|
|
else
|
|
++ (stats_.countOnHeartbeat);
|
|
|
|
//rsp->set_lastlogindex(log_->getLastLogIndex());
|
|
|
|
/* Update commitIndex. */
|
|
if (msg->commitindex() > commitIndex_ && !debugSkipUpdateCommitIndex)
|
|
{
|
|
if (ccMgr_.prepared && ccMgr_.preparedIndex <= msg->commitindex() && ccMgr_.preparedIndex > commitIndex_)
|
|
{
|
|
//srv_->sendAsyncEvent(&Paxos::applyConfigureChange_, this, ccMgr_.preparedIndex);
|
|
applyConfigureChangeNoLock_(ccMgr_.preparedIndex);
|
|
if (ccMgr_.needNotify != 1)
|
|
ccMgr_.clear();
|
|
}
|
|
easy_warn_log("Server %d : Follower commitIndex change from %ld to %ld\n", localServer_->serverId, commitIndex_, msg->commitindex());
|
|
commitIndex_= msg->commitindex();
|
|
assert(commitIndex_ <= log_->getLastLogIndex());
|
|
|
|
/* already hold the lock_ by the caller. */
|
|
cond_.notify_all();
|
|
|
|
/* X-Paxos supports learners fetching logs from a follower. */
|
|
appendLogToLearner();
|
|
/*
|
|
if (srv_->cs)
|
|
srv_->cs->set(entry.ikey(), entry.value());
|
|
*/
|
|
}
|
|
}
|
|
|
|
if (tryFillFollowerMeta_(rsp->mutable_cientries()))
|
|
easy_warn_log("Server %d : msgId(%llu) tryFillFollowerMeta\n", localServer_->serverId);
|
|
|
|
easy_warn_log("Server %d : msgId(%llu) onAppendLog end, is_success %d\n", localServer_->serverId, msg->msgid(), rsp->issuccess());
|
|
|
|
return 0;
|
|
}
|
|
|
|
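/*
 * Handler for AppendLogResponce on the sending side (leader, or a
 * follower/learner acting as a learner source). On success it advances the
 * remote server's matchIndex/nextIndex and may advance commitIndex; on
 * failure it steps nextIndex back (or resets matchIndex when the remote
 * server appears to have lost log) and resends the correct entries.
 */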
int Paxos::onAppendLogResponce(PaxosMsg *msg)
|
|
{
|
|
assert(msg->msgtype() == AppendLogResponce);
|
|
/* XXX Now we support learner source to another learner */
|
|
if (state_ != LEADER && state_ != FOLLOWER && state_ != LEARNER)
|
|
return -1;
|
|
|
|
/* update extra storage for Followers */
|
|
if (msg->has_extra())
|
|
option.extraStore->setRemote(msg->extra());
|
|
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
if (shutdown_.load())
|
|
return -1;
|
|
auto server= std::dynamic_pointer_cast<RemoteServer>(config_->getServer(msg->serverid()));
|
|
auto wserver= std::weak_ptr<RemoteServer>(server);
|
|
//easy_warn_log("Server %d : onAppendLogResponce receive a msg msgId(%llu) from server %llu\n", localServer_->serverId, msg->msgid(), msg->serverid());
|
|
if (server == nullptr)
|
|
{
|
|
easy_warn_log("Server %d : onAppendLogResponce receive a msg msgId(%llu) from server %llu which has been deleted already!\n", localServer_->serverId, msg->msgid(), msg->serverid());
|
|
return -2;
|
|
}
|
|
|
|
if(state_ == FOLLOWER && (server->learnerSource != localServer_->serverId || !server->isLearner))
|
|
{
|
|
easy_warn_log("Server %d : onAppendLogResponce receive a msg msgId(%llu) from server %llu learnerSource:%llu who's learnerSource not match or already not a leaner!\n", localServer_->serverId, msg->msgid(), msg->serverid(), server->learnerSource);
|
|
return -3;
|
|
}
|
|
|
|
//assert(server->waitForReply == 1);
|
|
if (msg->msgid() >= server->guardId)
|
|
server->waitForReply= 0;
|
|
else
|
|
easy_warn_log("Server %d : onAppendLogResponce skip reset waitForReply, msgid %llu guardid %llu", localServer_->serverId, msg->msgid(), server->guardId.load());
|
|
if (msg->has_force() && msg->force() == 1)
|
|
server->needAddr= true;
|
|
|
|
if (msg->term() > currentTerm_)
|
|
{
|
|
easy_warn_log("Server %d : New Term in onAppendLogResponce msgId(%llu) !! server %d 's term(%d) is bigger than me(%d).\n", localServer_->serverId, msg->msgid(), msg->serverid(), msg->term(), currentTerm_.load());
|
|
if (state_.load() != LEADER)
|
|
{
|
|
newTerm(msg->term());
|
|
}
|
|
else
|
|
{
|
|
if (server->matchIndex.load() != 0)
|
|
{
|
|
/* The follower (server) is now a naughty server, do not use pipelining */
|
|
easy_warn_log("Server %d : msgId(%llu) server %d became a naughty server, reset match index from %lu to 0\n", localServer_->serverId, msg->msgid(), msg->serverid(), server->matchIndex.load());
|
|
server->resetMatchIndex(0);
|
|
}
|
|
}
|
|
}
|
|
else if (msg->term() < currentTerm_)
|
|
{
|
|
easy_warn_log("Server %d : Receive prev term's AppendLogResponce msgId(%llu) (term:%ld) from server(%ld), currentTerm(%ld) just ignore!!\n", localServer_->serverId, msg->msgid(), msg->term(), msg->serverid(), currentTerm_.load());
|
|
}
|
|
else
|
|
{
|
|
assert(msg->term() == currentTerm_);
|
|
/* Inc epoch for RemoteServer's. we reset RemoteServer's heartbeat in sendMsgFunc. */
|
|
server->setLastAckEpoch(currentEpoch_);
|
|
if (server->appliedIndex < msg->appliedindex())
|
|
server->appliedIndex= msg->appliedindex();
|
|
/*
 * XXX About msg->lastlogindex
 * when appendLog succeeds: msg->lastlogindex is the last log index in the msg (prevLogIndex + numEntries)
 * when appendLog fails: msg->lastlogindex is the last log index in the follower's local log_
 * when appendLog succeeds but in cached mode: msg->lastlogindex is the last log index in the follower's local log_ (not including the cache)
 */
|
|
if (msg->issuccess())
|
|
{
|
|
if (server->nextIndex != msg->lastlogindex() + 1 || server->matchIndex != 0)
|
|
{
|
|
uint64_t oldMatchIndex= server->matchIndex;
|
|
uint64_t oldNextIndex= server->nextIndex;
|
|
|
|
if (msg->lastlogindex() < server->nextIndex && server->matchIndex == 0)
|
|
{
|
|
easy_warn_log("Server %d : onAppendLogResponce this response of AppendLog to server %d may be a resend msg that we have already received, msg index(%ld) is smaller than nextIndex(%ld)\n", localServer_->serverId, msg->serverid(), msg->lastlogindex(), server->nextIndex.load());
|
|
}
|
|
else
|
|
{
|
|
server->hasMatched= true;
|
|
if (msg->lastlogindex() > server->matchIndex)
|
|
{
|
|
server->matchIndex= msg->lastlogindex();
|
|
/* trigger auto change to follower; use maxDelayIndex4NewMember_/2 to have a better chance of success */
|
|
if (ccMgr_.autoChangeAddr == server->strAddr && state_ == LEADER && server->isLearner &&
|
|
(log_->getLastLogIndex() <= (maxDelayIndex4NewMember_ / 2 + server->matchIndex.load())))
|
|
{
|
|
srv_->sendAsyncEvent(&Paxos::autoChangeLearnerAction, this);
|
|
}
|
|
}
|
|
if (server->nextIndex < server->matchIndex + 1)
|
|
server->nextIndex= server->matchIndex + 1;
|
|
/*
|
|
* try to update commitIndex here,
|
|
* only if matchIndex is greater than commitIndex
|
|
*/
|
|
if (server->matchIndex > commitIndex_)
|
|
tryUpdateCommitIndex_();
|
|
}
|
|
|
|
easy_warn_log("Server %d : msgId(%llu) AppendLog to server %d success, matchIndex(old:%llu,new:%llu) and nextIndex(old:%llu,new:%llu) have changed\n", localServer_->serverId, msg->msgid(), msg->serverid(), oldMatchIndex, server->matchIndex.load(), oldNextIndex, server->nextIndex.load());
|
|
}
|
|
else if (server->matchIndex == 0)
|
|
{
|
|
// previously this server might have been considered a naughty server and its match index set to 0;
// set it right here, so the match index stays correct even when no log is being replicated
|
|
server->matchIndex= msg->lastlogindex();
|
|
easy_warn_log("Server %d : msgId(%llu) AppendLog to server %d success, this is a heartbeat responce, set match index from 0 to %llu. nextIndex(%llu) msg(lli:%llu term:%llu)\n",
|
|
localServer_->serverId, msg->msgid(), msg->serverid(), msg->lastlogindex(), server->nextIndex.load(), msg->lastlogindex(), msg->term());
|
|
}
|
|
else
|
|
{
|
|
/*
 * We received a heartbeat response before committing any logEntry in this term.
 * This indicates a bug or an out-of-order msg.
 */
|
|
easy_warn_log("Server %d : msgId(%llu) AppendLog to server %d success, skip because this is a heartbeat responce. nextIndex(%llu) msg(lli:%llu term:%llu)\n",
|
|
localServer_->serverId, msg->msgid(), msg->serverid(), server->nextIndex.load(), msg->lastlogindex(), msg->term());
|
|
}
|
|
|
|
/* Update meta for learner source */
|
|
if (msg->cientries_size() > 0)
|
|
{
|
|
config_->mergeFollowerMeta(msg->cientries());
|
|
easy_warn_log("Server %d : msgId(%llu) mergeFollowerMeta from server %d\n", localServer_->serverId, msg->msgid(), msg->serverid());
|
|
}
|
|
// we should deal with matchIndex == 0 case.
|
|
|
|
/*
 * If the follower is not up to date,
 * there is no need to wait for the next heartbeat to send the next log entry.
 */
|
|
//if (server->nextIndex <= log_->getLastLogIndex())
|
|
// appendLogToServer(server.get(), false);
|
|
//srv_->sendAsyncEvent(&Paxos::appendLogToServer, this, (RemoteServer *)server.get(), true, false);
|
|
appendLogToServer(std::move(wserver), false, false);
|
|
if (getState() == Paxos::FOLLOWER || getState() == Paxos::LEARNER)
|
|
{
|
|
updateFollowerMetaNo();
|
|
easy_warn_log("Server %d : updateFollowerMetaNo\n", localServer_->serverId);
|
|
}
|
|
}
|
|
else if (msg->has_ignorecheck() && msg->ignorecheck())
|
|
{
|
|
easy_warn_log("Server %d : msgId(%llu) AppendLog to server %d without check\n", localServer_->serverId, msg->msgid(), msg->serverid());
|
|
if (server->isLearner)
|
|
appendLogToServer(std::move(wserver), false);
|
|
}
|
|
else
|
|
{
|
|
uint64_t oldNextIndex= server->nextIndex;
|
|
|
|
/* We also need to reset matchindex if leader thinks it is a learner but the node is still in follower state */
|
|
bool learnerStateNotMatch = server->isLearner && msg->has_role() && msg->role() == FOLLOWER;
|
|
if (learnerStateNotMatch)
|
|
server->resetMatchIndex(0); // to trigger decrement nextIndex case
|
|
if (!server->isLearner || learnerStateNotMatch)
|
|
{
|
|
uint64_t term= 0, optype= 0, info= 0;
|
|
if (consensusAsync_.load() ||
|
|
enableAutoResetMatchIndex_ ||
|
|
(!log_->getLogMeta(server->matchIndex.load() + 1, &term, &optype, &info) && (info & (1 << 5 | 1 << 6))))
|
|
{
|
|
if (!server->hasMatched && msg->lastlogindex() < server->matchIndex)
|
|
{
|
|
/* log is lost; this can happen when the follower does not sync the log to disk after receiving it
 * and then goes through a crash recovery:
 * 1. sync log is not set
 * 2. FLAG_BLOB | FLAG_BLOB_END, the entry has not had a chance to be flushed
 **/
|
|
easy_error_log("Server %d : follower(%d) might lost some logs. matchIndex(%llu) is greater than follower's lli(%llu), we reset matchIndex to 0!",
|
|
localServer_->serverId, msg->serverid(), server->matchIndex.load(), msg->lastlogindex());
|
|
server->resetMatchIndex(0);
|
|
}
|
|
}
|
|
|
|
if (server->matchIndex > 0)
|
|
{
|
|
server->nextIndex= server->matchIndex + 1;
|
|
}
|
|
else
|
|
{
|
|
/* send correct log for this RemoteServer. */
|
|
if (server->nextIndex > 1)
|
|
{
|
|
-- (server->nextIndex);
|
|
}
|
|
|
|
/* XXX if the follower lost many logs, decrement nextIndex once. */
|
|
//if (server->nextIndex > msg->prevlogindex() + 1)
|
|
if (server->nextIndex > msg->lastlogindex() + 1)
|
|
{
|
|
server->nextIndex= msg->lastlogindex() + 1;
|
|
}
|
|
}
|
|
|
|
if (!msg->has_msgerror() || msg->msgerror() != PaxosMsg::APPEND)
|
|
appendLogToServer(std::move(wserver), false);
|
|
|
|
easy_warn_log("Server %d : msgId(%llu) AppendLog to server %d failed, msg error %d, "
|
|
"lastlogindex(%ld) is not match with the local nextIndex(%ld), "
|
|
"set local nextIndex to %ld, matchIndex(%llu).\n",
|
|
localServer_->serverId, msg->msgid(), msg->serverid(), msg->has_msgerror()? msg->msgerror(): 0,
|
|
msg->lastlogindex(), oldNextIndex,
|
|
server->nextIndex.load(), server->matchIndex.load());
|
|
}
|
|
else
|
|
{
|
|
if (enableLearnerAutoResetMatchIndex_)
|
|
{
|
|
server->resetMatchIndex(0);
|
|
|
|
// decrease next index until log match, treat learner like follower
|
|
if (server->nextIndex > 1)
|
|
{
|
|
--server->nextIndex;
|
|
}
|
|
|
|
if (server->nextIndex > msg->lastlogindex() + 1)
|
|
{
|
|
server->nextIndex= msg->lastlogindex() + 1;
|
|
}
|
|
|
|
easy_warn_log("Server %d : Learner(%d) match index reset to 0, nextIndex set to %llu.", localServer_->serverId, msg->serverid(), server->nextIndex.load());
|
|
|
|
appendLogToLearner(wserver.lock());
|
|
}
|
|
else
|
|
{
|
|
uint64_t oldNextIndex= server->nextIndex;
|
|
uint64_t oldMatchIndex= server->matchIndex;
|
|
server->resetMatchIndex(msg->lastlogindex());
|
|
server->nextIndex= msg->lastlogindex() + 1;
|
|
if (oldNextIndex != server->nextIndex || oldMatchIndex != server->matchIndex)
|
|
{
|
|
easy_warn_log("Server %d : Learner(%d) change its local log position! We reset server(learner)'s matchIndex(old:%llu,new:%llu) and nextIndex(old:%llu,new:%llu).", localServer_->serverId, msg->serverid(), oldMatchIndex, server->matchIndex.load(), oldNextIndex, server->nextIndex.load());
|
|
|
|
appendLogToLearner(wserver.lock());
|
|
}
|
|
else
|
|
{
|
|
easy_warn_log("Server %d : Learner(%d) change its local log position or term, which is not correct !! current matchIndex(%llu) nextIndex(%llu).", localServer_->serverId, msg->serverid(), server->matchIndex.load(), server->nextIndex.load());
|
|
}
|
|
}
|
|
}
|
|
/* Resend the correct log entry. */
|
|
/* TODO Need async */
|
|
}
|
|
}
|
|
|
|
/* send correct log if needed */
|
|
return 0;
|
|
}
|
|
|
|
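/*
 * Called when sending an AppendLog message failed. A resend is attempted
 * only for real log-carrying messages from the current leader term, to a
 * server that is still connected and has pipelining enabled; heartbeats
 * are never resent. A new message id is allocated for the retry.
 */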
int Paxos::onAppendLogSendFail(PaxosMsg *msg, uint64_t *newId)
|
|
{
|
|
/* No need to resend the msg if this server is not the leader or the msg belongs to a previous term. */
|
|
if (state_ != LEADER || msg->term() != currentTerm_)
|
|
return -1;
|
|
|
|
if (msg->msgtype() != AppendLog)
|
|
return -2;
|
|
|
|
/* No need to resend the heartbeat msg. */
|
|
if (msg->entries_size() == 0 && msg->has_compressedentries() == false)
|
|
return -3;
|
|
|
|
lock_.lock();
|
|
auto server= std::dynamic_pointer_cast<RemoteServer>(config_->getServer(msg->serverid()));
|
|
auto wserver= std::weak_ptr<RemoteServer>(server);
|
|
if (server == nullptr)
|
|
{
|
|
easy_warn_log("Server %d : onAppendLogSendFail try resend msgId(%llu) to server %llu which has been deleted already!\n", localServer_->serverId, msg->msgid(), msg->serverid());
|
|
lock_.unlock();
|
|
return -4;
|
|
}
|
|
|
|
if (server->lostConnect.load() || server->disablePipelining.load())
|
|
{
|
|
lock_.unlock();
|
|
return -5;
|
|
}
|
|
|
|
lock_.unlock();
|
|
|
|
if (newId)
|
|
*newId= server->msgId.fetch_add(1);
|
|
|
|
/* TODO reset timer for the correspond RemoteServer. */
|
|
return 0;
|
|
}
|
|
|
|
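/*
 * Handle a LeaderCommand from the leader. LeaderTransfer: catch up the
 * commitIndex and, if the local log is fully up to date, start an election
 * immediately via requestVote. PurgeLog: queue an asynchronous purge task
 * using the minMatchIndex carried in the message.
 */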
int Paxos::onLeaderCommand(PaxosMsg *msg, PaxosMsg *rsp)
|
|
{
|
|
++ (stats_.countOnLeaderCommand);
|
|
lock_.lock();
|
|
if (shutdown_.load())
{
lock_.unlock();
return -1;
}
|
|
|
|
rsp->set_msgid(msg->msgid());
|
|
rsp->set_msgtype(LeaderCommandResponce);
|
|
rsp->set_serverid(localServer_->serverId);
|
|
rsp->set_term(currentTerm_);
|
|
|
|
if (msg->lctype() == LeaderTransfer) {
|
|
/* Update commitIndex. */
|
|
if (msg->commitindex() > commitIndex_)
|
|
{
|
|
if (ccMgr_.prepared && ccMgr_.preparedIndex <= msg->commitindex() && ccMgr_.preparedIndex > commitIndex_)
|
|
{
|
|
applyConfigureChangeNoLock_(ccMgr_.preparedIndex);
|
|
if (ccMgr_.needNotify != 1)
|
|
ccMgr_.clear();
|
|
}
|
|
easy_warn_log("Server %d : Follower commitIndex change from %ld to %ld during onLeaderCommand\n", localServer_->serverId, commitIndex_, msg->commitindex());
|
|
commitIndex_= msg->commitindex();
|
|
assert(commitIndex_ <= log_->getLastLogIndex());
|
|
/* notify waitCommitIndexUpdate */
|
|
cond_.notify_all();
|
|
}
|
|
if (msg->lastlogindex() == log_->getLastLogIndex() && msg->lastlogindex() == commitIndex_)
|
|
{
|
|
rsp->set_issuccess(true);
|
|
lock_.unlock();
|
|
requestVote();
|
|
}
|
|
else
|
|
{
|
|
rsp->set_issuccess(false);
|
|
lock_.unlock();
|
|
}
|
|
}
|
|
else if (msg->lctype() == PurgeLog) {
|
|
/* check for purge log */
|
|
easy_warn_log("Server %d : prepare to purge log, minMatchIndex %ld \n", localServer_->serverId, msg->minmatchindex());
|
|
purgeLogQueue_.push(new purgeLogArgType(msg->minmatchindex(), this));
|
|
srv_->sendAsyncEvent(&SingleProcessQueue<purgeLogArgType>::process, &purgeLogQueue_, Paxos::doPurgeLog);
|
|
rsp->set_issuccess(true);
|
|
lock_.unlock();
|
|
}
|
|
|
|
easy_warn_log("Server %d : msgId(%llu) receive leaderCommand from server(%ld), currentTerm(%ld), lli(%ld), issuccess(%d)\n", localServer_->serverId, msg->msgid(), msg->serverid(), currentTerm_.load(), log_->getLastLogIndex(), rsp->issuccess());
|
|
|
|
return 0;
|
|
}
|
|
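/*
 * Leader side: send a LeaderCommand to one server (LeaderTransfer) or to
 * all followers (PurgeLog). For LeaderTransfer the target is expected to
 * be fully caught up (matchIndex == commitIndex == last log index).
 */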
int Paxos::leaderCommand(LcTypeT type, std::shared_ptr<RemoteServer> server)
|
|
{
|
|
/* This is only called internally for now, so lock_ is not taken here. */
|
|
//std::lock_guard<std::mutex> lg(lock_);
|
|
|
|
/* just ensure only leader do leaderCommand */
|
|
if (state_ != LEADER)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
PaxosMsg msg;
|
|
msg.set_term(currentTerm_);
|
|
msg.set_msgtype(LeaderCommand);
|
|
msg.set_serverid(localServer_->serverId);
|
|
msg.set_lctype(type);
|
|
|
|
++ (stats_.countLeaderCommand);
|
|
|
|
if (type == LeaderTransfer)
|
|
{
|
|
easy_error_log("Server %d : leaderCommand(LeaderTransfer) to server(%ld), currentTerm(%ld), lli(%llu)\n", localServer_->serverId, server->serverId, currentTerm_.load(), log_->getLastLogIndex());
|
|
assert(commitIndex_ == log_->getLastLogIndex() && commitIndex_ == server->matchIndex);
|
|
msg.set_lastlogindex(log_->getLastLogIndex());
|
|
msg.set_commitindex(commitIndex_);
|
|
}
|
|
else if (type == PurgeLog)
|
|
{
|
|
easy_warn_log("Server %d : leaderCommand(PurgeLog) to all followers\n", localServer_->serverId);
|
|
/* broadcast minMatchIndex for purging log */
|
|
msg.set_minmatchindex(minMatchIndex_);
|
|
}
|
|
|
|
if (server != nullptr)
|
|
server->sendMsg((void *)&msg);
|
|
else
|
|
config_->forEach(&Server::sendMsg, (void *)&msg);
|
|
|
|
if (debugWitnessTest)
|
|
config_->forEachLearners(&Server::sendMsg, (void *)&msg);
|
|
|
|
return 0;
|
|
}
|
|
int Paxos::onLeaderCommandResponce(PaxosMsg *msg)
|
|
{
|
|
easy_warn_log("Server %d : msgId(%llu) receive leaderCommandResponce from server(%ld), currentTerm(%ld), lli(%llu)\n", localServer_->serverId, msg->msgid(), msg->serverid(), currentTerm_.load(), log_->getLastLogIndex());
|
|
return 0;
|
|
}
|
|
|
|
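/*
 * Disaster-recovery helper: drop every other member/learner from the local
 * configuration, persist the new single-node membership and immediately
 * request votes so this node can become leader of a one-node cluster.
 */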
int Paxos::forceSingleLeader()
|
|
{
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
if (state_.load() == LEARNER)
|
|
{
|
|
easy_error_log("Server %d : Execute forceSingleLeader for this learner!!", localServer_->serverId);
|
|
|
|
localServer_->serverId= 1;
|
|
changeState_(FOLLOWER);
|
|
config_->delAllLearners();
|
|
}
|
|
else
|
|
{
|
|
easy_error_log("Server %d : Execute forceSingleLeader for this server!!", localServer_->serverId);
|
|
|
|
config_->delAllRemoteServer(localServer_->strAddr, this);
|
|
}
|
|
|
|
log_->setMetaData(Paxos::keyLearnerConfigure, config_->learnersToString());
|
|
log_->setMetaData(Paxos::keyMemberConfigure, config_->membersToString(localServer_->strAddr));
|
|
|
|
srv_->sendAsyncEvent(&Paxos::requestVote, this, true);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int Paxos::forceSingleLearner()
|
|
{
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
if (state_.load() == LEARNER)
|
|
{
|
|
easy_error_log("Server %d : Execute forceSingleLearner failed because this is already a learner!!", localServer_->serverId);
|
|
return 1;
|
|
}
|
|
else
|
|
{
|
|
easy_error_log("Server %d : Execute forceSingleLearner for this server!!", localServer_->serverId);
|
|
|
|
config_->delAllLearners();
|
|
config_->delAllRemoteServer(localServer_->strAddr, this);
|
|
}
|
|
|
|
log_->setMetaData(Paxos::keyLearnerConfigure, config_->membersToString());
|
|
log_->setMetaData(Paxos::keyMemberConfigure, "");
|
|
|
|
changeState_(LEARNER);
|
|
electionTimer_->stop();
|
|
localServer_->serverId += 100;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int Paxos::forcePromote()
|
|
{
|
|
/* send requestVote request immediately to try to become a leader */
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
srv_->sendAsyncEvent(&Paxos::requestVote, this, true);
|
|
return 0;
|
|
}
|
|
|
|
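/*
 * Block until commitIndex exceeds baseIndex (or, for a leader in
 * consensusAsync mode, until the local log has synced past baseIndex).
 * Returns 0 if the term changed while waiting; otherwise returns the new
 * commitIndex (or lastSyncedIndex for a leader in consensusAsync mode).
 */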
uint64_t Paxos::waitCommitIndexUpdate(uint64_t baseIndex, uint64_t term)
|
|
{
|
|
std::unique_lock<std::mutex> ul(lock_);
|
|
|
|
if (term != 0 && currentTerm_ != term)
|
|
return 0;
|
|
/* TODO maybe we can signal cond_ only when it is needed, by keeping a min-heap of baseIndex. */
|
|
while (commitIndex_ <= baseIndex && (term == 0 || currentTerm_ == term) && !shutdown_.load() &&
|
|
(state_ != LEADER || !consensusAsync_.load() || localServer_->lastSyncedIndex.load() <= baseIndex))
|
|
cond_.wait(ul);
|
|
|
|
if (term != 0 && currentTerm_ != term)
|
|
return 0;
|
|
|
|
return (state_ == LEADER && consensusAsync_.load()) ? localServer_->lastSyncedIndex.load() : commitIndex_;
|
|
}
|
|
|
|
uint64_t Paxos::checkCommitIndex(uint64_t baseIndex, uint64_t term)
|
|
{
|
|
uint64_t ret = 0;
|
|
/* the blocking interface (waitCommitIndexUpdate) should be used instead
   if term is 0 */
|
|
if (term == 0)
|
|
return 0;
|
|
/* double check term to make sure we have a valid commitIndex */
|
|
if (currentTerm_ != term)
|
|
return 0;
|
|
ret = getCommitIndex();
|
|
/* double check term & check shutdown */
|
|
if (currentTerm_ != term || shutdown_.load())
|
|
return 0;
|
|
return ret;
|
|
}
|
|
|
|
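/*
 * Move to a new (possibly smaller) term: persist last-leader metadata when
 * stepping down from leader, clear leaderId/leaderAddr/votedFor, fall back
 * to FOLLOWER (or stay LEARNER), drop the receive log cache and tell every
 * remote server to step down.
 */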
void Paxos::newTerm(uint64_t newTerm)
|
|
{
|
|
if (state_ == LEADER)
|
|
{
|
|
leaderStepDowning_.store(true);
|
|
easy_error_log("Server %d : new term(old:%ld,new:%ld), This is a Leader Step Down!!\n", localServer_->serverId, currentTerm_.load(), newTerm);
|
|
log_->setMetaData(keyLastLeaderTerm, currentTerm_);
|
|
log_->setMetaData(keyLastLeaderLogIndex, commitIndex_);
|
|
if (ccMgr_.autoChangeAddr != "") {
|
|
ccMgr_.autoChangeAddr = "";
|
|
ccMgr_.autoChangeRet = -1;
|
|
ccMgr_.condChangeDone.notify_all();
|
|
}
|
|
}
|
|
else
|
|
easy_error_log("Server %d : new term(old:%ld,new:%ld) !!\n", localServer_->serverId, currentTerm_.load(), newTerm);
|
|
currentTerm_.store(newTerm);
|
|
log_->setTerm(currentTerm_);
|
|
log_->setMetaData(keyCurrentTerm, currentTerm_);
|
|
leaderId_.store(0);
|
|
leaderAddr_= std::string("");
|
|
option.extraStore->setRemote("");
|
|
votedFor_= 0;
|
|
log_->setMetaData(keyVoteFor, votedFor_);
|
|
if (state_ != LEARNER)
|
|
{
|
|
changeState_(FOLLOWER);
|
|
//electionTimer_->restart(electionTimeout_, true);
|
|
}
|
|
else
|
|
{
|
|
changeState_(LEARNER);
|
|
}
|
|
leaderStepDowning_.store(false);
|
|
|
|
logRecvCache_.clear();
|
|
|
|
/* TODO only step down when we are Leader */
|
|
config_->forEach(&Server::stepDown, NULL);
|
|
config_->forEachLearners(&Server::stepDown, NULL);
|
|
config_->forEachLearners(&Server::disconnect, NULL);
|
|
epochTimer_->stop();
|
|
}
|
|
|
|
uint64_t Paxos::appendLogFillForEachAsync(PaxosMsg *msg, RemoteServer *server, LogFillModeT mode)
|
|
{
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
|
|
if(currentTerm_ != msg->term())
|
|
{
|
|
easy_warn_log("Server %d : skip sendMsg async, because term has already changed target(%llu), now(%llu)\n", localServer_->serverId, msg->term(), currentTerm_.load());
|
|
return 0;
|
|
}
|
|
|
|
return appendLogFillForEach(msg, server, mode);
|
|
}
|
|
|
|
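/*
 * Fill one AppendLog message for a specific remote server: set
 * prevLogIndex/prevLogTerm from the server's nextIndex, then batch entries
 * up to maxPacketSize (scaled up in LargeBatchMode, a single entry in
 * EmptyMode). Learners are only sent entries up to commitIndex (or
 * appliedIndex when sendByAppliedIndex is set). When pipelining is allowed
 * and the server has matched once, nextIndex is advanced past the batch.
 * Returns the total byte size of the entries added to the msg (0 if none).
 */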
uint64_t Paxos::appendLogFillForEach(PaxosMsg *msg, RemoteServer *server, LogFillModeT mode)
|
|
{
|
|
/* There is no need to lock in this function. */
|
|
uint64_t prevLogTerm;
|
|
uint64_t nextIndex= server->nextIndex;
|
|
uint64_t prevLogIndex= nextIndex - 1;
|
|
uint64_t lastLogIndex= replicateWithCacheLog_.load() ? log_->getLastCachedLogIndex() : log_->getLastLogIndex();
|
|
uint64_t size= 0;
|
|
if (cdrMgr_.inRecovery)
|
|
{
|
|
easy_warn_log("Server %d : fill nothing to msg during commit dependency recovery.\n", localServer_->serverId);
|
|
return size; /* size is 0 */
|
|
}
|
|
if(prevLogIndex > lastLogIndex)
|
|
{
|
|
easy_warn_log("Server %d : server %d 's prevLogIndex %ld larger than lastLogIndex %ld. Just ignore.\n", localServer_->serverId, server->serverId, prevLogIndex, lastLogIndex);
|
|
return size; /* size is 0 */
|
|
}
|
|
assert(prevLogIndex <= lastLogIndex);
|
|
|
|
if (prevLogIndex > 0)
|
|
{
|
|
LogEntry entry;
|
|
if (0 != log_->getEntry(prevLogIndex, entry, true, server->serverId))
|
|
{
|
|
easy_warn_log("Server %d :getEntry fail for prevLogIndex(%ld) in Fill AppendLog to server %d\n", localServer_->serverId, prevLogIndex, msg->serverid());
|
|
return size;
|
|
}
|
|
prevLogTerm= entry.term();
|
|
}
|
|
else
|
|
prevLogTerm= 0;
|
|
|
|
if (server->needAddr)
|
|
{
|
|
msg->set_addr(localServer_->strAddr);
|
|
msg->set_extra(option.extraStore->getLocal());
|
|
server->needAddr= false;
|
|
}
|
|
msg->set_prevlogindex(prevLogIndex);
|
|
msg->set_prevlogterm(prevLogTerm);
|
|
msg->set_nocache(true);
|
|
|
|
/* We reuse msg here, so some entries may already exist (left over from sending to another follower). */
if (msg->entries_size() != 0)
msg->mutable_entries()->Clear();
/* We reuse msg here, so compressed entries may already exist (left over from sending to another follower). */
if (msg->has_compressedentries())
msg->clear_compressedentries();
|
|
|
|
/* try to use appliedIndex instead of commitIndex for learner */
|
|
uint64_t lastSendLogIndex= lastLogIndex;
|
|
if (server->isLearner)
|
|
{
|
|
if (!server->sendByAppliedIndex)
|
|
lastSendLogIndex = commitIndex_;
|
|
else
|
|
lastSendLogIndex = appliedIndex_.load();
|
|
}
|
|
|
|
/* For debug */
|
|
if (debugMaxSendLogIndex != 0)
|
|
{
|
|
lastSendLogIndex= (lastSendLogIndex > debugMaxSendLogIndex) ? debugMaxSendLogIndex : lastSendLogIndex;
|
|
}
|
|
|
|
uint64_t maxPacketSize= maxPacketSize_;
|
|
if (mode == LargeBatchMode)
|
|
maxPacketSize *= largeBatchRatio_;
|
|
if (lastSendLogIndex >= nextIndex)
|
|
{
|
|
LogEntry entry;
|
|
::google::protobuf::RepeatedPtrField<LogEntry>* entries;
|
|
entries= msg->mutable_entries();
|
|
|
|
uint64_t lastIndex= 0;
|
|
uint64_t lastInfo= 0;
|
|
for (uint64_t i= nextIndex; i <= lastSendLogIndex; ++i)
|
|
{
|
|
if (0 != log_->getEntry(i, entry, true, server->serverId))
|
|
{
|
|
easy_warn_log("Server %d :getEntry fail for entries(i:%ld) in Fill AppendLog to server %d\n", localServer_->serverId, i, msg->serverid());
|
|
break;
|
|
}
|
|
log_->putLogMeta(entry.index(), entry.term(), entry.optype(), entry.info());
|
|
assert(entry.index() == i);
|
|
if (entry.optype() == kMock)
|
|
{
|
|
easy_error_log("Server %d : read mock log(index:%llu) when send to server %d, the configure of mock index may error or may hit bug!!", localServer_->serverId, i, server->serverId);
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* Restriction from PolarDB-X Engine, possible info values:
|
|
* 1. FLAG_GU1 = 0x01, needGroup
|
|
* 2. FLAG_GU2 = 0x02, needGroup
|
|
* 3. FLAG_LARGE_TRX = 0x04, do not care
|
|
* 4. FLAG_LARGE_TRX_END = 0x08, do not care
|
|
*/
|
|
bool needGroup= false;
|
|
if ((lastInfo == 1 || lastInfo == 2) && lastInfo == entry.info())
|
|
needGroup= true;
|
|
if (entry.has_info())
|
|
lastInfo= entry.info();
|
|
else
|
|
lastInfo= 0;
|
|
|
|
auto entrySize= entry.ByteSize();
|
|
if (size + entrySize >= maxPacketSize && size != 0 && !needGroup)
|
|
break;
|
|
|
|
if (size + entrySize >= maxSystemPacketSize_)
|
|
{
|
|
if (size != 0)
|
|
{
|
|
easy_warn_log("Server %d : truncate the sending msg, because it may exceed system max packet size (current size:%llu, add size:%llu)", localServer_->serverId, size, entrySize);
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
easy_warn_log("Server %d : force send a msg, it may exceed system max packet size (current size:%llu, add size:%llu)", localServer_->serverId, size, entrySize);
|
|
}
|
|
}
|
|
|
|
*(entries->Add())= entry;
|
|
lastIndex= i;
|
|
size += entrySize;
|
|
/* packet size may exceed maxPacketSize a little bit. */
|
|
if (mode == EmptyMode)
|
|
{
|
|
/* XXX in EmptyMode we send only 1 entry */
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
 * We do NOT pipeline (i.e. do not advance nextIndex past this batch) in two cases:
 * 1. in a new term, when the server has not matched once yet;
 * 2. when the server is a learner and learner pipelining is disabled
 *    (a learner may change its local log position within one term).
 */
|
|
if ((!server->isLearner || enableLearnerPipelining_) && server->matchIndex != 0 && mode == NormalMode && lastIndex != 0)
|
|
{
|
|
server->nextIndex= lastIndex + 1;
|
|
msg->set_nocache(false);
|
|
easy_warn_log("Server %d : update server %d 's nextIndex(old:%llu,new:%llu)\n", localServer_->serverId, server->serverId, nextIndex, server->nextIndex.load());
|
|
}
|
|
}
|
|
|
|
msg->set_commitindex(std::min(msg->commitindex(), prevLogIndex + msg->entries_size()));
|
|
|
|
return size;
|
|
}
|
|
|
|
int Paxos::tryUpdateCommitIndex()
|
|
{
|
|
std::lock_guard<std::mutex> lg(lock_);
|
|
|
|
int ret= tryUpdateCommitIndex_();
|
|
|
|
//if (ret == 0)
|
|
//appendLog(false);
|
|
|
|
return ret;
|
|
}
|
|
|
|
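/*
 * Leader only: recompute commitIndex as the quorum minimum of matchIndex
 * (bounded by the force-sync minimum when leaderForceSyncStatus_ is on).
 * Unless running in consensusAsync mode, the new index must belong to the
 * current term and must not point into an unresolved commit-dependency
 * range. Applies any prepared configure change that becomes committed and
 * wakes up waiters.
 */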
int Paxos::tryUpdateCommitIndex_()
|
|
{
|
|
if (state_ != LEADER)
|
|
return -1;
|
|
if (shutdown_.load())
|
|
return -1;
|
|
|
|
uint64_t newCommitIndex= config_->quorumMin(&Server::getMatchIndex);
|
|
uint64_t forceCommitIndex= config_->forceMin(&Server::getMatchIndex);
|
|
if (forceCommitIndex < newCommitIndex && leaderForceSyncStatus_.load())
|
|
newCommitIndex= forceCommitIndex;
|
|
|
|
if (commitIndex_ >= newCommitIndex)
|
|
return -1;
|
|
|
|
// in case leader does not write log to disk, unlikely to happen
|
|
if (newCommitIndex > log_->getLastLogIndex())
|
|
return -1;
|
|
|
|
// if async mode, skip log check
|
|
if (!consensusAsync_)
|
|
{
|
|
uint64_t term = 0, optype = 0, info = 0;
|
|
if (log_->getLogMeta(newCommitIndex, &term, &optype, &info))
|
|
return -1;
|
|
|
|
/* XXX the leader does not commit entries from other terms. */
|
|
if (term != currentTerm_)
|
|
return -1;
|
|
|
|
/* commit dependency case */
|
|
if (optype == kCommitDep)
|
|
{
|
|
easy_warn_log("Server %d : index %ld is kCommitDep, check lastNonCommitDepIndex %llu.\n", localServer_->serverId, newCommitIndex, cdrMgr_.lastNonCommitDepIndex.load());
|
|
if (cdrMgr_.lastNonCommitDepIndex > newCommitIndex)
|
|
return -1;
|
|
else
|
|
newCommitIndex = cdrMgr_.lastNonCommitDepIndex;
|
|
}
|
|
}
|
|
|
|
if (commitIndex_ >= newCommitIndex)
|
|
return -1;
|
|
|
|
if (ccMgr_.prepared && ccMgr_.preparedIndex <= newCommitIndex && ccMgr_.preparedIndex > commitIndex_)
|
|
{
|
|
applyConfigureChangeNoLock_(ccMgr_.preparedIndex);
|
|
/*
 * Case: we prepared a change while we were a follower, and we apply it now that we are leader.
 * In this case, we should clear the ccMgr info.
 */
|
|
if (ccMgr_.needNotify != 1)
|
|
ccMgr_.clear();
|
|
}
|
|
|
|
easy_warn_log("Server %d : Leader commitIndex change from %ld to %ld\n", localServer_->serverId, commitIndex_, newCommitIndex);
|
|
commitIndex_= newCommitIndex;
|
|
|
|
/* already hold the lock_ by the caller. */
|
|
cond_.notify_all();
|
|
|
|
appendLogToLearner();
|
|
return 0;
|
|
}
|
|
|
|
/* TODO should read from config file or cmd line */
|
|
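/*
 * Initialize a normal (voting) node: restore clusterId/currentTerm/votedFor
 * and the member/learner configuration from the log's metadata (or from
 * strConfig on first start), scan the log for an uncommitted configure
 * change left by a crash, start the network service on the configured port,
 * create the election/epoch/purge-log timers, then install the
 * configuration and connect to the other members.
 */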
int Paxos::init(const std::vector<std::string>& strConfig/*start 0*/, uint64_t current/*start 1*/, ClientService *cs, uint64_t ioThreadCnt, uint64_t workThreadCnt, std::shared_ptr<LocalServer> localServer, bool memory_usage_count, uint64_t heartbeatThreadCnt)
|
|
{
|
|
// set new seed for auto leader transfer
|
|
srand(time(0));
|
|
|
|
bool needSetMeta= false;
|
|
/* Init persistent variables */
|
|
uint64_t itmp;
|
|
if (! log_->getMetaData(std::string(keyClusterId), &itmp))
|
|
{
|
|
clusterId_.store(itmp);
|
|
}
|
|
|
|
if (! log_->getMetaData(std::string(keyCurrentTerm), &itmp))
|
|
{
|
|
currentTerm_= itmp;
|
|
log_->setTerm(currentTerm_);
|
|
}
|
|
|
|
if (! log_->getMetaData(std::string(keyVoteFor), &itmp))
|
|
votedFor_= itmp;
|
|
|
|
/* Init members and learners */
|
|
std::string config;
|
|
log_->getMetaData(std::string(keyMemberConfigure), config);
|
|
|
|
uint64_t metaCurrent= 0;
|
|
std::vector<std::string> strMembers= StableConfiguration::stringToVector(config, metaCurrent);
|
|
//TODO check strConfig and strMembers is equal
|
|
|
|
const std::vector<std::string> *pConfig= NULL;
|
|
uint64_t index;
|
|
if (strConfig.size() == 0)
|
|
{
|
|
if (metaCurrent == 0)
|
|
{
|
|
easy_error_log("Paxos::init: Can't find metaCurrent in MemberConfigure when init a follower node, there may have some error in meta, or this may be a learner!!");
|
|
assert(0);
|
|
return -1;
|
|
}
|
|
pConfig= &strMembers;
|
|
index= metaCurrent;
|
|
}
|
|
else
|
|
{
|
|
pConfig= &strConfig;
|
|
index= current;
|
|
/* We init from the arg (not the meta), so we should set the meta after initializing the configuration! */
|
|
needSetMeta= true;
|
|
}
|
|
|
|
config.clear();
|
|
log_->getMetaData(std::string(keyLearnerConfigure), config);
|
|
std::vector<std::string> strLearners= StableConfiguration::stringToVector(config, metaCurrent);
|
|
|
|
/* Search logs and init ccMgr if any configurechange has not been applied. */
|
|
uint64_t startScanIndex= 0;
|
|
uint64_t lastLogIndex= log_->getLastLogIndex();
|
|
log_->getMetaData(keyScanIndex, &startScanIndex);
|
|
|
|
if (startScanIndex != 0)
|
|
{
|
|
if (startScanIndex > lastLogIndex)
|
|
{
|
|
/* We have not written the configure change logentry into the log yet, skip the scan. */
|
|
log_->setMetaData(keyScanIndex, 0);
|
|
}
|
|
else
|
|
{
|
|
easy_error_log("Server %d : Start scan log on startup from %llu to %llu for uncommit configure change log entries.\n", index, startScanIndex, lastLogIndex);
|
|
for (uint64_t i= startScanIndex; i <= lastLogIndex; ++i)
|
|
{
|
|
LogEntry entry;
|
|
if (log_->getEntry(i, entry, false))
|
|
{
|
|
easy_error_log("Fail to get log on startup, index %llu", i);
|
|
exit(1);
|
|
}
|
|
if (entry.optype() == kConfigureChange)
|
|
{
|
|
if (ccMgr_.prepared == 0)
|
|
{
|
|
ccMgr_.prepared= 1;
|
|
ccMgr_.preparedIndex= entry.index();
|
|
}
|
|
else
|
|
{
|
|
easy_error_log("Server %d : Scan log on startup find more than 1 uncommit configure change entries!!\n", index);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Check whether the start scan index has already been cleaned up; otherwise we may have hit a bug. */
|
|
log_->getMetaData(keyScanIndex, &startScanIndex);
|
|
if (startScanIndex != 0)
|
|
{
|
|
easy_error_log("Server %d : startScanIndex(%llu) does not been clean up after scan log, may hit a bug!! We clean it now!!\n", index);
|
|
log_->setMetaData(keyScanIndex, 0);
|
|
}
|
|
}
|
|
|
|
log_->initMetaCache();
|
|
|
|
/* Init Service */
|
|
srv_= std::shared_ptr<Service>(new Service(this));
|
|
if (cs)
|
|
srv_->cs= cs;
|
|
|
|
srv_->init(ioThreadCnt, workThreadCnt, heartbeatTimeout_, memory_usage_count, heartbeatThreadCnt);
|
|
std::string curConfig= (*pConfig)[index - 1];
|
|
auto pos = curConfig.find(":");
|
|
host_ = curConfig.substr(0, pos);
|
|
port_ = std::stoull(curConfig.substr(pos + 1));
|
|
int error= 0;
|
|
if ((error= srv_->start(port_)))
|
|
{
|
|
easy_error_log("Fail to start libeasy service, error(%d).", error);
|
|
abort();
|
|
}
|
|
|
|
electionTimer_= std::make_shared<ThreadTimer>(srv_->getThreadTimerService(), srv_, electionTimeout_, ThreadTimer::Stage, &Paxos::startElectionCallback, this);
|
|
electionTimer_->start();
|
|
epochTimer_= std::make_shared<ThreadTimer>(srv_->getThreadTimerService(), srv_, electionTimeout_, ThreadTimer::Repeatable, &Paxos::epochTimerCallback, this);
|
|
purgeLogTimer_ = std::make_shared<ThreadTimer>(srv_->getThreadTimerService(), srv_, purgeLogTimeout_, ThreadTimer::Repeatable, &Paxos::purgeLogCallback, this);
|
|
|
|
/*
|
|
* TODO: we should 1. start libeasy 2. installConfig 3. requestVote
|
|
*/
|
|
|
|
/* Init Configuration */
|
|
std::dynamic_pointer_cast<StableConfiguration>(config_)->installConfig((*pConfig), index, this, localServer);
|
|
config_->forEach(&Server::connect, (void *)NULL);
|
|
config_->addLearners(strLearners, this, true);
|
|
|
|
if (needSetMeta)
|
|
{
|
|
log_->setMetaData(Paxos::keyLearnerConfigure, config_->learnersToString());
|
|
log_->setMetaData(Paxos::keyMemberConfigure, config_->membersToString(localServer_->strAddr));
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int Paxos::initAsLearner(std::string& strConfig, ClientService *cs, uint64_t ioThreadCnt, uint64_t workThreadCnt, std::shared_ptr<LocalServer> localServer, bool memory_usage_count, uint64_t heartbeatThreadCnt)
|
|
{
|
|
// set new seed for auto leader transfer
|
|
srand(time(0));
|
|
|
|
bool needSetMeta= false;
|
|
state_= LEARNER;
|
|
|
|
easy_warn_log("Start init node as a learner.");
|
|
/* Init persistent variables */
|
|
uint64_t itmp;
|
|
if (! log_->getMetaData(std::string(keyClusterId), &itmp))
|
|
{
|
|
clusterId_.store(itmp);
|
|
}
|
|
|
|
if (! log_->getMetaData(std::string(keyCurrentTerm), &itmp))
|
|
{
|
|
currentTerm_= itmp;
|
|
log_->setTerm(currentTerm_);
|
|
}
|
|
|
|
if (! log_->getMetaData(std::string(keyVoteFor), &itmp))
|
|
votedFor_= itmp;
|
|
|
|
std::vector<std::string> strLearners;
|
|
std::string strMember;
|
|
std::string config;
|
|
if (strConfig.size() == 0)
|
|
{
|
|
/* Init members and learners */
|
|
/**
|
|
* Old learner node format:
|
|
* keyMemberConfigure: ""
|
|
* keyLearnerConfigure: "<local ip:port>"
|
|
* New learner node format:
|
|
* keyMemberConfigure: "<local ip:port>"
|
|
* keyLearnerConfigure: "all learners in the cluster"
|
|
**/
|
|
log_->getMetaData(std::string(keyMemberConfigure), config);
|
|
|
|
if (config.size() > 0)
|
|
{
|
|
/* new format */
|
|
uint64_t metaCurrent;
|
|
strMember= config;
|
|
|
|
config.clear();
|
|
log_->getMetaData(std::string(keyLearnerConfigure), config);
|
|
strLearners= StableConfiguration::stringToVector(config, metaCurrent);
|
|
|
|
}
|
|
else
|
|
{
|
|
/* old format */
|
|
log_->getMetaData(std::string(keyLearnerConfigure), strMember);
|
|
needSetMeta= true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
strMember= strConfig;
|
|
needSetMeta= true;
|
|
}
|
|
|
|
log_->initMetaCache();
|
|
|
|
  /* Init Service */
  srv_= std::shared_ptr<Service>(new Service(this));
  if (cs)
    srv_->cs= cs;

  srv_->init(ioThreadCnt, workThreadCnt, heartbeatTimeout_, memory_usage_count, heartbeatThreadCnt);
  electionTimer_= std::make_shared<ThreadTimer>(srv_->getThreadTimerService(), srv_, electionTimeout_, ThreadTimer::Stage, &Paxos::startElectionCallback, this);
  epochTimer_= std::make_shared<ThreadTimer>(srv_->getThreadTimerService(), srv_, electionTimeout_, ThreadTimer::Repeatable, &Paxos::epochTimerCallback, this);
  purgeLogTimer_ = std::make_shared<ThreadTimer>(srv_->getThreadTimerService(), srv_, purgeLogTimeout_, ThreadTimer::Repeatable, &Paxos::purgeLogCallback, this);

  const std::string& curConfig= strMember;
  auto pos = curConfig.find(":");
  host_ = curConfig.substr(0, pos);
  port_ = std::stoull(curConfig.substr(pos + 1));
  int error= 0;
  if ((error= srv_->start(port_)))
  {
    easy_error_log("Fail to start libeasy service, error(%d).", error);
    abort();
  }

  /*
   * TODO: we should 1. start libeasy 2. installConfig 3. requestVote
   */

  /* Init Configuration */
  std::vector<std::string> tmpConfig;
  tmpConfig.push_back(strMember);
  std::dynamic_pointer_cast<StableConfiguration>(config_)->installConfig(tmpConfig, 1, this, localServer);
  localServer_->serverId += 100;

  /* The learner now has all other learners' info. */
  //config_->forEachLearners(&Server::connect, (void *)NULL);
  config_->addLearners(strLearners, this, true);

  if (needSetMeta)
  {
    log_->setMetaData(Paxos::keyLearnerConfigure, config_->learnersToString());
    log_->setMetaData(Paxos::keyMemberConfigure, config_->membersToString());
  }
  return 0;
}

void Paxos::msleep(uint64_t t)
{
  struct timeval sleeptime;
  if (t == 0)
    return;
  sleeptime.tv_sec= t / 1000;
  sleeptime.tv_usec= (t - (sleeptime.tv_sec * 1000)) * 1000;
  select(0, 0, 0, 0, &sleeptime);
}

void Paxos::startElectionCallback()
{
  easy_warn_log("Server %d : Enter startElectionCallback\n", localServer_->serverId);
  requestVote(false);
}

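/*
 * Heartbeat timer callback bound to a single remote server: if the server
 * object is still alive (the weak_ptr can be locked), send it a heartbeat
 * appendLog message.
 */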
void Paxos::heartbeatCallback(std::weak_ptr<RemoteServer> wserver)
{
  std::shared_ptr<RemoteServer> server;
  if (!(server = wserver.lock()))
    return;

  Paxos *paxos= server->paxos;

  easy_warn_log("Server %d : send heartbeat msg to server %ld\n", paxos->getLocalServer()->serverId, server->serverId);
  paxos->appendLogToServer(wserver, true, true);
}

uint64_t Paxos::getLeaderTransferInterval_()
{
  return (electionTimeout_ / 5) + 100;
}

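/*
 * Compute the next epoch at which the leader should re-check state machine
 * health for auto leader transfer. Returns UINT64_MAX when auto leader
 * transfer is disabled, i.e. the check is never scheduled.
 */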
uint64_t Paxos::getNextEpochCheckStatemachine_(uint64_t epoch)
{
  if (option.enableAutoLeaderTransfer_)
    return epoch + std::max((uint64_t)5, (option.autoLeaderTransferCheckSeconds_ * 1000 / electionTimeout_));
  else
    return UINT64_MAX;
}

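/*
 * Decide whether the leader should hand over leadership this epoch.
 * Reasons: the local instance is a log node, or the local state machine is
 * reported unhealthy. Returns the serverId of a randomly chosen candidate
 * whose election weight is not lower than ours and whose last acked epoch is
 * current, or 0 if no transfer should happen.
 */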
// called with the Paxos mutex held
uint64_t Paxos::leaderTransferIfNecessary_(uint64_t epoch)
{
  bool run= false;
  std::string reason;
  uint64_t target;
  if (!option.enableAutoLeaderTransfer_.load() || state_ != LEADER || subState_ == SubLeaderTransfer)
  {
    return 0;
  }

  if (localServer_->logType)
  {
    run= true;
    reason= "instance is log node";
  }
  else if (nextEpochCheckStatemachine_ != UINT64_MAX)
  {
    if (log_->isStateMachineHealthy())
    {
      nextEpochCheckStatemachine_= UINT64_MAX;
    }
    else if (epoch >= nextEpochCheckStatemachine_)
    {
      run= true;
      reason= "state machine not healthy";
      nextEpochCheckStatemachine_ = getNextEpochCheckStatemachine_(epoch);
    }
  }

  if (!run)
  {
    return 0;
  }

  run= false;

  auto servers = config_->getServers();
  std::vector<uint64_t> choices;
  for (auto& e : servers)
  {
    if (e == nullptr || e->serverId == localServer_->serverId)
      continue;
    std::shared_ptr<RemoteServer> server = std::dynamic_pointer_cast<RemoteServer>(e);
    if (server->electionWeight >= localServer_->electionWeight && server->getLastAckEpoch() >= epoch)
    {
      run= true;
      choices.push_back(server->serverId);
    }
  }

  if (!run)
  {
    return 0;
  }

  target = choices[rand() % choices.size()];
  easy_error_log("Server %d: try to do an auto leader transfer, reason: %s, target: %llu", localServer_->serverId, reason.c_str(), target);

  return target;
}

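/*
 * Periodic epoch task, armed on the leader (and candidate).
 *
 * A candidate only advances currentEpoch_. The leader compares the current
 * epoch with the epochs acknowledged by its followers:
 *  - if the force-sync servers fall too far behind, force sync is
 *    temporarily disabled (and re-enabled once they catch up);
 *  - if even a quorum has not acknowledged the current epoch, the leader
 *    considers the majority lost and steps down by starting a new term;
 *  - otherwise the epoch is advanced and, if leaderTransferIfNecessary_
 *    picks a target, an auto leader transfer is started.
 */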
void Paxos::epochTimerCallback()
{
  std::unique_lock<std::mutex> ul(lock_);
  if (state_ != LEADER && state_ != CANDIDATE)
  {
    epochTimer_->stop();
    return;
  }

  if (state_ == CANDIDATE)
  {
    /* When we're a candidate we only advance the epoch. */
    easy_warn_log("Server %d : Epoch task currentEpoch(%llu)\n", localServer_->serverId, currentEpoch_.load());
    currentEpoch_.fetch_add(1);
    return;
  }
  uint64_t forceMinEpoch= config_->forceMin(&Server::getLastAckEpoch);
  uint64_t quorumEpoch= config_->quorumMin(&Server::getLastAckEpoch);

  easy_warn_log("Server %d : Epoch task currentEpoch(%llu) quorumEpoch(%llu) forceMinEpoch(%llu)\n", localServer_->serverId, currentEpoch_.load(), quorumEpoch, forceMinEpoch);

  if (currentEpoch_.load() > (forceMinEpoch + forceSyncEpochDiff_))
  {
    if (leaderForceSyncStatus_ == true)
    {
      leaderForceSyncStatus_.store(false);
      easy_warn_log("Server %d : lost connect with force sync server, disable force sync now!\n", localServer_->serverId);
    }
  }
  else
  {
    if (leaderForceSyncStatus_ == false)
    {
      leaderForceSyncStatus_.store(true);
      easy_warn_log("Server %d : reconnect with all force sync server, enable force sync now!\n", localServer_->serverId);
    }
  }

  if (currentEpoch_.load() > quorumEpoch)
  {
    /* Lost connect with major followers, we should step down. */
    easy_error_log("Server %d : lost connect with major followers, stepdown myself\n", localServer_->serverId);
    if (debugDisableStepDown)
    {
      easy_warn_log("Server %d : Skip step down because of debugDisableStepDown currentTerm(%ld)\n", localServer_->serverId, currentTerm_.load());
      return;
    }
    newTerm(currentTerm_ + 1);
    electionTimer_->start();
  }
  else
  {
    assert(currentEpoch_.load() == quorumEpoch);
    uint64_t prevEpoch = currentEpoch_.fetch_add(1);
    uint64_t target = leaderTransferIfNecessary_(prevEpoch);
    if (target) {
      subState_.store(SubLeaderTransfer);
      weightElecting_ = true;
      ul.unlock();
      /* retry duration should not exceed one epoch */
      uint64_t times = std::max((electionTimeout_ / getLeaderTransferInterval_() + 1), (uint64_t)3);
      leaderTransferSend_(target, currentTerm_.load(), log_->getLastLogIndex(), times);
    }
  }
}

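/*
 * Configure automatic log purging. When autoPurge is false the purge timer
 * is stopped; when useAppliedIndex is false the purge position falls back to
 * commitIndex. The optional handler is installed into the PaxosLog as the
 * purge-log filter.
 */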
int Paxos::initAutoPurgeLog(bool autoPurge, bool useAppliedIndex, std::function<bool(const LogEntry &le)> handler)
{
  autoPurge_ = autoPurge;
  if (!autoPurge_)
    purgeLogTimer_->stop();
  useAppliedIndex_ = useAppliedIndex;
  if (autoPurge && !useAppliedIndex) {
    easy_warn_log("Server %d : use commitIndex instead of appliedIndex when auto purging log.", localServer_->serverId);
  }
  log_->setPurgeLogFilter(handler);
  return 0;
}

void Paxos::purgeLogCallback()
{
  /* purge log without a forceIndex */
  forcePurgeLog(false /* local */);
}

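/*
 * Async worker for the purge-log queue: truncate the log forward up to the
 * requested index, capped by appliedIndex (or commitIndex when appliedIndex
 * is not used) so that unapplied or uncommitted entries are never purged.
 */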
void Paxos::doPurgeLog(purgeLogArgType *arg)
{
  uint64_t purgeIndex;
  if (arg->paxos->useAppliedIndex_)
    purgeIndex = arg->index < arg->paxos->getAppliedIndex()? arg->index: arg->paxos->getAppliedIndex();
  else
    purgeIndex = arg->index < arg->paxos->getCommitIndex()? arg->index: arg->paxos->getCommitIndex();
  easy_warn_log("Server %d : doPurgeLog purge index %ld\n", arg->paxos->localServer_->serverId, purgeIndex);
  arg->paxos->getLog()->truncateForward(purgeIndex);
}

void Paxos::updateAppliedIndex(uint64_t index)
{
  appliedIndex_.store(index);
}

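/*
 * Walk the cluster info and return the smallest matchIndex that still has to
 * be protected from purging, starting from forceIndex and never exceeding
 * the local lastLogIndex.
 */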
uint64_t Paxos::collectMinMatchIndex(std::vector<ClusterInfoType> &cis, bool local, uint64_t forceIndex)
{
  uint64_t ret = forceIndex;
  /*
   * minMatchIndex protection:
   * 1. non-local (leader only): consider all nodes' matchIndex
   * 2. local & leader: consider all nodes' matchIndex
   * 3. local & not leader: consider only learners whose source is this node
   */
  for (auto ci : cis) {
    if (ci.serverId == localServer_->serverId)
      continue;
    if (local == false || state_ == LEADER ||
        (ci.role == LEARNER && ci.learnerSource == localServer_->serverId))
      ret = ci.matchIndex < ret ? ci.matchIndex : ret;
  }
  uint64_t lastLogIndex = log_->getLastLogIndex();
  if (ret > lastLogIndex)
    ret = lastLogIndex;
  return ret;
}

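/*
 * Trigger a log purge. When local is false this must run on the leader: the
 * purge is queued locally and a PurgeLog leader command is issued so that
 * the other nodes purge as well. When local is true only the local log is
 * purged. forceIndex gives an upper bound for the purge position.
 */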
int Paxos::forcePurgeLog(bool local, uint64_t forceIndex)
{
  if (local == false && state_ != LEADER) {
    return -1;
  }
  /* update minMatchIndex_ */
  /* appendLog should carry purge-log information if minMatchIndex_ is not 0 */
  std::vector<Paxos::ClusterInfoType> cis;
  getClusterInfo(cis);
  if (cis.size() == 0) {
    return 0;
  }
  minMatchIndex_ = collectMinMatchIndex(cis, local, forceIndex);
  easy_warn_log("Server %d : Prepare to purge log to %s, update minMatchIndex %ld\n", localServer_->serverId, local ? "local" : "cluster", minMatchIndex_);
  /* leader */
  purgeLogQueue_.push(new purgeLogArgType(minMatchIndex_, this));
  srv_->sendAsyncEvent(&SingleProcessQueue<purgeLogArgType>::process, &purgeLogQueue_, Paxos::doPurgeLog);
  if (local == false) {
    /* follower */
    std::lock_guard<std::mutex> lg(lock_);
    return leaderCommand(PurgeLog, NULL);
  } else {
    return 0;
  }
}

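/*
 * Re-evaluate leadership after an election weight change while this node is
 * leader: if a reachable server now has the highest election weight, try to
 * transfer leadership to it; otherwise clear the SubLeaderTransfer sub-state.
 * The action is abandoned when the term has changed or we are no longer
 * leader.
 */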
void Paxos::electionWeightAction(uint64_t term, uint64_t baseEpoch)
{
  easy_error_log("Server %d : electionWeightAction start, term:%llu epoch:%llu", localServer_->serverId, term, baseEpoch);
  std::lock_guard<std::mutex> lg(lock_);
  if (term != currentTerm_.load() || state_.load() != LEADER)
  {
    subState_.store(SubNone);
    weightElecting_ = false;
    easy_error_log("Server %d : electionWeightAction fail, action term(%llu), currentTerm(%llu), current state(%s)\n", localServer_->serverId, term, currentTerm_.load(), stateString[state_]);
    return;
  }

  uint64_t targetId= config_->getMaxWeightServerId(baseEpoch, localServer_);

  if (targetId != localServer_->serverId && targetId != 0)
  {
    auto term= currentTerm_.load();
    auto lli= log_->getLastLogIndex();

    easy_error_log("Server %d : electionWeightAction try to transfer leader to server %llu, term(%llu)\n", localServer_->serverId, targetId, term);

    uint64_t retryTimes = 5;
    lock_.unlock();
    leaderTransferSend_(targetId, term, lli, retryTimes);
    lock_.lock();
  }
  else
  {
    subState_.store(SubNone);
    weightElecting_ = false;
    easy_error_log("Server %d : electionWeightAction skip transfer leader because %s.\n", localServer_->serverId, targetId == 0 ? "no available server" : "I am the max weight available server");
  }
}

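/*
 * Re-initialize a remote server's nextIndex after a reconnect: resume from
 * its known matchIndex when we have one, from appliedIndex for learners fed
 * by applied index, and otherwise from just before the last log index.
 */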
void Paxos::resetNextIndexForServer(std::shared_ptr<RemoteServer> server)
{
  std::lock_guard<std::mutex> lg(lock_);
  auto lastLogIndex= getLastLogIndex();
  /* make sure the first appendLog msg after a reconnect has a payload for truncateForward. */
  if (lastLogIndex > 1)
    lastLogIndex -= 1;

  if (server->matchIndex.load() != 0)
    server->nextIndex.store(server->matchIndex.load() + 1);
  else if (server->isLearner && server->sendByAppliedIndex)
    server->nextIndex.store(appliedIndex_.load() + 1);
  else
    server->nextIndex.store(lastLogIndex);
}

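/*
 * Every syncMetaInterval_ meta updates, fill the learner meta
 * (ClusterInfoEntry list) so the caller can attach it to an outgoing
 * message. Returns true when there is meta to send.
 */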
bool Paxos::tryFillFollowerMeta_(::google::protobuf::RepeatedPtrField< ::alisql::ClusterInfoEntry > *ciEntries)
{
  uint64_t localFollowerMetaNo= followerMetaNo_.fetch_add(0);
  if (localFollowerMetaNo > lastSyncMetaNo_ + syncMetaInterval_)
  {
    lastSyncMetaNo_= localFollowerMetaNo;
    config_->forEachLearners(&Server::fillFollowerMeta, (void *)ciEntries);
  }
  return ciEntries->size() != 0;
}

int Paxos::getClusterInfo(std::vector<ClusterInfoType> &cis)
{
  cis.clear();

  config_->forEach(&Server::fillInfo, (void *)&cis);
  config_->forEachLearners(&Server::fillInfo, (void *)&cis);

  return 0;
}

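/*
 * Leader-only health summary: for every member and learner report its
 * address, role, connection status, and how far its log replication and
 * apply position lag behind the leader. Returns 1 when called on a
 * non-leader node.
 */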
int Paxos::getClusterHealthInfo(std::vector<HealthInfoType> &healthInfo)
{
  std::lock_guard<std::mutex> lg(lock_);
  if (state_ != LEADER)
    return 1;

  uint64_t lastLogIndex = getLastLogIndex();
  uint64_t appliedIndex = appliedIndex_;
  std::vector<ClusterInfoType> cis;
  getClusterInfo(cis);

  for (auto &e : cis) {
    HealthInfoType hi;
    hi.serverId = e.serverId;
    hi.addr = e.ipPort;
    hi.role = e.role;
    if (e.serverId != localServer_->serverId) {
      std::shared_ptr<RemoteServer> server = std::dynamic_pointer_cast<RemoteServer>(config_->getServer(e.serverId));
      if (server) {
        hi.connected = !(server->lostConnect || server->netError);
      } else {
        hi.connected = false;
      }
    } else {
      hi.connected = true;
    }
    hi.logDelayNum = lastLogIndex > e.matchIndex ? lastLogIndex - e.matchIndex : 0;
    hi.applyDelayNum = appliedIndex > e.appliedIndex ? appliedIndex - e.appliedIndex : 0;
    healthInfo.push_back(hi);
  }

  return 0;
}

void Paxos::printClusterInfo(const std::vector<ClusterInfoType> &cis)
{
  for (auto& ci : cis)
  {
    std::cout<< "serverId:"<< ci.serverId<< " ipPort:"<< ci.ipPort<< " matchIndex:"<< ci.matchIndex<< " nextIndex:"<< ci.nextIndex<< " role:"<< ci.role<< " hasVoted:"<< ci.hasVoted << " forceSync:" << ci.forceSync << " electionWeight:" << ci.electionWeight << " learnerSource:" << ci.learnerSource << " appliedIndex:" << ci.appliedIndex << " pipelining:" << ci.pipelining << std::endl<< std::flush;
  }
}

void Paxos::getMemberInfo(MemberInfoType *mi)
{
  mi->serverId= localServer_->serverId;
  mi->currentTerm= currentTerm_;
  mi->currentLeader= leaderId_;
  mi->commitIndex= commitIndex_;

  uint64_t lastLogIndex= log_->getLastLogIndex();
  LogEntry entry;
  uint64_t lastLogTerm= 0;
  if (log_->getEntry(lastLogIndex, entry, false) == 0)
    lastLogTerm= entry.term();

  mi->lastLogTerm= lastLogTerm;
  mi->lastLogIndex= lastLogIndex;
  if (weightElecting_.load() || leaderStepDowning_.load())
    mi->role= NOROLE;
  else
    mi->role= state_;
  mi->votedFor= votedFor_;
  mi->lastAppliedIndex= appliedIndex_.load();
  mi->currentLeaderAddr= leaderAddr_;
}

uint64_t Paxos::getServerIdFromAddr(const std::string& strAddr)
{
  std::unique_lock<std::mutex> ul(lock_);
  return config_->getServerIdFromAddr(strAddr);
}

// Override the default compress option for the given address; if the address is empty, override for all servers.
// Return 0 on success, 1 on failure.
int Paxos::setMsgCompressOption(int type, size_t threshold, bool checksum, const std::string &strAddr)
{
  std::unique_lock<std::mutex> ul(lock_);
  MsgCompressOption option((MsgCompressionType)type, threshold, checksum);

  if (shutdown_.load() || config_ == nullptr) {
    easy_error_log("set MsgCompressOption fail, Paxos is stopped.\n");
    return 1;
  }

  if (strAddr == "") {
    config_->forEach(&Server::setMsgCompressOption, &option);
    config_->forEachLearners(&Server::setMsgCompressOption, &option);
  } else {
    uint64_t id = config_->getServerIdFromAddr(strAddr);
    Configuration::ServerRef server;
    if (id == 0 || ((server = config_->getServer(id)) == nullptr)) {
      easy_error_log("Server %d : can't find server %s in setMsgCompressOption\n", localServer_->serverId, strAddr.c_str());
      return 1;
    }
    server->setMsgCompressOption(&option);
  }

  easy_warn_log("set MsgCompressOption type(%d) threshold(%u) checksum(%d) to server(%s) succeed.\n",
                type, threshold, checksum, strAddr == "" ? "all" : strAddr.c_str());
  return 0;
}

int Paxos::resetMsgCompressOption()
{
  return setMsgCompressOption(0 /* type */, 0 /* threshold */, 0 /* checksum */, "");
}

int Paxos::setClusterId(uint64_t ci)
{
  int ret = log_->setMetaData(std::string(keyClusterId), ci);
  if (ret == 0)
    clusterId_.store(ci);
  return ret;
}

void Paxos::setLearnerConnTimeout(uint64_t t)
{
  if (t < (heartbeatTimeout_/4))
    t = heartbeatTimeout_/4;
  easy_warn_log("Server %d : Learner connection timeout set to %llu.", localServer_->serverId, t);
  localServer_->learnerConnTimeout = t;
}

void Paxos::setSendPacketTimeout(uint64_t t)
{
  if (t < heartbeatTimeout_)
    t = heartbeatTimeout_;
  easy_warn_log("Server %d : Send packet timeout set to %llu.", localServer_->serverId, t);
  srv_->setSendPacketTimeout(t);
}

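/*
 * Leader-only maintenance hook (two overloads, by serverId or by "ip:port"):
 * forcibly overwrite a remote server's matchIndex. This bypasses the normal
 * log-matching protocol, hence the "Dangerous Operation" log line.
 */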
void Paxos::forceFixMatchIndex(uint64_t targetId, uint64_t newIndex)
{
  std::unique_lock<std::mutex> ul(lock_);
  if (state_ != LEADER || targetId == 0 || targetId == localServer_->serverId)
    return;
  std::shared_ptr<RemoteServer> server= std::dynamic_pointer_cast<RemoteServer>(config_->getServer(targetId));
  if (!server)
  {
    easy_warn_log("Server %d : can't find server %llu in forceFixMatchIndex\n", localServer_->serverId, targetId);
    return;
  }
  easy_error_log("Server %d : force fix server %d's matchIndex(old: %llu, new: %llu). Dangerous Operation!", localServer_->serverId, targetId, server->matchIndex.load(), newIndex);
  server->resetMatchIndex(newIndex);
}

void Paxos::forceFixMatchIndex(const std::string& addr, uint64_t newIndex)
{
  std::unique_lock<std::mutex> ul(lock_);
  uint64_t targetId = config_->getServerIdFromAddr(addr);
  if (state_ != LEADER || targetId == 0 || targetId == localServer_->serverId)
    return;
  std::shared_ptr<RemoteServer> server= std::dynamic_pointer_cast<RemoteServer>(config_->getServer(targetId));
  if (!server)
  {
    easy_warn_log("Server %d : can't find server %llu in forceFixMatchIndex\n", localServer_->serverId, targetId);
    return;
  }
  easy_error_log("Server %d : force fix server %d's matchIndex(old: %llu, new: %llu). Dangerous Operation!", localServer_->serverId, targetId, server->matchIndex.load(), newIndex);
  server->resetMatchIndex(newIndex);
}

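/*
 * Verify a log entry's payload checksum with the registered checksum
 * callback. Returns 0 when the checksum matches or when checking is not
 * applicable (no callback, checksum mode off, or entry carries no checksum),
 * and -1 on a mismatch.
 */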
int Paxos::log_checksum_test(const LogEntry &le)
{
  if (checksumCb_ && checksum_mode_ && le.checksum() != 0)
  {
    const unsigned char* buf = reinterpret_cast<const unsigned char*>(le.value().c_str());
    uint64_t cs = checksumCb_(0, buf, le.value().size());
    if (cs == le.checksum())
      return 0;
    else
      return -1;
  }
  return 0;
}

void Paxos::reset_flow_control()
{
  std::unique_lock<std::mutex> ul(lock_);
  config_->reset_flow_control();
}

void Paxos::set_flow_control(uint64_t serverId, int64_t fc)
{
  /*
   * flow control mode:
   *  >0  TODO
   *   0  no flow control
   *  -1  slow send log (send only by heartbeat)
   * <-1  never send log
   */
  std::unique_lock<std::mutex> ul(lock_);
  config_->set_flow_control(serverId, fc);
}

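/*
 * Truncate the log backward from firstIndex. If a prepared configure change
 * would be dropped by the truncation, abort it first (see the comment in the
 * body) before delegating to PaxosLog::truncateBackward.
 */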
void Paxos::truncateBackward_(uint64_t firstIndex)
{
  if(ccMgr_.prepared && ccMgr_.preparedIndex >= firstIndex)
  {
    /*
     * 1. set aborted to 1 to notify the client that the configureChange has failed.
     * 2. reset preparedIndex to prevent applying the configureChange before ccMgr_ is cleared.
     * 3. the prepared flag is kept at 1 to prevent a concurrent configureChange until
     *    the client is notified and ccMgr_ is cleared.
     */
    ccMgr_.aborted = 1;
    ccMgr_.preparedIndex = 0;
    ccMgr_.cond.notify_all();

    // if scan index < firstIndex, we do not reset it; it will be reset when applied;
    // if scan index >= firstIndex, the prepared configure change will not be committed,
    // so we need to reset it to 0.
    log_->setMetaData(keyScanIndex, 0);
  }
  log_->truncateBackward(firstIndex);
}

const std::string Paxos::keyCurrentTerm= "@keyCurrentTerm_@";
const std::string Paxos::keyVoteFor= "@keyVoteFor_@";
const std::string Paxos::keyLastLeaderTerm= "@keyLastLeaderTerm_@";
const std::string Paxos::keyLastLeaderLogIndex= "@keyLastLeaderLogIndex_@";
const std::string Paxos::keyMemberConfigure= "@keyMemberConfigure_@";
const std::string Paxos::keyLearnerConfigure= "@keyLearnerConfigure_@";
const std::string Paxos::keyScanIndex= "@keyScanIndex_@";
const std::string Paxos::keyClusterId= "@keyClusterId_@";
const uint64_t Paxos::maxSystemPacketSize_= 50000000;

bool Paxos::debugDisableElection= false;
bool Paxos::debugDisableStepDown= false;
bool Paxos::debugWitnessTest= false;
bool Paxos::debugResetLogSlow= false;
bool Paxos::debugSkipUpdateCommitIndex= false;

} //namespace alisql