polardbxoperator/pkg/hpfs/backupbinlog/watcher.go


package backupbinlog

import (
	"context"
	"crypto/md5"
	"database/sql"
	"encoding/json"
	"fmt"
	"os"
	"path"
	"path/filepath"
	"regexp"
	"runtime/debug"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/alibaba/polardbx-operator/pkg/hpfs/config"
	"github.com/go-logr/logr"
	"github.com/pkg/errors"
	"sigs.k8s.io/controller-runtime/pkg/log/zap"
)
const (
	PERIOD              = 5 * time.Second
	PurgePeriod         = 1 * time.Minute
	InfoFilename        = "polarx_backupbinloginfo.txt"
	InfoVersionFilename = "polarx_backupbinloginfo_version.txt"
	IndexFilename       = "mysql_bin.index"
	UploadRecordsDbFile = "polarx_uploadrecord.db"
)
var (
	lock               = sync.Mutex{}
	registeredWatchers = map[string]*Watcher{}
)
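// UploadLatestBinlogFile asks the watcher registered for workDir to upload the
// latest (still-active) binlog file, then reports whether that upload has finished.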
func UploadLatestBinlogFile(workDir string) bool {
	setWatcherUploadLatest(workDir)
	return checkIfFinishUploadLatest(workDir)
}
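// GetWatcherInfoHash returns the hash of the backup info last read by the
// watcher registered for workDir, or "" if no watcher is registered.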
func GetWatcherInfoHash(workDir string) string {
	lock.Lock()
	defer lock.Unlock()
	watcher, ok := registeredWatchers[workDir]
	if ok {
		return watcher.GetHash()
	}
	return ""
}

func setWatcherUploadLatest(workDir string) {
	lock.Lock()
	defer lock.Unlock()
	watcher, ok := registeredWatchers[workDir]
	if ok {
		watcher.SetUploadLatest(true)
	}
}
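// checkIfFinishUploadLatest reports whether the newest binlog file listed in the
// index has an upload record with Status == 0 (i.e. it has been uploaded).
// A missing index file or a lookup error is reported as finished.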
func checkIfFinishUploadLatest(workDir string) bool {
	indexFilepath := path.Join(workDir, IndexFilename)
	_, err := os.Stat(indexFilepath)
	if errors.Is(err, os.ErrNotExist) {
		return true
	}
	watcher := NewWatcher(workDir)
	binlogNums, err := watcher.readLogIndex(indexFilepath)
	if err != nil {
		fmt.Printf("err : %+v\n", err)
		return true
	}
	if len(binlogNums) > 0 {
		maxBinlogNum := binlogNums[len(binlogNums)-1]
		dbFilepath := path.Join(workDir, UploadRecordsDbFile)
		db, err := GetDb(dbFilepath)
		if err != nil {
			fmt.Printf("err : %+v\n", err)
			return true
		}
		defer db.Close()
		record, err := FindRecord(db, maxBinlogNum.Num)
		if err != nil {
			fmt.Printf("err : %+v\n", err)
			return true
		}
		if record == nil || record.Status != 0 {
			return false
		}
	}
	return true
}
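// register adds the watcher to the global registry, keyed by its work directory.
// It returns false if a watcher is already registered for that directory.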
func register(watcher *Watcher) bool {
	lock.Lock()
	defer lock.Unlock()
	_, ok := registeredWatchers[watcher.workDir]
	if !ok {
		registeredWatchers[watcher.workDir] = watcher
		return true
	}
	return false
}

func unregister(watcher *Watcher) {
	lock.Lock()
	defer lock.Unlock()
	delete(registeredWatchers, watcher.workDir)
}
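// Watcher periodically scans a binlog work directory, uploads newly rotated
// binlog files through its callbacks, and purges local consensus logs that
// have already been uploaded.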
type Watcher struct {
	workDir               string
	actions               []Callback
	ctx                   context.Context
	cancelFunc            context.CancelFunc
	logger                logr.Logger
	uploadLogger          logr.Logger
	db                    *sql.DB
	mysqlDb               *sql.DB
	lastUploadedBinlogNum int64
	uploadLatest          bool
	uploadLatestCount     uint64
	lock                  sync.Mutex
	lastPurgeTime         time.Time
	infoHash              string
}
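// NewWatcher creates a watcher for workDir; the given callbacks are invoked,
// in order, for every binlog file that needs to be uploaded.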
func NewWatcher(workDir string, actions ...Callback) *Watcher {
	ctx, cancelFunc := context.WithCancel(context.Background())
	return &Watcher{
		workDir:    workDir,
		actions:    actions,
		ctx:        ctx,
		cancelFunc: cancelFunc,
		logger:     zap.New(zap.UseDevMode(true)).WithName("MysqlBinlogFileWatcher").WithValues("WorkDirectory", workDir),
		lock:       sync.Mutex{},
	}
}

func (w *Watcher) SetHash(hash string) {
	w.lock.Lock()
	defer w.lock.Unlock()
	w.infoHash = hash
}

func (w *Watcher) GetHash() string {
	w.lock.Lock()
	defer w.lock.Unlock()
	return w.infoHash
}

func (w *Watcher) SetUploadLatest(uploadLatest bool) {
	w.lock.Lock()
	defer w.lock.Unlock()
	w.uploadLatest = uploadLatest
	w.uploadLatestCount = 0
}

func (w *Watcher) IsUploadLatest() bool {
	w.lock.Lock()
	defer w.lock.Unlock()
	return w.uploadLatest
}

func (w *Watcher) UploadLatestCount() uint64 {
	w.lock.Lock()
	defer w.lock.Unlock()
	return w.uploadLatestCount
}
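// TryIncrementUploadLatestCount bumps the upload-latest counter if upload-latest
// mode is enabled, and reports whether the mode is enabled.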
func (w *Watcher) TryIncrementUploadLatestCount() bool {
	w.lock.Lock()
	defer w.lock.Unlock()
	if w.uploadLatest {
		w.uploadLatestCount += 1
	}
	return w.uploadLatest
}

func (w *Watcher) checkFiles(filepaths ...string) bool {
	for _, filepath := range filepaths {
		_, err := os.Stat(filepath)
		if err != nil {
			w.logger.Error(err, "failed to check file", "filepath", filepath)
			return false
		}
	}
	return true
}

func (w *Watcher) readFileLines(filepath string) ([]string, error) {
	bytes, err := os.ReadFile(filepath)
	if err != nil {
		w.logger.Error(err, "failed to read file", "filepath", filepath)
		return nil, err
	}
	content := string(bytes)
	result := strings.Split(content, "\n")
	return result, nil
}
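// readLogIndex parses the mysql_bin.index file and returns the binlog files it
// lists, sorted by their numeric suffix in ascending order.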
func (w *Watcher) readLogIndex(indexFilepath string) ([]BinlogNum, error) {
	lines, err := w.readFileLines(indexFilepath)
	if err != nil {
		return nil, err
	}
	// the dot must be escaped so that only names like "mysql_bin.000001" match
	reg := regexp.MustCompile(`^mysql_bin\.(\w+)$`)
	binlogNums := make([]BinlogNum, 0, len(lines))
	for _, line := range lines {
		if len(strings.TrimSpace(line)) == 0 {
			continue
		}
		filename := filepath.Base(line)
		matchedResult := reg.FindStringSubmatch(filename)
		if len(matchedResult) == 2 {
			num, err := strconv.ParseInt(matchedResult[1], 10, 64)
			if err != nil {
				w.logger.Error(err, "filename suffix is not a number", "filename", filename)
				return nil, err
			}
			binlogNums = append(binlogNums, BinlogNum{
				Num:      num,
				Filename: filename,
				Filepath: filepath.Join(filepath.Dir(indexFilepath), filename),
			})
		}
	}
	sort.Slice(binlogNums, func(i, j int) bool {
		return binlogNums[i].Num < binlogNums[j].Num
	})
	return binlogNums, nil
}
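// readInfo parses the key=value pairs in polarx_backupbinloginfo.txt into an
// Info struct, applies config defaults for the optional fields, and records an
// md5 hash of the stable fields (Version, PodName and UploadLatest excluded).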
func (w *Watcher) readInfo(filepath string) (*Info, error) {
	lines, err := w.readFileLines(filepath)
	if err != nil {
		return nil, err
	}
	infoMap := map[string]string{}
	for _, line := range lines {
		if len(strings.TrimSpace(line)) == 0 {
			continue
		}
		kv := strings.Split(line, "=")
		if len(kv) != 2 {
			err := fmt.Errorf("invalid field, line %s", line)
			w.logger.Error(err, "failed to parse field line", "filepath", filepath)
			return nil, err
		}
		infoMap[strings.TrimSpace(kv[0])] = strings.TrimSpace(kv[1])
	}
	namespace, namespaceOk := infoMap[InfoNamespace]
	xstoreName, xstoreNameOk := infoMap[InfoXStoreName]
	podName, podNameOk := infoMap[InfoPodName]
	version, versionOk := infoMap[InfoVersion]
	binlogChecksum, binlogChecksumOk := infoMap[InfoBinlogChecksum]
	sinkName, sinkNameOk := infoMap[InfoSinkName]
	sinkType, sinkTypeOk := infoMap[InfoSinkType]
	xstoreUid, xstoreUidOk := infoMap[InfoXStoreUid]
	pxcName, pxcNameOk := infoMap[InfoPxcName]
	pxcUid, pxcUidOk := infoMap[InfoPxcUid]
	// optional fields; defaults come from the backup binlog config
	uploadLatest, uploadLatestOk := infoMap[InfoUploadLatest]
	expireLogSeconds, expireLogSecondsOk := infoMap[InfoLocalExpireLogSeconds]
	maxLocalBinlogCount, maxLocalBinlogCountOk := infoMap[InfoMaxLocalBinlogCount]
	forbidPurgeStr, forbidPurgeOk := infoMap[InfoForbidPurge]
	if !(namespaceOk && xstoreNameOk && podNameOk && versionOk && binlogChecksumOk && sinkNameOk && sinkTypeOk && xstoreUidOk && pxcNameOk && pxcUidOk && forbidPurgeOk) {
		err := fmt.Errorf("invalid info")
		w.logger.Error(err, "invalid info", InfoNamespace, namespace, InfoXStoreName, xstoreName, InfoPodName, podName, InfoVersion, version, InfoBinlogChecksum, binlogChecksum, InfoSinkName, sinkName, InfoSinkType, sinkType, InfoXStoreUid, xstoreUid, InfoPxcName, pxcName, InfoPxcUid, pxcUid)
		return nil, err
	}
	forbidPurge, err := strconv.ParseBool(forbidPurgeStr)
	if err != nil {
		w.logger.Error(err, fmt.Sprintf("failed to parse forbid purge, str value = %s", forbidPurgeStr))
		return nil, err
	}
	var infoUploadLatest *bool
	if uploadLatestOk {
		uploadLatest = strings.TrimSpace(uploadLatest)
		val := strings.EqualFold(uploadLatest, "true") || strings.EqualFold(uploadLatest, "1")
		infoUploadLatest = &val
	}
	var infoExpireLogSeconds int64 = int64(config.GetConfig().BackupBinlogConfig.GetExpireLogHours() * 3600)
	if expireLogSecondsOk {
		val, err := strconv.ParseInt(expireLogSeconds, 10, 64)
		if err != nil {
			w.logger.Error(err, fmt.Sprintf("failed to parse expireLogSeconds, strValue=%s", expireLogSeconds))
		} else {
			infoExpireLogSeconds = val
		}
	}
	var infoMaxLocalBinlogCount int64 = config.GetConfig().BackupBinlogConfig.GetMaxLocalBinlogCount()
	if maxLocalBinlogCountOk {
		val, err := strconv.ParseInt(maxLocalBinlogCount, 10, 64)
		if err != nil {
			w.logger.Error(err, fmt.Sprintf("failed to parse maxLocalBinlogCount, strValue=%s", maxLocalBinlogCount))
		} else {
			infoMaxLocalBinlogCount = val
		}
	}
	info := &Info{
		Namespace:             namespace,
		XStoreName:            xstoreName,
		SinkName:              sinkName,
		SinkType:              sinkType,
		BinlogChecksum:        binlogChecksum,
		XStoreUid:             xstoreUid,
		PxcName:               pxcName,
		PxcUid:                pxcUid,
		LocalExpireLogSeconds: infoExpireLogSeconds,
		MaxLocalBinlogCount:   infoMaxLocalBinlogCount,
		ForbidPurge:           forbidPurge,
	}
	// hash only the fields set above; Version, PodName and UploadLatest are
	// assigned afterwards so that they do not change the info hash
	infoJson, _ := json.Marshal(info)
	w.SetHash(fmt.Sprintf("%x", md5.Sum(infoJson)))
	info.Version = version
	info.PodName = podName
	if info.UploadLatest != infoUploadLatest {
		info.UploadLatest = infoUploadLatest
		infoJson, _ = json.Marshal(info)
	}
	w.logger.Info("readInfo: " + string(infoJson))
	return info, nil
}
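// Start launches the watch loop in a background goroutine. Every PERIOD it
// re-reads the index and info files, triggers the upload callbacks for binlog
// files that have no successful upload record, and (at most once per
// PurgePeriod) kicks off an asynchronous purge of already-uploaded consensus logs.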
func (w *Watcher) Start() {
	go func() {
		defer func() {
			if err := recover(); err != nil {
				w.logger.Info("Skip panic", "err", err, "stack", string(debug.Stack()))
			}
		}()
		if !register(w) {
			return
		}
		defer unregister(w)
		infoFilepath := path.Join(w.workDir, InfoFilename)
		indexFilepath := path.Join(w.workDir, IndexFilename)
		if !w.checkFiles(infoFilepath, indexFilepath) {
			return
		}
		dbFilepath := path.Join(w.workDir, UploadRecordsDbFile)
		db, err := GetDb(dbFilepath)
		if err != nil {
			w.logger.Error(err, "failed to GetDb", "DbFilepath", dbFilepath)
			return
		}
		defer db.Close()
		err = TryCreateTableOfUploadRecord(db)
		if err != nil {
			w.logger.Error(err, "TryCreateTableOfUploadRecord")
			return
		}
		w.db = db
		// find mysql socket filepath
		mysqlSockPath, err := FindMysqlSockByLogDir(w.workDir, config.GetConfig().BackupBinlogConfig.RootDirectories)
		if err != nil {
			w.logger.Error(err, "failed to find mysql sock by log dir", "logDir", w.workDir, "rootDirs", config.GetConfig().BackupBinlogConfig.RootDirectories)
			return
		}
		mysqlDb, err := GetMysqlDb(mysqlSockPath)
		if err != nil {
			w.logger.Error(err, fmt.Sprintf("failed to get mysql db, mysqlSockFilepath=%s", mysqlSockPath))
			return
		}
		w.mysqlDb = mysqlDb
		for {
			binlogNums, err := w.readLogIndex(indexFilepath)
			if err != nil {
				return
			}
			info, err := w.readInfo(infoFilepath)
			if err != nil {
				return
			}
			if info.UploadLatest != nil {
				w.SetUploadLatest(*info.UploadLatest)
			}
			// purge consensus logs, at most once per PurgePeriod
			if w.lastPurgeTime.Add(PurgePeriod).Before(time.Now()) {
				go func() {
					defer func() {
						if err := recover(); err != nil {
							w.logger.Info("Skip panic", "err", err, "stack", string(debug.Stack()))
						}
					}()
					// the newest binlog file is still being written, so it is never purged
					if len(binlogNums) > 1 {
						err := w.tryPurge(w.db, w.mysqlDb, *info, binlogNums[:len(binlogNums)-1])
						if err != nil {
							w.logger.Error(err, "failed to purge")
						}
					}
				}()
				w.lastPurgeTime = time.Now()
			}
			// upload consensus logs
		binlogNumOut:
			for binlogNumArrayIndex, binlogNum := range binlogNums {
				if binlogNum.Num <= w.lastUploadedBinlogNum {
					continue
				}
				var thisLatestBinlogUploaded bool
				if binlogNumArrayIndex == len(binlogNums)-1 {
					if w.IsUploadLatest() {
						thisLatestBinlogUploaded = true
					} else {
						// do not upload the latest binlog file
						continue
					}
				}
				binlogFile, err := FindRecord(db, binlogNum.Num)
				if err != nil {
					w.logger.Error(err, "failed to find record", "num", binlogNum.Num)
					return
				}
				if binlogFile == nil || binlogFile.Status != 0 {
					newBinlogFile := BinlogFile{
						Info:      *info,
						BinlogNum: binlogNum,
					}
					if w.actions != nil {
						for _, action := range w.actions {
							if !action(w, &newBinlogFile) {
								// stop executing actions if one of them fails
								w.logger.Info("action executing interrupt", "binlog filepath", newBinlogFile.Filepath)
								break binlogNumOut
							}
						}
					}
				}
				w.lastUploadedBinlogNum = binlogNum.Num
				if thisLatestBinlogUploaded {
					w.TryIncrementUploadLatestCount()
				}
			}
			select {
			case <-w.ctx.Done():
				return
			case <-time.After(PERIOD):
			}
		}
	}()
}
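// tryPurge picks the newest binlog file that is past its expiry time or beyond
// the local count limit and, if that file has already been uploaded (Status == 0),
// purges the local consensus logs up to the start index of the following file.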
func (w *Watcher) tryPurge(sqliteDb *sql.DB, mysqlDb *sql.DB, info Info, binlogNums []BinlogNum) error {
	w.logger.Info("begin tryPurge consensus logs")
	defer w.logger.Info("end tryPurge consensus logs")
	if info.ForbidPurge {
		w.logger.Info("purge forbidden, ignore this trigger")
		return nil
	}
	var purgeBinlogNum *BinlogNum
	countPurgeCnt := len(binlogNums) - int(info.MaxLocalBinlogCount)
	if countPurgeCnt > 0 {
		// need to purge because the local binlog count exceeds the limit
		purgeBinlogNum = &binlogNums[countPurgeCnt-1]
	}
	expireTime := time.Now().Add(time.Duration(-info.LocalExpireLogSeconds) * time.Second)
	for i := range binlogNums {
		fileInfo, err := os.Stat(binlogNums[i].Filepath)
		if err != nil {
			// skip files that no longer exist or cannot be stat'ed
			continue
		}
		if fileInfo.ModTime().Before(expireTime) {
			// take the address of the slice element, not of a loop variable,
			// so the pointer remains valid after the loop
			if purgeBinlogNum == nil || binlogNums[i].Num > purgeBinlogNum.Num {
				purgeBinlogNum = &binlogNums[i]
			}
		}
	}
	if purgeBinlogNum != nil {
		// check if the binlog has been uploaded
		w.logger.Info(fmt.Sprintf("check the uploaded status of %s", purgeBinlogNum.Filename))
		uploadedRecord, err := FindRecord(sqliteDb, purgeBinlogNum.Num)
		if err != nil {
			w.logger.Error(err, "failed to find the binlog record", "binlog filename", purgeBinlogNum.Filename)
			return err
		}
		if uploadedRecord != nil && uploadedRecord.Status == 0 {
			// do purge
			consensusLogRows, err := ShowConsensusLogs(mysqlDb)
			if err != nil {
				w.logger.Error(err, "failed to show consensus logs")
				return err
			}
			sort.Slice(consensusLogRows, func(i, j int) bool {
				return consensusLogRows[i].StartLogIndex < consensusLogRows[j].StartLogIndex
			})
			for i, consensusLogRow := range consensusLogRows {
				if consensusLogRow.LogName == purgeBinlogNum.Filename {
					if len(consensusLogRows) > i+1 {
						purgeToIndex := consensusLogRows[i+1].StartLogIndex
						w.logger.Info(fmt.Sprintf("exec purge local consensus_log before %d", purgeToIndex))
						err := Purge(mysqlDb, purgeToIndex)
						if err != nil {
							w.logger.Error(err, "failed to purge local consensus_log")
							return err
						}
						w.logger.Info("succeeded to purge local consensus_log")
					}
					break
				}
			}
		}
	}
	return nil
}
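// Stop cancels the watcher's context, which terminates the watch loop started by Start.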
func (w *Watcher) Stop() {
	if w.cancelFunc != nil {
		w.cancelFunc()
	}
}