polardbxoperator/pkg/binlogtool/algo/seek_consistent_point.go

294 lines
8.6 KiB
Go

/*
Copyright 2022 Alibaba Group Holding Limited.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algo
import (
"bytes"
"encoding/binary"
"fmt"
"github.com/alibaba/polardbx-operator/pkg/binlogtool/binlog"
"sync"
"sync/atomic"
"github.com/alibaba/polardbx-operator/pkg/binlogtool/tx"
"github.com/alibaba/polardbx-operator/pkg/binlogtool/utils"
)
// SeekConsistentPoint holds the inputs of the two-stage consistent point seek
// that Perform runs over a set of streams.
type SeekConsistentPoint struct {
// txEventParsers holds one transaction event parser per stream, keyed by
// stream name.
txEventParsers map[string]tx.TransactionEventParser
// heartbeatTxid is the XID that identifies the heartbeat transaction; events
// carrying this XID are treated as the heartbeat prepare / commit.
heartbeatTxid uint64
}
// stageOne is stage 1 of the seek: it locates the heartbeat transaction in
// every stream and emits the IDs of the transactions that are recoverable
// according to the events seen up to the heartbeat prepare.
//
// Each stream is parsed in its own goroutine. P_h and C_h denote the prepare
// and commit events of the heartbeat transaction. For every stream:
//   - a commit seen before the first P_h marks its transaction recoverable;
//   - events seen after the latest P_h are buffered; when another P_h shows
//     up, the buffered commits are marked recoverable and the buffer restarts;
//   - on C_h the callback returns tx.StopParse to end the scan of that stream.
//
// streamIndexes maps a stream name to the slot used for that stream in the
// returned slices.
//
// On success it returns the recoverable transaction IDs (as map keys), the
// per-stream {P_h, C_h} offsets (end position of the respective event) and the
// per-stream events buffered after the latest P_h. If any parser fails, or if
// P_h or C_h is missing from any stream, only the error is returned.
func (sct *SeekConsistentPoint) stageOne(streamIndexes map[string]int) (
	recoverableTxs map[uint64]int,
	heartbeatPrepareAndCommitOffsets [][2]*binlog.EventOffset,
	txsBetweenHeartbeat [][]tx.Event,
	err error,
) {
	var mu sync.Mutex
	recoverableTxs = make(map[uint64]int)
	heartbeatPrepareAndCommitOffsets = make([][2]*binlog.EventOffset, len(sct.txEventParsers))
	txsBetweenHeartbeat = make([][]tx.Event, len(sct.txEventParsers))

	// errOccurred is shared by all scanning goroutines, so it is only ever
	// touched through sync/atomic.
	errOccurred := int32(0)
	scanErrs := make([]error, len(sct.txEventParsers))

	var wg sync.WaitGroup
	for streamName := range sct.txEventParsers {
		txParser := sct.txEventParsers[streamName]
		streamIndex := streamIndexes[streamName]
		pcOffset := &heartbeatPrepareAndCommitOffsets[streamIndex]

		wg.Add(1)
		go func() {
			defer wg.Done()

			curRecoverableTxs := make(map[uint64]int)
			eventsBetween := make([]tx.Event, 0)

			parseErr := txParser.Parse(func(event *tx.Event) error {
				// Give up early once any other stream has failed.
				if atomic.LoadInt32(&errOccurred) > 0 {
					return tx.StopParse
				}

				// Heartbeat transaction.
				if event.XID == sct.heartbeatTxid {
					switch event.Type {
					case tx.Prepare:
						// P_h moved ahead: the commits buffered since the
						// previous P_h now sit before the latest P_h, so they
						// are recoverable. Prepare-only transactions are not.
						if pcOffset[0] != nil {
							for i := range eventsBetween {
								if eventsBetween[i].Type == tx.Commit {
									curRecoverableTxs[eventsBetween[i].XID] = 1
								}
							}
							eventsBetween = eventsBetween[:0]
						}
						pcOffset[0] = &binlog.EventOffset{File: event.File, Offset: event.EndPos}
					case tx.Commit:
						pcOffset[1] = &binlog.EventOffset{File: event.File, Offset: event.EndPos}
						return tx.StopParse
					}
					return nil
				}

				// Ordinary transaction.
				if pcOffset[0] != nil {
					// After P_h: keep the event for stage 2.
					eventsBetween = append(eventsBetween, *event)
				} else if event.Type == tx.Commit {
					// Before the first P_h: committed means recoverable.
					curRecoverableTxs[event.XID] = 1
				}
				return nil
			})
			if parseErr != nil {
				atomic.AddInt32(&errOccurred, 1)
				scanErrs[streamIndex] = parseErr
				return
			}

			// Write to global set.
			mu.Lock()
			defer mu.Unlock()
			for xid := range curRecoverableTxs {
				recoverableTxs[xid] = 1
			}
			txsBetweenHeartbeat[streamIndex] = eventsBetween
		}()
	}

	// wg.Wait orders every goroutine's writes before the reads below, so the
	// per-stream slots can be read without further synchronization.
	wg.Wait()

	if atomic.LoadInt32(&errOccurred) > 0 {
		multiErr := utils.MultiError()
		for streamName := range sct.txEventParsers {
			streamIndex := streamIndexes[streamName]
			if err := scanErrs[streamIndex]; err != nil {
				multiErr.Add(fmt.Errorf("error occurs when parsing stream %s: %w", streamName, err))
			}
		}
		return nil, nil, nil, multiErr
	}

	// Check if P_h and C_h are found in all streams.
	notFoundErrs := utils.MultiError()
	for streamName := range sct.txEventParsers {
		streamIndex := streamIndexes[streamName]
		pcOffset := &heartbeatPrepareAndCommitOffsets[streamIndex]
		if pcOffset[0] == nil {
			notFoundErrs.Add(fmt.Errorf("heartbeat prepare not found in stream %s", streamName))
		} else if pcOffset[1] == nil {
			notFoundErrs.Add(fmt.Errorf("heartbeat commit not found in stream %s", streamName))
		}
	}
	if notFoundErrs.Size() > 0 {
		return nil, nil, nil, notFoundErrs
	}

	return recoverableTxs, heartbeatPrepareAndCommitOffsets, txsBetweenHeartbeat, nil
}
// scanEventBorder walks txEvents backwards over the inclusive index range
// [l, r] and returns the index of the last prepare event whose XID is a key of
// recoverableTxs. When the range holds no such event it returns l - 1.
func scanEventBorder(l, r int, recoverableTxs map[uint64]int, txEvents []tx.Event) int {
	for pos := r; pos >= l; pos-- {
		candidate := &txEvents[pos]
		if candidate.Type == tx.Prepare {
			if _, known := recoverableTxs[candidate.XID]; known {
				return pos
			}
		}
	}
	// Nothing matched: report the slot just before the range.
	notFound := l - 1
	return notFound
}
// stageTwo is stage 2 of the seek: it scans the events recorded between the
// heartbeat prepare (P_h) and commit (C_h) of every stream and iterates until
// the set of recoverable transactions stops growing.
//
// Per stream, the "border" is the index of the last prepare event that belongs
// to a recoverable transaction (-1 or offset-1 when there is none). Every
// commit at or before the border is marked recoverable too; newly found
// transactions may push the borders of other streams further, so the borders
// are rescanned and the process repeats.
//
// recoverableTxs is modified in place: the transactions found here and the
// heartbeat transaction itself are added to it.
//
// It returns the recoverable transaction IDs (in map iteration order) and, per
// stream name, the border offset: the end position of the border event, or the
// P_h offset when the stream has no border. The returned error is always nil.
func (sct *SeekConsistentPoint) stageTwo(
	streamIndexes map[string]int,
	recoverableTxs map[uint64]int,
	heartbeatPrepareAndCommitOffsets [][2]*binlog.EventOffset,
	txsBetweenHeartbeat [][]tx.Event) ([]uint64, map[string]binlog.EventOffset, error) {
	eventBorders := make([]int, len(streamIndexes))

	// Scan the borders (reverse scan), i.e. index of the last recoverable prepare.
	for _, streamIndex := range streamIndexes {
		txEvents := txsBetweenHeartbeat[streamIndex]
		eventBorders[streamIndex] = scanEventBorder(0, len(txEvents)-1, recoverableTxs, txEvents)
	}

	// Iterate. eventOffsets[i] is the first index of stream i that has not been
	// scanned for commits yet.
	eventOffsets := make([]int, len(streamIndexes))
	for {
		// Commits in [offset, border] that are not recoverable yet become so.
		foundNewTxs := false
		for _, streamIndex := range streamIndexes {
			txEvents := txsBetweenHeartbeat[streamIndex]
			border := eventBorders[streamIndex]
			for i := border; i >= eventOffsets[streamIndex]; i-- {
				ev := &txEvents[i]
				if ev.Type != tx.Commit {
					continue
				}
				if _, ok := recoverableTxs[ev.XID]; !ok {
					recoverableTxs[ev.XID] = 1
					foundNewTxs = true
				}
			}
		}
		if !foundNewTxs {
			break
		}

		// Update the offset and rescan the border.
		borderChanged := false
		for _, streamIndex := range streamIndexes {
			eventOffsets[streamIndex] = eventBorders[streamIndex] + 1
			txEvents := txsBetweenHeartbeat[streamIndex]
			eventBorders[streamIndex] = scanEventBorder(eventOffsets[streamIndex], len(txEvents)-1, recoverableTxs, txEvents)
			// scanEventBorder returns offset-1 (the previous border) when no
			// further recoverable prepare exists; anything else is a move.
			if eventBorders[streamIndex] != eventOffsets[streamIndex]-1 {
				borderChanged = true
			}
		}
		if !borderChanged {
			break
		}
	}

	// Return.
	streamBorders := make(map[string]binlog.EventOffset, len(streamIndexes))
	for streamName, streamIndex := range streamIndexes {
		txEvents := txsBetweenHeartbeat[streamIndex]
		borderIndex := eventBorders[streamIndex]
		if borderIndex < 0 { // Use P_h
			streamBorders[streamName] = *heartbeatPrepareAndCommitOffsets[streamIndex][0]
		} else { // Use last P
			ev := &txEvents[borderIndex]
			streamBorders[streamName] = binlog.EventOffset{File: ev.File, Offset: ev.EndPos}
		}
	}

	// The heartbeat transaction itself is always part of the result.
	recoverableTxs[sct.heartbeatTxid] = 1

	return utils.MapKeys(recoverableTxs), streamBorders, nil
}
// buildStreamIndexes assigns every stream name in txEventParsers a distinct
// index in [0, number of streams). Which stream gets which index follows map
// iteration order and is therefore not stable across calls.
func (sct *SeekConsistentPoint) buildStreamIndexes() map[string]int {
	indexByStream := make(map[string]int)
	for name := range sct.txEventParsers {
		// The current size of the map is the next free index.
		next := len(indexByStream)
		indexByStream[name] = next
	}
	return indexByStream
}
// Perform runs the seek end to end: it indexes the streams, runs stage one
// and, unless that fails, feeds its results into stage two.
//
// It returns whatever stage two returns; if stage one fails, it returns that
// error with nil results.
func (sct *SeekConsistentPoint) Perform() ([]uint64, map[string]binlog.EventOffset, error) {
	indexes := sct.buildStreamIndexes()

	recoverable, heartbeatOffsets, eventsBetween, stageOneErr := sct.stageOne(indexes)
	if stageOneErr != nil {
		return nil, nil, stageOneErr
	}

	txids, borders, stageTwoErr := sct.stageTwo(indexes, recoverable, heartbeatOffsets, eventsBetween)
	return txids, borders, stageTwoErr
}
// NewSeekConsistentPoint returns a SeekConsistentPoint that reads the streams
// from txEventParsers (keyed by stream name) and treats heartbeatTxid as the
// XID of the heartbeat transaction. The map is stored as is, not copied.
func NewSeekConsistentPoint(txEventParsers map[string]tx.TransactionEventParser, heartbeatTxid uint64) *SeekConsistentPoint {
	sct := new(SeekConsistentPoint)
	sct.txEventParsers = txEventParsers
	sct.heartbeatTxid = heartbeatTxid
	return sct
}
// SerializeCpResult encodes the recoverable transaction IDs and the per-stream
// borders into a little-endian byte sequence:
//
//	uint32                 number of transaction IDs
//	uint64 * n             the transaction IDs, in slice order
//	uint16                 number of borders
//	per border:
//	  uint8 + bytes        stream name length, stream name
//	  uint8 + bytes        file name length, file name
//	  offset.Offset        written with binary.Write
//
// Borders are written in map iteration order, so the byte sequence is not
// guaranteed to be identical between calls with the same input.
//
// An error is returned when a count or a name does not fit its length prefix
// (instead of silently wrapping it), or when a write fails.
func SerializeCpResult(recoverableTxs []uint64, borders map[string]binlog.EventOffset) ([]byte, error) {
	// Largest values representable by the length prefixes of the format.
	const (
		maxTxCount     = 1<<32 - 1
		maxBorderCount = 1<<16 - 1
		maxNameLen     = 1<<8 - 1
	)

	if uint64(len(recoverableTxs)) > maxTxCount {
		return nil, fmt.Errorf("too many recoverable transactions: %d exceeds %d", len(recoverableTxs), uint64(maxTxCount))
	}
	if len(borders) > maxBorderCount {
		return nil, fmt.Errorf("too many borders: %d exceeds %d", len(borders), maxBorderCount)
	}

	byteBuf := &bytes.Buffer{}

	if err := binary.Write(byteBuf, binary.LittleEndian, uint32(len(recoverableTxs))); err != nil {
		return nil, err
	}
	// A slice of fixed-size values is written element by element, which gives
	// the same bytes as writing every ID on its own.
	if len(recoverableTxs) > 0 {
		if err := binary.Write(byteBuf, binary.LittleEndian, recoverableTxs); err != nil {
			return nil, err
		}
	}

	if err := binary.Write(byteBuf, binary.LittleEndian, uint16(len(borders))); err != nil {
		return nil, err
	}
	for streamName, offset := range borders {
		if len(streamName) > maxNameLen {
			return nil, fmt.Errorf("stream name %q is too long: %d bytes exceeds %d", streamName, len(streamName), maxNameLen)
		}
		if len(offset.File) > maxNameLen {
			return nil, fmt.Errorf("file name %q of stream %s is too long: %d bytes exceeds %d", offset.File, streamName, len(offset.File), maxNameLen)
		}

		if err := binary.Write(byteBuf, binary.LittleEndian, uint8(len(streamName))); err != nil {
			return nil, err
		}
		if _, err := byteBuf.Write([]byte(streamName)); err != nil {
			return nil, err
		}
		if err := binary.Write(byteBuf, binary.LittleEndian, uint8(len(offset.File))); err != nil {
			return nil, err
		}
		if _, err := byteBuf.Write([]byte(offset.File)); err != nil {
			return nil, err
		}
		if err := binary.Write(byteBuf, binary.LittleEndian, offset.Offset); err != nil {
			return nil, err
		}
	}

	return byteBuf.Bytes(), nil
}