polardbxoperator/pkg/operator/v1/polardbx/guide/topology_mode.go

595 lines
18 KiB
Go

/*
Copyright 2021 Alibaba Group Holding Limited.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package guide
import (
"errors"
"reflect"
"regexp"
"sort"
"strconv"
"strings"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/intstr"
polardbxv1 "github.com/alibaba/polardbx-operator/api/v1"
polardbxv1common "github.com/alibaba/polardbx-operator/api/v1/common"
polardbxv1polardbx "github.com/alibaba/polardbx-operator/api/v1/polardbx"
polardbxv1xstore "github.com/alibaba/polardbx-operator/api/v1/xstore"
polardbxmeta "github.com/alibaba/polardbx-operator/pkg/operator/v1/polardbx/meta"
polardbxv1reconcile "github.com/alibaba/polardbx-operator/pkg/operator/v1/polardbx/reconcile"
)
func init() {
registerHandler(polardbxmeta.AnnotationTopologyModeGuide, &topologyModeHandler{})
}
type topologyModeHandler struct{}
type modeName string
type nodeType string
type nodeConfig struct {
// replica indicates the number of a specific node, like 3 CN, 2 DN
replica int32
// paxosReplica indicates the number of replicas in paxos protocol only for GMS and DN
// it only has three possible values: 1/3/5, 3 by default
paxosReplica int32
// resource indicates resources available for a node
resource *corev1.ResourceRequirements
}
type topologyMode struct {
name modeName
config map[nodeType]*nodeConfig
rules *polardbxv1polardbx.TopologyRules
shareGMS bool
}
const (
// Performance name creates 3 CN, 3 DN, 1 GMS and maximizes the use of resources
// usage: performance(labels=(zone-a))
// in which the labels describe where to schedule the nodes
Performance modeName = "performance"
// Minimum name creates 1 CN (2c4g) + 1 DN (2c4g), in which GMS in dn-0
Minimum modeName = "minimum"
// MinimumCDC name creates 1 CN + 1 DN + 1 GMS + 1 CDC all in 4c8g
MinimumCDC modeName = "minimum-cdc"
// Custom name allows user to specific both resources and replicas
// usage: custom(CN=(1,2c4g),DN=(,4c8g,single-replica),GMS=(1,))
// in which the former number indicates replicas and the later letters indicate resource
Custom modeName = "custom"
// QuickStart creates a 1 GMS + 1 CN + 1 DN + 1 CDC in total resources 4c8g for quick start.
QuickStart modeName = "quick-start"
// QuickStartPaxos Change to 3 paxos replicas of DN on the basis of QuickStart
QuickStartPaxos modeName = "quick-start-paxos"
)
const (
CN nodeType = "CN"
DN nodeType = "DN"
GMS nodeType = "GMS"
CDC nodeType = "CDC"
)
// RollingStrategyLabelName indicates the label name used to build node selectors for performance mode
const RollingStrategyLabelName = "node-configuration"
const MegaByte int64 = 1024 * 1024
const GigaByte int64 = 1024 * 1024 * 1024
var ErrInvalidModeGuide = errors.New("invalid name guide")
func (h *topologyModeHandler) updateTopology(obj *polardbxv1.PolarDBXCluster, mode *topologyMode) error {
for node, conf := range mode.config {
switch node {
case CN:
conf.resource.DeepCopyInto(&obj.Spec.Topology.Nodes.CN.Template.Resources)
*obj.Spec.Topology.Nodes.CN.Replicas = conf.replica
case DN:
conf.resource.DeepCopyInto(&obj.Spec.Topology.Nodes.DN.Template.Resources.ResourceRequirements)
obj.Spec.Topology.Nodes.DN.Replicas = conf.replica
case GMS:
obj.Spec.Topology.Nodes.GMS = polardbxv1polardbx.TopologyNodeGMS{
Template: &polardbxv1polardbx.XStoreTemplate{
Resources: polardbxv1common.ExtendedResourceRequirements{
ResourceRequirements: *conf.resource,
},
},
}
case CDC:
obj.Spec.Topology.Nodes.CDC = &polardbxv1polardbx.TopologyNodeCDC{
Template: polardbxv1polardbx.CDCTemplate{
Resources: *conf.resource,
},
}
obj.Spec.Topology.Nodes.CDC.Replicas = conf.replica
default:
return errors.New("unexpected type of node")
}
}
if mode.rules != nil {
mode.rules.DeepCopyInto(&obj.Spec.Topology.Rules)
}
// share gms is disabled by default
obj.Spec.ShareGMS = mode.shareGMS
return nil
}
func (h *topologyModeHandler) Handle(rc *polardbxv1reconcile.Context, obj *polardbxv1.PolarDBXCluster) error {
modeGuide := obj.Annotations[polardbxmeta.AnnotationTopologyModeGuide]
factory := NewTopologyModeFactory(rc)
mode, err := factory.parseTopologyModeGuide(modeGuide)
if err != nil {
return err
}
err = h.updateTopology(obj, mode)
if err != nil {
return err
}
delete(obj.Annotations, polardbxmeta.AnnotationTopologyModeGuide)
return nil
}
type TopologyModeFactory interface {
parseTopologyModeGuide(s string) (*topologyMode, error)
}
type topologyModeFactory struct {
rc *polardbxv1reconcile.Context
}
func NewTopologyModeFactory(rc *polardbxv1reconcile.Context) *topologyModeFactory {
return &topologyModeFactory{rc: rc}
}
// the bool return value is used to indicate whether user sets a param in performance or minimum mode
func (t *topologyModeFactory) convertibleStringOrDefault(s string, defaultValue int64) (int64, bool) {
value, err := strconv.Atoi(s)
if err != nil {
return defaultValue, false
}
return int64(value), true
}
func (t *topologyModeFactory) newResourceRequirements(limitCPU int64, limitMemory int64, requests ...int64) *corev1.ResourceRequirements {
requirements := &corev1.ResourceRequirements{
Limits: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: *resource.NewQuantity(limitCPU, resource.DecimalSI),
corev1.ResourceMemory: *resource.NewQuantity(limitMemory, resource.BinarySI),
},
}
if len(requests) >= 2 {
requirements.Requests = map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: *resource.NewQuantity(requests[0], resource.DecimalSI),
corev1.ResourceMemory: *resource.NewQuantity(requests[1], resource.BinarySI),
}
}
return requirements
}
func (t *topologyModeFactory) parseGuideParamsAndUpdateConfig(s string, config map[nodeType]*nodeConfig) {
pattern := regexp.MustCompile(`(DN|CN|GMS|CDC)=\((\d?)(?:,(?:(\d*)[cC](\d*)[gG])?)?(?:,([^()]+)?)?\),?`)
matches := pattern.FindAllStringSubmatch(s, -1)
for _, v := range matches {
var defaultReplica int64
if nodeType(v[1]) == DN || nodeType(v[1]) == CN {
defaultReplica = 2
} else {
defaultReplica = 1
}
replica, replicaConvertible := t.convertibleStringOrDefault(v[2], defaultReplica)
cpu, cpuConvertible := t.convertibleStringOrDefault(v[3], 4)
memory, memoryConvertible := t.convertibleStringOrDefault(v[4], 8)
// allow user to modify params even in performance or minimum template
if _, ok := config[nodeType(v[1])]; ok {
if replicaConvertible {
config[nodeType(v[1])].replica = int32(replica)
}
// cpu & memory must be modified together
if cpuConvertible && memoryConvertible {
config[nodeType(v[1])].resource = t.newResourceRequirements(cpu, memory*GigaByte)
}
} else {
// if the node is not included in the template or the mode is custom
config[nodeType(v[1])] = &nodeConfig{
replica: int32(replica),
resource: t.newResourceRequirements(cpu, memory*GigaByte),
}
}
if nodeType(v[1]) == DN || nodeType(v[1]) == GMS {
if v[5] == "single-replica" {
config[nodeType(v[1])].paxosReplica = 1
} else if v[5] == "five-replica" {
config[nodeType(v[1])].paxosReplica = 5
} else {
config[nodeType(v[1])].paxosReplica = 3
}
}
}
}
// parseLabels only work for performance mode in order to provide rolling strategy
func (t *topologyModeFactory) parseLabels(s string) []string {
pattern := regexp.MustCompile(`labels?=\(([^()]+)\)`)
matches := pattern.FindAllStringSubmatch(s, -1)
if len(matches) == 0 {
return nil
}
labels := strings.Split(matches[0][1], "|")
return labels
}
func (t *topologyModeFactory) newNodeSelector(labels []string) *polardbxv1polardbx.NodeSelectorItem {
nodeSelectorItem := &polardbxv1polardbx.NodeSelectorItem{
Name: "performance-mode-selector",
NodeSelector: corev1.NodeSelector{
NodeSelectorTerms: []corev1.NodeSelectorTerm{
{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: RollingStrategyLabelName,
Operator: corev1.NodeSelectorOpIn,
Values: labels,
},
},
},
},
},
}
return nodeSelectorItem
}
func (t *topologyModeFactory) newXStoreTopologyRule(replica int32, nodeSelectorItems []polardbxv1polardbx.NodeSelectorItem) *polardbxv1polardbx.XStoreTopologyRule {
var rule *polardbxv1polardbx.XStoreTopologyRule
if len(nodeSelectorItems) > 0 {
rule = &polardbxv1polardbx.XStoreTopologyRule{
Rolling: &polardbxv1polardbx.XStoreTopologyRuleRolling{
Replicas: replica,
NodeSelector: &polardbxv1polardbx.NodeSelectorReference{
Reference: nodeSelectorItems[0].Name,
NodeSelector: &nodeSelectorItems[0].NodeSelector,
},
},
}
return rule
}
if replica == 1 {
rule = &polardbxv1polardbx.XStoreTopologyRule{
NodeSets: []polardbxv1polardbx.XStoreTopologyRuleNodeSetItem{
{
Name: "single",
Role: polardbxv1xstore.RoleCandidate,
Replicas: 1,
},
},
}
} else if replica != 0 { // replica can only equal to 1/3/5
rule = &polardbxv1polardbx.XStoreTopologyRule{
NodeSets: []polardbxv1polardbx.XStoreTopologyRuleNodeSetItem{
{
Name: "cands",
Role: polardbxv1xstore.RoleCandidate,
Replicas: replica - 1,
},
{
Name: "log",
Role: polardbxv1xstore.RoleVoter,
Replicas: 1,
},
},
}
}
return rule
}
// newStatelessRule generates stateless rule only for performance mode
func (t *topologyModeFactory) newStatelessRule(replica int32, nodeSelectorItems []polardbxv1polardbx.NodeSelectorItem) *polardbxv1polardbx.StatelessTopologyRuleItem {
if len(nodeSelectorItems) == 0 {
return nil
}
rule := &polardbxv1polardbx.StatelessTopologyRuleItem{
Name: "performance",
Replicas: &intstr.IntOrString{Type: intstr.Int, IntVal: replica},
NodeSelector: &polardbxv1polardbx.NodeSelectorReference{
Reference: nodeSelectorItems[0].Name,
},
}
return rule
}
func (t *topologyModeFactory) computeResourcesForPerformanceMode(nodeList *corev1.NodeList) (map[nodeType]*nodeConfig, error) {
nodes := nodeList.Items
if len(nodes) < 3 {
return nil, errors.New("nodes insufficient for performance mode")
}
// sort nodes in the decrease order of cpu (suppose that memory has the same order
// performance will use the three nodes which have the most resources
sort.Slice(nodes, func(i, j int) bool {
return nodes[i].Status.Capacity.Cpu().Cmp(*nodes[j].Status.Capacity.Cpu()) == 1
})
if !reflect.DeepEqual(nodes[0].Status.Capacity, nodes[2].Status.Capacity) {
return nil, errors.New("performance mode is not supported for nodes with different resources")
}
totalCPU, _ := nodes[0].Status.Capacity.Cpu().AsInt64()
totalMemory, _ := nodes[0].Status.Capacity.Memory().AsInt64()
// leave at least 2 cpu and 4g for logger, in addition to a gap of 1 cpu and 512m
totalCPU -= 3
totalMemory -= 9 * 512 * MegaByte
// 2 DN + 1 CN in one node, which need 3.5 pieces of resources that DN request
// For the convenience of calculation, here set pieces to 4
var pieces int64 = 4
unitCPU := totalCPU / pieces
unitMemory := totalMemory / pieces
config := map[nodeType]*nodeConfig{
CN: {
replica: 3,
resource: t.newResourceRequirements(unitCPU/2+unitCPU, unitMemory/2+unitMemory),
},
DN: {
replica: 3,
paxosReplica: 3,
resource: t.newResourceRequirements(unitCPU, unitMemory),
},
GMS: {
replica: 1,
paxosReplica: 3,
resource: t.newResourceRequirements(4, 8*GigaByte, 1, GigaByte),
},
}
return config, nil
}
func (t *topologyModeFactory) computeResourcesForMinimumMode(withCDC bool) map[nodeType]*nodeConfig {
var config map[nodeType]*nodeConfig
if withCDC {
config = map[nodeType]*nodeConfig{
CN: {
replica: 1,
resource: t.newResourceRequirements(4, 8*GigaByte),
},
DN: {
replica: 1,
resource: t.newResourceRequirements(4, 8*GigaByte),
},
GMS: {
replica: 1,
resource: t.newResourceRequirements(4, 8*GigaByte),
},
CDC: {
replica: 1,
resource: t.newResourceRequirements(4, 8*GigaByte),
},
}
} else {
config = map[nodeType]*nodeConfig{
CN: {
replica: 1,
resource: t.newResourceRequirements(2, 4*GigaByte),
},
DN: {
replica: 1,
resource: t.newResourceRequirements(2, 4*GigaByte),
},
}
}
return config
}
func (t *topologyModeFactory) newQuickStartModeConfig() map[nodeType]*nodeConfig {
storeResources := corev1.ResourceRequirements{
Limits: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("1Gi"),
},
Requests: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: resource.MustParse("100m"),
corev1.ResourceMemory: resource.MustParse("500Mi"),
},
}
return map[nodeType]*nodeConfig{
GMS: {
replica: 1,
paxosReplica: 1,
resource: &storeResources,
},
CN: {
replica: 1,
resource: &corev1.ResourceRequirements{
Limits: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
},
Requests: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: resource.MustParse("100m"),
corev1.ResourceMemory: resource.MustParse("1Gi"),
},
},
},
DN: {
replica: 1,
paxosReplica: 1,
resource: &storeResources,
},
CDC: {
replica: 1,
resource: &corev1.ResourceRequirements{
Limits: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("1Gi"),
},
Requests: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: resource.MustParse("100m"),
corev1.ResourceMemory: resource.MustParse("500Mi"),
},
},
},
}
}
func (t *topologyModeFactory) newQuickStartPaxosModeConfig() map[nodeType]*nodeConfig {
storeResources := corev1.ResourceRequirements{
Limits: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("1Gi"),
},
Requests: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: resource.MustParse("100m"),
corev1.ResourceMemory: resource.MustParse("500Mi"),
},
}
return map[nodeType]*nodeConfig{
GMS: {
replica: 1,
paxosReplica: 3,
resource: &storeResources,
},
CN: {
replica: 1,
resource: &corev1.ResourceRequirements{
Limits: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("2Gi"),
},
Requests: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: resource.MustParse("100m"),
corev1.ResourceMemory: resource.MustParse("1Gi"),
},
},
},
DN: {
replica: 1,
paxosReplica: 3,
resource: &storeResources,
},
CDC: {
replica: 1,
resource: &corev1.ResourceRequirements{
Limits: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("1Gi"),
},
Requests: map[corev1.ResourceName]resource.Quantity{
corev1.ResourceCPU: resource.MustParse("100m"),
corev1.ResourceMemory: resource.MustParse("500Mi"),
},
},
},
}
}
func (t *topologyModeFactory) parseTopologyModeGuide(s string) (*topologyMode, error) {
s = strings.TrimSpace(s)
mode := &topologyMode{}
// labels only used by performance mode so far
var labels []string
// a template will be first generated for performance or minimum mode
switch {
case strings.HasPrefix(s, string(Performance)):
mode.name = Performance
nodeList := &corev1.NodeList{}
err := t.rc.Client().List(t.rc.Context(), nodeList)
if err != nil {
return nil, err
}
config, err := t.computeResourcesForPerformanceMode(nodeList)
if err != nil {
return nil, err
}
mode.config = config
labels = t.parseLabels(s)
case strings.HasPrefix(s, string(MinimumCDC)):
mode.name = MinimumCDC
mode.config = t.computeResourcesForMinimumMode(true)
case strings.HasPrefix(s, string(Minimum)):
mode.name = Minimum
mode.config = t.computeResourcesForMinimumMode(false)
mode.shareGMS = true
case strings.HasPrefix(s, string(QuickStartPaxos)):
mode.name = QuickStartPaxos
mode.config = t.newQuickStartPaxosModeConfig()
mode.shareGMS = false
case strings.HasPrefix(s, string(QuickStart)):
mode.name = QuickStart
mode.config = t.newQuickStartModeConfig()
mode.shareGMS = false
case strings.HasPrefix(s, string(Custom)):
mode.name = Custom
default:
return nil, ErrInvalidModeGuide
}
// if any params given in performance or minimum mode, the existing template will be overwritten
if mode.config == nil {
mode.config = make(map[nodeType]*nodeConfig)
}
t.parseGuideParamsAndUpdateConfig(s, mode.config)
mode.rules = &polardbxv1polardbx.TopologyRules{}
if labels != nil {
nodeSelectorItem := t.newNodeSelector(labels)
mode.rules.Selectors = []polardbxv1polardbx.NodeSelectorItem{*nodeSelectorItem}
}
for node, conf := range mode.config {
switch node {
case CN:
statelessRule := t.newStatelessRule(conf.replica, mode.rules.Selectors)
if statelessRule != nil {
mode.rules.Components.CN = []polardbxv1polardbx.StatelessTopologyRuleItem{
*statelessRule,
}
}
case DN:
mode.rules.Components.DN = t.newXStoreTopologyRule(conf.paxosReplica, mode.rules.Selectors)
case GMS:
mode.rules.Components.GMS = t.newXStoreTopologyRule(conf.paxosReplica, mode.rules.Selectors)
}
}
return mode, nil
}