# PrometheusRule with the PolarDB-X alerting rules.
#
# Rules are grouped by component (CN, DN, CDC, node resources). Every condition
# is declared twice: a "Too High" rule with severity critical and a "High" rule
# with severity warning and a lower threshold. All rules must hold for 5m
# before they fire.
#
# Go-template expressions wrapped in backticks are emitted verbatim by Helm so
# that Prometheus, not Helm, expands the $labels placeholders.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    prometheus: k8s
    role: alert-rules
    # Quoted so that an all-digit release name still renders as a string.
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
  name: polardbx-alert-rules
  namespace: {{ .Release.Namespace | quote }}
spec:
  groups:
    # ----------------------------------------------------------------------
    # CN rules, aggregated per pod / polardbx_name / namespace.
    # ----------------------------------------------------------------------
    - name: polardbx-cn
      rules:
        - alert: PolarDB-X CN SQL Running Connection is Too High
          annotations:
            summary: PolarDB-X CN SQL Running Connection is Too High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              running count is too high, over 1000 for 5m.
          expr: |
            sum(polardbx_stats_running_count) by (pod, polardbx_name, namespace) >= 1000
          for: 5m
          labels:
            severity: critical

        - alert: PolarDB-X CN SQL Running Connection is High
          annotations:
            summary: PolarDB-X CN SQL Running Connection is High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              running count is high, over 500 for 5m.
          expr: |
            sum(polardbx_stats_running_count) by (pod, polardbx_name, namespace) >= 500
          for: 5m
          labels:
            severity: warning

        - alert: PolarDB-X CN Total Connection is Too High
          annotations:
            summary: PolarDB-X CN Total Connection is Too High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              total connection is too high, over 10000.
          expr: |
            sum(polardbx_stats_active_connections) by (pod, polardbx_name, namespace) >= 10000
          for: 5m
          labels:
            severity: critical

        - alert: PolarDB-X CN Total Connection is High
          annotations:
            summary: PolarDB-X CN Total Connection is High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              total connection is high, over 5000.
          expr: |
            sum(polardbx_stats_active_connections) by (pod, polardbx_name, namespace) >= 5000
          for: 5m
          labels:
            severity: warning

        - alert: PolarDB-X CN Slow SQL Count is Too High
          annotations:
            summary: PolarDB-X CN Slow SQL Count is too High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              slow sql count is too high, over 250 for 5m.
          expr: |
            sum(rate(polardbx_stats_slow_request_count_total[1m])) by (pod, polardbx_name, namespace) >= 250
          for: 5m
          labels:
            severity: critical

        - alert: PolarDB-X CN Slow SQL Count is High
          annotations:
            summary: PolarDB-X CN Slow SQL Count is High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              slow sql count is high, over 200 for 5m.
          expr: |
            sum(rate(polardbx_stats_slow_request_count_total[1m])) by (pod, polardbx_name, namespace) >= 200
          for: 5m
          labels:
            severity: warning

        - alert: PolarDB-X CN Error SQL Count is Too High
          annotations:
            summary: PolarDB-X CN Error SQL Count is Too High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              error sql count is too high, over 50 for 5m.
          expr: |
            sum(rate(polardbx_stats_error_count_total[1m])) by (pod, polardbx_name, namespace) >= 50
          for: 5m
          labels:
            severity: critical

        - alert: PolarDB-X CN Error SQL Count is High
          annotations:
            summary: PolarDB-X CN Error SQL Count is High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              error sql count is high, over 20 for 5m.
          expr: |
            sum(rate(polardbx_stats_error_count_total[1m])) by (pod, polardbx_name, namespace) >= 20
          for: 5m
          labels:
            severity: warning

    # ----------------------------------------------------------------------
    # DN rules, built on the mysql_global_* metrics.
    # ----------------------------------------------------------------------
    - name: polardbx-dn
      rules:
        - alert: PolarDB-X DN SQL Running Connection is Too High
          annotations:
            summary: PolarDB-X DN SQL Running Connection is Too High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              running count is too high, over 500.
          expr: |
            sum(mysql_global_status_threads_running) by (pod, polardbx_name, namespace) >= 500
          for: 5m
          labels:
            severity: critical

        - alert: PolarDB-X DN SQL Running Connection is High
          annotations:
            summary: PolarDB-X DN SQL Running Connection is High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              running count is high, over 300.
          expr: |
            sum(mysql_global_status_threads_running) by (pod, polardbx_name, namespace) >= 300
          for: 5m
          labels:
            severity: warning

        # Connected threads as a percentage of max_connections.
        - alert: PolarDB-X DN Connection Usage is Too High
          annotations:
            summary: PolarDB-X DN Connection Usage is too High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              connection usage is too high, over 90% for 5m.
          expr: |
            sum(
              100 * mysql_global_status_threads_connected
              / mysql_global_variables_max_connections{polardbx_role="dn"}
            ) by (pod, polardbx_name, namespace) >= 90
          for: 5m
          labels:
            severity: critical

        - alert: PolarDB-X DN Connection Usage is High
          annotations:
            summary: PolarDB-X DN Connection Usage is High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              connection usage is high, over 80% for 5m.
          expr: |
            sum(
              100 * mysql_global_status_threads_connected
              / mysql_global_variables_max_connections{polardbx_role="dn"}
            ) by (pod, polardbx_name, namespace) >= 80
          for: 5m
          labels:
            severity: warning

        # mysql_global_status_slow_queries mirrors MySQL's cumulative
        # Slow_queries status counter, so the rule looks at its per-second
        # rate (same 1m window as the CN slow SQL rules) instead of the
        # ever-growing total.
        # NOTE(review): thresholds are now slow queries per second; confirm
        # 50 / 30 are the intended values for DN.
        - alert: PolarDB-X DN Slow SQL Count is Too High
          annotations:
            summary: PolarDB-X DN Slow SQL Count is Too High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              slow sql count is too high, over 50 for 5m.
          expr: |
            sum(rate(mysql_global_status_slow_queries[1m])) by (pod, polardbx_name, namespace) >= 50
          for: 5m
          labels:
            severity: critical

        - alert: PolarDB-X DN Slow SQL Count is High
          annotations:
            summary: PolarDB-X DN Slow SQL Count is High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              slow sql count is high, over 30 for 5m.
          expr: |
            sum(rate(mysql_global_status_slow_queries[1m])) by (pod, polardbx_name, namespace) >= 30
          for: 5m
          labels:
            severity: warning

    # ----------------------------------------------------------------------
    # CDC rules. Thresholds are in milliseconds:
    # 1800000 ms = 30 min, 600000 ms = 10 min.
    # ----------------------------------------------------------------------
    - name: polardbx-cdc
      rules:
        - alert: PolarDB-X CDC delay is Too High
          annotations:
            summary: PolarDB-X CDC delay is Too High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              cdc delay is too high, over 30min.
          expr: |
            sum(polardbx_cdc_dumper_delay_in_millisecond) by (pod, polardbx_name, namespace) >= 1800000
          for: 5m
          labels:
            severity: critical

        - alert: PolarDB-X CDC delay is High
          annotations:
            summary: PolarDB-X CDC delay is High
            description: >-
              PolarDB-X {{`{{ $labels.polardbx_name}}`}} pod {{`{{ $labels.pod }}`}}
              namespace {{`{{ $labels.namespace }}`}}
              cdc delay is high, over 10min.
          expr: |
            sum(polardbx_cdc_dumper_delay_in_millisecond) by (pod, polardbx_name, namespace) >= 600000
          for: 5m
          labels:
            severity: warning

    # ----------------------------------------------------------------------
    # Resource rules for the "engine" container and the underlying nodes.
    # ----------------------------------------------------------------------
    - name: polardbx-node-resources
      rules:
        # CPU of the engine container as a percentage of that container's CPU
        # limit. The limit is restricted to container="engine" so both sides
        # of the ratio describe the same container, as in the memory rules.
        - alert: PolarDB-X Node CPU Usage is Too High
          annotations:
            summary: PolarDB-X Node CPU is Too High
            description: >-
              Namespace {{`{{ $labels.namespace }}`}} pod {{`{{ $labels.pod }}`}}
              CPU is too High, over 90% for 5m.
          expr: |
            sum by (namespace, pod) (polardbx_container_cpu_usage_seconds_total:sum_rate{container="engine"})
            / sum by (namespace, pod) (kube_pod_container_resource_limits{container="engine", resource="cpu", unit="core"})
            * 100 >= 90
          for: 5m
          labels:
            severity: critical

        - alert: PolarDB-X Node CPU Usage is High
          annotations:
            summary: PolarDB-X Node CPU is High
            description: >-
              Namespace {{`{{ $labels.namespace }}`}} pod {{`{{ $labels.pod }}`}}
              CPU is High, over 80% for 5m.
          expr: |
            sum by (namespace, pod) (polardbx_container_cpu_usage_seconds_total:sum_rate{container="engine"})
            / sum by (namespace, pod) (kube_pod_container_resource_limits{container="engine", resource="cpu", unit="core"})
            * 100 >= 80
          for: 5m
          labels:
            severity: warning

        # Working-set memory of the engine container as a percentage of its
        # memory limit.
        - alert: PolarDB-X Node Used Memory is Too High
          annotations:
            summary: PolarDB-X Node Memory is Too High
            description: >-
              Namespace {{`{{ $labels.namespace }}`}} pod {{`{{ $labels.pod }}`}}
              Memory is too high, over 95% for 5m.
          expr: |
            sum by (namespace, pod) (container_memory_working_set_bytes{container="engine", id=~"^/kubepods.*"})
            / sum by (namespace, pod) (kube_pod_container_resource_limits{container="engine", resource="memory", unit="byte"})
            * 100 >= 95
          for: 5m
          labels:
            severity: critical

        - alert: PolarDB-X Node Used Memory is High
          annotations:
            summary: PolarDB-X Node Memory is High
            description: >-
              Namespace {{`{{ $labels.namespace }}`}} pod {{`{{ $labels.pod }}`}}
              Memory is high, over 90% for 5m.
          expr: |
            sum by (namespace, pod) (container_memory_working_set_bytes{container="engine", id=~"^/kubepods.*"})
            / sum by (namespace, pod) (kube_pod_container_resource_limits{container="engine", resource="memory", unit="byte"})
            * 100 >= 90
          for: 5m
          labels:
            severity: warning

        # Disk usage percentage = used / (avail + used), where
        # used = size - free. The ratio is scaled by 100 exactly once so it
        # can be compared against a percentage threshold.
        - alert: PolarDB-X Node Disk Usage is Too High
          annotations:
            summary: PolarDB-X-Node Disk Usage is Too High
            description: >-
              Instance {{`{{ $labels.instance }}`}} mountpoint {{`{{ $labels.mountpoint }}`}}
              Disk Usage is too high, over 90% for 5m.
          expr: |
            sum(
              (
                node_filesystem_size_bytes{fstype=~"ext.*|xfs|nfs", mountpoint!~".*pod.*", mountpoint!="/boot"}
                - node_filesystem_free_bytes{fstype=~"ext.*|xfs|nfs", mountpoint!~".*pod.*", mountpoint!="/boot"}
              ) * 100
              / (
                node_filesystem_avail_bytes{fstype=~"ext.*|xfs|nfs", mountpoint!~".*pod.*", mountpoint!="/boot"}
                + (
                  node_filesystem_size_bytes{fstype=~"ext.*|xfs|nfs", mountpoint!~".*pod.*", mountpoint!="/boot"}
                  - node_filesystem_free_bytes{fstype=~"ext.*|xfs|nfs", mountpoint!~".*pod.*", mountpoint!="/boot"}
                )
              )
            ) by (instance, mountpoint) >= 90
          for: 5m
          labels:
            severity: critical

        - alert: PolarDB-X Node Disk Usage is High
          annotations:
            summary: PolarDB-X-Node Disk Usage is High
            description: >-
              Instance {{`{{ $labels.instance }}`}} mountpoint {{`{{ $labels.mountpoint }}`}}
              Disk Usage is high, over 80% for 5m.
          expr: |
            sum(
              (
                node_filesystem_size_bytes{fstype=~"ext.*|xfs|nfs", mountpoint!~".*pod.*", mountpoint!="/boot"}
                - node_filesystem_free_bytes{fstype=~"ext.*|xfs|nfs", mountpoint!~".*pod.*", mountpoint!="/boot"}
              ) * 100
              / (
                node_filesystem_avail_bytes{fstype=~"ext.*|xfs|nfs", mountpoint!~".*pod.*", mountpoint!="/boot"}
                + (
                  node_filesystem_size_bytes{fstype=~"ext.*|xfs|nfs", mountpoint!~".*pod.*", mountpoint!="/boot"}
                  - node_filesystem_free_bytes{fstype=~"ext.*|xfs|nfs", mountpoint!~".*pod.*", mountpoint!="/boot"}
                )
              )
            ) by (instance, mountpoint) >= 80
          for: 5m
          labels:
            severity: warning

        # rate() of node_disk_io_time_seconds_total is the fraction of each
        # second the device spent doing I/O (0-1), so it is scaled by 100
        # before being compared against the percentage thresholds.
        - alert: PolarDB-X-Node IO Usage is Too High
          annotations:
            summary: PolarDB-X-Node IO Usage is Too High
            description: >-
              instance {{`{{ $labels.instance }}`}} device {{`{{ $labels.device }}`}}
              IO Usage is too high, over 90% for 5m.
          expr: |
            sum(rate(node_disk_io_time_seconds_total[5m])) by (device, instance) * 100 >= 90
          for: 5m
          labels:
            severity: critical

        - alert: PolarDB-X-Node IO Usage is High
          annotations:
            summary: PolarDB-X-Node IO Usage is High
            description: >-
              instance {{`{{ $labels.instance }}`}} device {{`{{ $labels.device }}`}}
              IO Usage is high, over 80% for 5m.
          expr: |
            sum(rate(node_disk_io_time_seconds_total[5m])) by (device, instance) * 100 >= 80
          for: 5m
          labels:
            severity: warning