diff --git a/kustomize/monitoring/grafana/dashboards/kustomization.yaml b/kustomize/monitoring/grafana/dashboards/kustomization.yaml index 8f388d43..5764a226 100644 --- a/kustomize/monitoring/grafana/dashboards/kustomization.yaml +++ b/kustomize/monitoring/grafana/dashboards/kustomization.yaml @@ -11,6 +11,7 @@ configMapGenerator: - postgresql_service_health.json - prometheus_alerts.json - query_statistics.json + - pgbouncer_direct.json generatorOptions: disableNameSuffixHash: true diff --git a/kustomize/monitoring/grafana/dashboards/pgbouncer_direct.json b/kustomize/monitoring/grafana/dashboards/pgbouncer_direct.json new file mode 100644 index 00000000..68c23793 --- /dev/null +++ b/kustomize/monitoring/grafana/dashboards/pgbouncer_direct.json @@ -0,0 +1,710 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "PROMETHEUS", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "7.4.5" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "links": [ + { + "asDropdown": false, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "vendor=crunchydata" + ], + "title": "", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": "PROMETHEUS", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.15", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ccp_datasource}" + }, + "editorMode": "code", + "expr": "sum(ccp_pgbouncer_pools_client_active{cluster_name=~\"[[cluster_name]]\",pod=~\"[[pgbnode]]\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "client_active", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ccp_datasource}" + }, + "editorMode": "code", + "expr": "sum(ccp_pgbouncer_pools_client_waiting{cluster_name=~\"[[cluster_name]]\",pod=~\"[[pgbnode]]\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "client_waiting", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ccp_datasource}" + }, + "editorMode": "code", + "expr": "sum(ccp_pgbouncer_pools_server_active{cluster_name=~\"[[cluster_name]]\",pod=~\"[[pgbnode]]\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "server_active", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ccp_datasource}" + }, + "editorMode": "code", + "expr": "sum(ccp_pgbouncer_pools_server_idle{cluster_name=~\"[[cluster_name]]\",pod=~\"[[pgbnode]]\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "server_idle", + "range": true, + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ccp_datasource}" + }, + "editorMode": "code", + "expr": "sum(ccp_pgbouncer_pools_server_used{cluster_name=~\"[[cluster_name]]\",pod=~\"[[pgbnode]]\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "server_used", + "range": true, + "refId": "E" + } + ], + "title": "PGBouncer Total State Counts", + "type": "timeseries" + }, + { + "datasource": "PROMETHEUS", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.15", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ccp_datasource}" + }, + "editorMode": "code", + "expr": "ccp_pgbouncer_lists_item_count{cluster_name=~\"[[cluster_name]]\",pod=~\"[[pgbnode]]\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{list}}", + "range": true, + "refId": "A" + } + ], + "title": "PGBouncer Total Item Counts", + "type": "timeseries" + }, + { + "datasource": "PROMETHEUS", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.15", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ccp_datasource}" + }, + "editorMode": "code", + "expr": "ccp_pgbouncer_databases_current_connections{cluster_name=~\"[[cluster_name]]\", pod=~\"[[pgbnode]]\", name=~\"[[pool]]\"} / ccp_pgbouncer_databases_pool_size{cluster_name=~\"[[cluster_name]]\", pod=~\"[[pgbnode]]\", name=~\"[[pool]]\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "range": true, + "refId": "A" + } + ], + "title": "Connection % Used Per Pool ([[pool]])", + "type": "timeseries" + }, + { + "datasource": "PROMETHEUS", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.15", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ccp_datasource}" + }, + "editorMode": "code", + "expr": "sum(ccp_pgbouncer_clients_wait_seconds{cluster_name=~\"[[cluster_name]]\",pod=~\"[[pgbnode]]\", database=~\"[[pool]]\"}) by (pool,state)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pool}}", + "range": true, + "refId": "A" + } + ], + "title": "Client Connection State Counts Per Pool ([[pool]])", + "type": "timeseries" + }, + { + "datasource": "PROMETHEUS", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 10 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.5.15", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ccp_datasource}" + }, + "editorMode": "code", + "expr": "sum(ccp_pgbouncer_servers_close_needed{cluster_name=~\"[[cluster_name]]\",pod=~\"[[pgbnode]]\", database=~\"[[pool]]\"}) by (state)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Server Connection State Counts Per Pool ([[pool]])", + "type": "timeseries" + } + ], + "refresh": "5m", + "schemaVersion": 39, + "style": "dark", + "tags": [ + "vendor=crunchydata" + ], + "templating": { + "list": [ + { + "current": {}, + "datasource": "PROMETHEUS", + "definition": "label_values(up{exp_type='pgbouncer'},cluster_name)", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster_name", + "options": [], + "query": { + "query": "label_values(up{exp_type='pgbouncer'},cluster_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": {}, + "datasource": "PROMETHEUS", + "definition": "label_values(up{exp_type='pgbouncer'},pod)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "pgbnode", + "options": [], + "query": { + "query": "label_values(up{exp_type='pgbouncer'},pod)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "PROMETHEUS", + "definition": "label_values(ccp_pgbouncer_databases_pool_size{cluster_name=\"[[cluster_name]]\", pod=\"[[pgbnode]]\"},name)", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "pool", + "options": [], + "query": { + "query": "label_values(ccp_pgbouncer_databases_pool_size{cluster_name=\"[[cluster_name]]\", pod=\"[[pgbnode]]\"},name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "PGBouncer", + "uid": "a7ff3775-37c9-4072-b0bc-1292f5c5841b", + "version": 1 +} diff --git a/kustomize/monitoring/grafana/dashboards/pod_details.json b/kustomize/monitoring/grafana/dashboards/pod_details.json index 8ce6d395..dd704806 100644 --- a/kustomize/monitoring/grafana/dashboards/pod_details.json +++ b/kustomize/monitoring/grafana/dashboards/pod_details.json @@ -59,6 +59,73 @@ } ], "panels": [ + { + "datasource": "PROMETHEUS", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "2": { + "index": 2, + "text": "Primary" + }, + "1": { + "index": 1, + "text": "Replica" + } + }, + "type": "value" + } + ] + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 17, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "text": { + "valueSize": 30 + }, + "textMode": "auto" + }, + "pluginVersion": "9.2.20", + "targets": [ + { + "datasource": "PROMETHEUS", + "editorMode": "code", + "exemplar": false, + "expr": "ccp_is_in_recovery_status{pod=\"[[pod]]\", pg_cluster=\"[[cluster]]\"}", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Pod Type", + "type": "stat" + }, { "aliasColors": { "% Throttled": "yellow", @@ -1129,7 +1196,7 @@ "allValue": null, "current": {}, "datasource": "PROMETHEUS", - "definition": "label_values({pg_cluster=\"[[cluster]]\"},pod)", + "definition": "label_values({pg_cluster=\"[[cluster]]\", exp_type!=\"pgbouncer\"},pod)", "description": null, "error": null, "hide": 0, @@ -1139,7 +1206,7 @@ "name": "pod", "options": [], "query": { - "query": "label_values({pg_cluster=\"[[cluster]]\"},pod)", + "query": "label_values({pg_cluster=\"[[cluster]]\", exp_type!=\"pgbouncer\"},pod)", "refId": "PROMETHEUS-pod-Variable-Query" }, "refresh": 1, diff --git a/kustomize/monitoring/grafana/dashboards/postgresql_details.json b/kustomize/monitoring/grafana/dashboards/postgresql_details.json index bb81b85b..90866777 100644 --- a/kustomize/monitoring/grafana/dashboards/postgresql_details.json +++ b/kustomize/monitoring/grafana/dashboards/postgresql_details.json @@ -231,7 +231,7 @@ "pluginVersion": "7.4.5", "targets": [ { - "expr": "sum(pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\", state=\"active\"})*100 /sum(pg_settings_max_connections{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\"})", + "expr": "sum(pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\", state=\"active\"})*100 /sum(pg_settings_max_connections{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\"}) or sum(ccp_pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\", state=\"active\"})*100 /sum(ccp_connection_stats_max_connections{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\"})", "format": "time_series", "hide": false, "instant": true, @@ -313,7 +313,7 @@ "pluginVersion": "7.4.5", "targets": [ { - "expr": "sum(pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle in transaction\"})/sum(pg_settings_max_connections{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\"})", + "expr": "sum(pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle in transaction\"})/sum(pg_settings_max_connections{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\"}) or sum(ccp_pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle in transaction\"})/sum(ccp_connection_stats_max_connections{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\"})", "format": "time_series", "instant": true, "interval": "", @@ -472,7 +472,7 @@ "pluginVersion": "7.4.5", "targets": [ { - "expr": "sum(pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle\"})*100/sum(pg_settings_max_connections{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\"})", + "expr": "sum(pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle\"})*100/sum(pg_settings_max_connections{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\"}) or sum(ccp_pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle\"})*100/sum(ccp_connection_stats_max_connections{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\"})", "format": "time_series", "instant": true, "interval": "", @@ -816,7 +816,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (state) (pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle\"})", + "expr": "sum by (state) (pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle\"}) or sum by (state) (ccp_pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle\"})", "format": "time_series", "hide": false, "interval": "", @@ -827,7 +827,7 @@ "step": 2 }, { - "expr": "sum by (state) (pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle in transaction\"})", + "expr": "sum by (state) (pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle in transaction\"}) or sum by (state) (ccp_pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle in transaction\"})", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -835,7 +835,7 @@ "refId": "B" }, { - "expr": "sum by (state) (pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"active\"})", + "expr": "sum by (state) (pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"active\"}) or sum by (state) (ccp_pg_stat_activity_count{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"active\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "active", @@ -1696,7 +1696,7 @@ "format": "time_series", "intervalFactor": 2, "legendFormat": "Allocated", - "metric": "pg_stat_bgwriter_buffers_alloc", + "metric": "ccp_stat_bgwriter_buffers_alloc", "refId": "A", "step": 2 }, @@ -1705,7 +1705,7 @@ "format": "time_series", "intervalFactor": 2, "legendFormat": "Backend", - "metric": "pg_stat_bgwriter_buffers_backend", + "metric": "ccp_stat_bgwriter_buffers_backend", "refId": "B", "step": 2 }, @@ -1714,7 +1714,7 @@ "format": "time_series", "intervalFactor": 2, "legendFormat": "FSync", - "metric": "pg_stat_bgwriter_buffers_backend_fsync", + "metric": "ccp_stat_bgwriter_buffers_backend_fsync", "refId": "C", "step": 2 }, @@ -1723,7 +1723,7 @@ "format": "time_series", "intervalFactor": 2, "legendFormat": "CheckPoint", - "metric": "pg_stat_bgwriter_buffers_checkpoint", + "metric": "ccp_stat_bgwriter_buffers_checkpoint", "refId": "D", "step": 2 }, @@ -1732,7 +1732,7 @@ "format": "time_series", "intervalFactor": 2, "legendFormat": "Clean", - "metric": "pg_stat_bgwriter_buffers_clean", + "metric": "ccp_stat_bgwriter_buffers_clean", "refId": "E", "step": 2 } @@ -2071,7 +2071,7 @@ "allValue": ".*", "current": {}, "datasource": "PROMETHEUS", - "definition": "label_values({pg_cluster=\"[[cluster]]\"},pod)", + "definition": "label_values({pg_cluster=\"[[cluster]]\", exp_type!=\"pgbouncer\"},pod)", "description": null, "error": null, "hide": 0, @@ -2081,7 +2081,7 @@ "name": "pod", "options": [], "query": { - "query": "label_values({pg_cluster=\"[[cluster]]\"},pod)", + "query": "label_values({pg_cluster=\"[[cluster]]\", exp_type!=\"pgbouncer\"},pod)", "refId": "PROMETHEUS-pod-Variable-Query" }, "refresh": 1, diff --git a/kustomize/monitoring/grafana/dashboards/postgresql_overview.json b/kustomize/monitoring/grafana/dashboards/postgresql_overview.json index 61c2486f..b965463f 100644 --- a/kustomize/monitoring/grafana/dashboards/postgresql_overview.json +++ b/kustomize/monitoring/grafana/dashboards/postgresql_overview.json @@ -163,7 +163,7 @@ "targets": [ { "$hashKey": "object:243", - "expr": "sum(pg_up{pg_cluster=~\"$cluster\"})*100+sum(ccp_is_in_recovery_status{pg_cluster=~\"$cluster\"})", + "expr": "sum(pg_up{pg_cluster=~\"$cluster\"})*100+sum(ccp_is_in_recovery_status{pg_cluster=~\"$cluster\"}) or sum(up{pg_cluster=~\"$cluster\"})*100+sum(ccp_is_in_recovery_status{pg_cluster=~\"$cluster\"})", "format": "time_series", "interval": "", "intervalFactor": 1, diff --git a/kustomize/monitoring/grafana/dashboards/postgresql_service_health.json b/kustomize/monitoring/grafana/dashboards/postgresql_service_health.json index 2bee1d0b..de4f7fa1 100644 --- a/kustomize/monitoring/grafana/dashboards/postgresql_service_health.json +++ b/kustomize/monitoring/grafana/dashboards/postgresql_service_health.json @@ -600,7 +600,7 @@ "allValue": null, "current": {}, "datasource": "PROMETHEUS", - "definition": "label_values({pg_cluster=\"[[cluster]]\"},role)", + "definition": "label_values({pg_cluster=\"[[cluster]]\", exp_type!=\"pgbouncer\"},role)", "description": null, "error": null, "hide": 0, @@ -610,7 +610,7 @@ "name": "role", "options": [], "query": { - "query": "label_values({pg_cluster=\"[[cluster]]\"},role)", + "query": "label_values({pg_cluster=\"[[cluster]]\", exp_type!=\"pgbouncer\"},role)", "refId": "PROMETHEUS-role-Variable-Query" }, "refresh": 1, diff --git a/kustomize/monitoring/grafana/dashboards/prometheus_alerts.json b/kustomize/monitoring/grafana/dashboards/prometheus_alerts.json index ef8fb41a..f41aa481 100644 --- a/kustomize/monitoring/grafana/dashboards/prometheus_alerts.json +++ b/kustomize/monitoring/grafana/dashboards/prometheus_alerts.json @@ -136,7 +136,7 @@ "pluginVersion": "7.4.5", "targets": [ { - "expr": "count(count by (kubernetes_namespace) (pg_up))", + "expr": "count(count by (kubernetes_namespace) (pg_up)) or count(count by (kubernetes_namespace) (up))", "format": "time_series", "instant": true, "interval": "", @@ -208,7 +208,7 @@ "pluginVersion": "7.4.5", "targets": [ { - "expr": "count(count by (pg_cluster) (pg_up))", + "expr": "count(count by (pg_cluster) (pg_up)) or count(count by (pg_cluster) (up))", "format": "time_series", "instant": true, "interval": "", @@ -280,7 +280,7 @@ "pluginVersion": "7.4.5", "targets": [ { - "expr": "count(pg_up)", + "expr": "count(pg_up) or count(up)", "format": "time_series", "instant": true, "interval": "", diff --git a/kustomize/monitoring/grafana/dashboards/query_statistics.json b/kustomize/monitoring/grafana/dashboards/query_statistics.json index a17b06ed..72f67122 100644 --- a/kustomize/monitoring/grafana/dashboards/query_statistics.json +++ b/kustomize/monitoring/grafana/dashboards/query_statistics.json @@ -996,7 +996,7 @@ "refId": "A" } ], - "title": "Query Total WAL Genterated (Bytes)", + "title": "Query Total WAL Generated (Bytes)", "transformations": [ { "id": "organize", @@ -1068,7 +1068,7 @@ "allValue": null, "current": {}, "datasource": "PROMETHEUS", - "definition": "label_values({pg_cluster=\"[[cluster]]\"},role)", + "definition": "label_values({pg_cluster=\"[[cluster]]\", exp_type!=\"pgbouncer\"},role)", "description": null, "error": null, "hide": 0, @@ -1078,7 +1078,7 @@ "name": "role", "options": [], "query": { - "query": "label_values({pg_cluster=\"[[cluster]]\"},role)", + "query": "label_values({pg_cluster=\"[[cluster]]\", exp_type!=\"pgbouncer\"},role)", "refId": "StandardVariableQuery" }, "refresh": 2, diff --git a/kustomize/monitoring/prometheus/clusterrole.yaml b/kustomize/monitoring/prometheus/clusterrole.yaml index 784b6400..a2ef2748 100644 --- a/kustomize/monitoring/prometheus/clusterrole.yaml +++ b/kustomize/monitoring/prometheus/clusterrole.yaml @@ -11,3 +11,7 @@ rules: - get - list - watch +- nonResourceURLs: + - /metrics + verbs: + - get diff --git a/kustomize/monitoring/prometheus/config/crunchy-alert-rules-pg.yml b/kustomize/monitoring/prometheus/config/crunchy-alert-rules-pg.yml index 0ccd9e3d..83f666e4 100644 --- a/kustomize/monitoring/prometheus/config/crunchy-alert-rules-pg.yml +++ b/kustomize/monitoring/prometheus/config/crunchy-alert-rules-pg.yml @@ -35,7 +35,7 @@ groups: ########## POSTGRESQL RULES ########## - alert: PGIsUp - expr: pg_up < 1 + expr: "pg_up < 1 or up < 1" for: 60s labels: service: postgresql diff --git a/kustomize/monitoring/prometheus/config/prometheus.yml b/kustomize/monitoring/prometheus/config/prometheus.yml index 89a627bb..4c0be18b 100644 --- a/kustomize/monitoring/prometheus/config/prometheus.yml +++ b/kustomize/monitoring/prometheus/config/prometheus.yml @@ -10,6 +10,27 @@ global: evaluation_interval: 5s scrape_configs: +- job_name: 'pgo-metrics' + # If you are running CPK v5.7 or earlier, you will need to change the scheme to 'http' + # and add a metrics port to the postgres-operator deployment that exposes port 8080. + scheme: https + authorization: + type: Bearer + credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token + tls_config: + # By default, the operator's metrics server automatically creates self-signed certs + # which cannot be verified, so `insecure_skip_verify` is set to `true`. See the + # documentation for providing your own signed certificates. + insecure_skip_verify: true + kubernetes_sd_configs: + - role: pod + selectors: + - role: pod + # Our kustomize installer sets this label to "postgres-operator" + # but our Helm installer sets this label to the name of the installation + # therefore we just check for the existence of this label. + label: postgres-operator.crunchydata.com/control-plane + - job_name: 'crunchy-postgres-exporter' kubernetes_sd_configs: - role: pod @@ -42,13 +63,12 @@ scrape_configs: - source_labels: [__meta_kubernetes_pod_label_postgres_operator_crunchydata_com_role] target_label: role -- job_name: 'crunchy-postgres-exporter-v4' +- job_name: 'crunchy-otel-collector' kubernetes_sd_configs: - role: pod selectors: - role: pod - label: crunchy-postgres-exporter=true - + label: postgres-operator.crunchydata.com/crunchy-otel-collector=true relabel_configs: # Keep exporter port and drop all others - source_labels: [__meta_kubernetes_pod_container_port_number] @@ -61,19 +81,30 @@ scrape_configs: - source_labels: [__meta_kubernetes_pod_name] target_label: pod # Convert namespace and cluster name to pg_cluster=namespace:cluster - - source_labels: [__meta_kubernetes_namespace,__meta_kubernetes_pod_label_pg_cluster] + - source_labels: [__meta_kubernetes_namespace,__meta_kubernetes_pod_label_postgres_operator_crunchydata_com_cluster] target_label: pg_cluster separator: ":" replacement: '$1$2' # Convert kubernetes pod ip to ip - source_labels: [__meta_kubernetes_pod_ip] target_label: ip - # Set deployment_name as deployment label - - source_labels: [__meta_kubernetes_pod_label_deployment_name] + # Convert postgres-operator.crunchydata.com/instance to deployment + - source_labels: [__meta_kubernetes_pod_label_postgres_operator_crunchydata_com_instance] target_label: deployment - # Set label for role - - source_labels: [__meta_kubernetes_pod_label_role] + # Convert postgres-operator.crunchydata.com/role to role + - source_labels: [__meta_kubernetes_pod_label_postgres_operator_crunchydata_com_role] target_label: role + # The following relabels should make it easier to use pgMonitor dashboards. + # Note: The following was added for the pgBouncer dashboard and what labels it requires. + # For pgBouncer, `exp_type` should be equal to role. + - source_labels: [__meta_kubernetes_pod_label_postgres_operator_crunchydata_com_role] + target_label: exp_type + # `cluster_name` is equivalent to `pg_cluster` + - source_labels: [__meta_kubernetes_namespace,__meta_kubernetes_pod_label_postgres_operator_crunchydata_com_cluster] + target_label: cluster_name + separator: ":" + replacement: '$1$2' + rule_files: - /etc/prometheus/alert-rules.d/*.yml alerting: