diff --git a/docs/dashboard.md b/docs/dashboard.md index 878d70b0e..945069d99 100644 --- a/docs/dashboard.md +++ b/docs/dashboard.md @@ -1,20 +1,18 @@ -## Grafana Dashboard +# hami-vgpu-dashboard -- You can load this dashboard json file [gpu-dashboard.json](./gpu-dashboard.json) +- You can find the hami-vgpu-dashboard here: [https://grafana.com/grafana/dashboards/21833-hami-vgpu-dashboard](https://grafana.com/grafana/dashboards/21833-hami-vgpu-dashboard) -- This dashboard also includes some NVIDIA DCGM metrics: +- This dashboard also includes some [NVIDIA DCGM metrics](https://github.com/NVIDIA/dcgm-exporter); deploy dcgm-exporter with: `kubectl create -f https://raw.githubusercontent.com/NVIDIA/dcgm-exporter/master/dcgm-exporter.yaml` - [dcgm-exporter](https://github.com/NVIDIA/dcgm-exporter) deploy:`kubectl create -f https://raw.githubusercontent.com/NVIDIA/dcgm-exporter/master/dcgm-exporter.yaml` - -- use this prometheus custom metric configure: +- add prometheus custom metric configuration: ```yaml -- job_name: 'kubernetes-vgpu-exporter' +- job_name: 'kubernetes-hami-exporter' kubernetes_sd_configs: - role: endpoints relabel_configs: - source_labels: [__meta_kubernetes_endpoints_name] - regex: vgpu-device-plugin-monitor + regex: hami-.* replacement: $1 action: keep - source_labels: [__meta_kubernetes_pod_node_name] @@ -47,7 +45,7 @@ action: replace ``` -- reload promethues: +- reload prometheus: ```bash curl -XPOST http://{promethuesServer}:{port}/-/reload diff --git a/docs/dashboard_cn.md b/docs/dashboard_cn.md index 20ee3ecb8..dfd436d11 100644 --- a/docs/dashboard_cn.md +++ b/docs/dashboard_cn.md @@ -1,19 +1,18 @@ -## Grafana Dashboard +# hami-vgpu-dashboard -- 你可以在 grafana 中导入此 [gpu-dashboard.json](./gpu-dashboard.json) -- 此 dashboard 还包括一部分 NVIDIA DCGM 监控指标: +- 你可以在此找到 hami-vgpu-dashboard:[https://grafana.com/grafana/dashboards/21833-hami-vgpu-dashboard](https://grafana.com/grafana/dashboards/21833-hami-vgpu-dashboard) - 
[dcgm-exporter](https://github.com/NVIDIA/dcgm-exporter)部署:`kubectl create -f https://raw.githubusercontent.com/NVIDIA/dcgm-exporter/master/dcgm-exporter.yaml` +- 此 dashboard 还包括一部分 [NVIDIA DCGM 监控指标](https://github.com/NVIDIA/dcgm-exporter)，dcgm-exporter 部署命令: `kubectl create -f https://raw.githubusercontent.com/NVIDIA/dcgm-exporter/master/dcgm-exporter.yaml` - 添加 prometheus 自定义的监控项: ```yaml -- job_name: 'kubernetes-vgpu-exporter' +- job_name: 'kubernetes-hami-exporter' kubernetes_sd_configs: - role: endpoints relabel_configs: - source_labels: [__meta_kubernetes_endpoints_name] - regex: vgpu-device-plugin-monitor + regex: hami-.* replacement: $1 action: keep - source_labels: [__meta_kubernetes_pod_node_name] @@ -46,7 +45,7 @@ action: replace ``` -- 加载 promethues 配置: +- 热加载 prometheus 配置: ```bash curl -XPOST http://{promethuesServer}:{port}/-/reload diff --git a/docs/gpu-dashboard.json b/docs/gpu-dashboard.json deleted file mode 100644 index 2f71c23ea..000000000 --- a/docs/gpu-dashboard.json +++ /dev/null @@ -1,1150 +0,0 @@ -{ - "annotations": { - "list": [ - { - "$$hashKey": "object:192", - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "description": "This dashboard is gpu metrics dashboard base on NVIDIA DCGM Exporter and HAMi/k8s-vgpu-scheduler", - "editable": true, - "gnetId": 12239, - "graphTooltip": 0, - "id": 46, - "iteration": 1694498903162, - "links": [], - "panels": [ - { - "datasource": "ALL", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "max": 100, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "#EAB839", - "value": 83 - }, - { - "color": "red", - "value": 87 - } - ] - }, - "unit": "celsius" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 4, - "x": 0, - "y": 0 - }, - "id": 14, - "options": 
{ - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "7.5.17", - "targets": [ - { - "expr": "avg(DCGM_FI_DEV_GPU_TEMP{node_name=~\"${node_name}\", gpu=~\"${gpu}\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GPU平均温度", - "type": "gauge" - }, - { - "cacheTimeout": null, - "datasource": "ALL", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "max": 2400, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "#EAB839", - "value": 1800 - }, - { - "color": "red", - "value": 2200 - } - ] - }, - "unit": "watt" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 4, - "x": 4, - "y": 0 - }, - "id": 16, - "links": [], - "options": { - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "sum" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "7.5.17", - "targets": [ - { - "expr": "sum(DCGM_FI_DEV_POWER_USAGE{node_name=~\"${node_name}\", gpu=~\"${gpu}\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GPU总功率", - "type": "gauge" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "ALL", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 0 - }, - "hiddenSeries": false, - "id": 12, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "sort": "current", - "sortDesc": false, - "total": false, - 
"values": false - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.17", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "DCGM_FI_DEV_GPU_TEMP{node_name=~\"${node_name}\", gpu=~\"${gpu}\"}", - "instant": false, - "interval": "", - "legendFormat": "{{node_name}} gpu{{gpu}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "GPU温度", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:97", - "format": "celsius", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:98", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "ALL", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 0 - }, - "hiddenSeries": false, - "id": 2, - "interval": "", - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": false, - "sideWidth": null, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.17", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - 
"spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "DCGM_FI_DEV_SM_CLOCK{node_name=~\"${node_name}\", gpu=~\"${gpu}\"} * 1000000", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{node_name}} gpu{{gpu}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "GPU SM时钟频率(DCGM_FI_DEV_SM_CLOCK)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:462", - "decimals": null, - "format": "hertz", - "label": "", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:463", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "ALL", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 10 - }, - "hiddenSeries": false, - "id": 18, - "legend": { - "avg": true, - "current": false, - "max": true, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.17", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "DCGM_FI_DEV_FB_USED{node_name=~\"${node_name}\", gpu=~\"${gpu}\"}", - "interval": "", - "legendFormat": "{{node_name}} gpu{{gpu}}", - 
"refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "GPU帧缓存(显存)使用量(DCGM_FI_DEV_FB_USED)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:618", - "format": "decmbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:619", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "ALL", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 10 - }, - "hiddenSeries": false, - "id": 10, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.17", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "DCGM_FI_DEV_POWER_USAGE{node_name=~\"${node_name}\", gpu=~\"${gpu}\"}", - "interval": "", - "legendFormat": "{{node_name}} gpu{{gpu}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "GPU功率消耗(DCGM_FI_DEV_POWER_USAGE)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": 
"time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:214", - "format": "watt", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:215", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "ALL", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 20 - }, - "hiddenSeries": false, - "id": 6, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.17", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "DCGM_FI_DEV_GPU_UTIL{node_name=~\"${node_name}\", gpu=~\"${gpu}\"}", - "interval": "", - "legendFormat": "{{node_name}} gpu{{gpu}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "GPU利用率(DCGM_FI_DEV_GPU_UTIL)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:699", - "format": "percent", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "$$hashKey": "object:700", - "format": "short", - "label": null, - "logBase": 1, - "max": 
null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "uid": "ALL" - }, - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 20 - }, - "hiddenSeries": false, - "id": 26, - "legend": { - "alignAsTable": false, - "avg": true, - "current": false, - "max": true, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "9.3.14", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "uid": "ALL" - }, - "exemplar": true, - "expr": "Device_utilization_desc_of_container{node_name=~\"${node_name}\"}", - "interval": "", - "legendFormat": "{{podname}}", - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "HAMi-pod算力使用率", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:779", - "format": "percent", - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "$$hashKey": "object:780", - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "ALL", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 20 - }, - "hiddenSeries": false, - "id": 24, - "legend": { - 
"alignAsTable": false, - "avg": true, - "current": false, - "max": true, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.17", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "Device_memory_desc_of_container{node_name=~\"${node_name}\"}", - "interval": "", - "legendFormat": "{{podname}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "HAMi-pod显存使用量(byte)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:779", - "format": "bytes", - "label": null, - "logBase": 1, - "show": true - }, - { - "$$hashKey": "object:780", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "ALL", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 11, - "w": 12, - "x": 0, - "y": 30 - }, - "hiddenSeries": false, - "id": 22, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.17", - "pointradius": 2, - "points": false, - 
"renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "HostGPUMemoryUsage{node_name=~\"${node_name}\"}", - "interval": "", - "legendFormat": "{{node_name}} gpu {{deviceid}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "HAMi-节点GPU显存使用量", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1087", - "format": "bytes", - "label": null, - "logBase": 1, - "show": true - }, - { - "$$hashKey": "object:1088", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "ALL", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 11, - "w": 12, - "x": 12, - "y": 30 - }, - "hiddenSeries": false, - "id": 20, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": true, - "min": true, - "rightSide": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.17", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "HostCoreUtilization{node_name=~\"${node_name}\"}", - "interval": "", - "legendFormat": "{{node_name}} gpu {{deviceid}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": 
[], - "timeShift": null, - "title": "HAMi-节点GPU算力使用率", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:1243", - "format": "percent", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "$$hashKey": "object:1244", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "refresh": false, - "schemaVersion": 27, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": null, - "current": { - "selected": false - }, - "datasource": "ALL", - "definition": "label_values({__name__=~\"DCGM_FI_DEV_FB_FREE|vGPU_device_memory_limit_in_bytes\"}, node_name)", - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": null, - "multi": true, - "name": "node_name", - "options": [], - "query": { - "query": "label_values({__name__=~\"DCGM_FI_DEV_FB_FREE|vGPU_device_memory_limit_in_bytes\"}, node_name)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "selected": false, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": "ALL", - "definition": "label_values(DCGM_FI_DEV_FB_FREE{node_name=\"$node_name\"},gpu)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": null, - "multi": true, - "name": "gpu", - "options": [], - "query": { - "query": "label_values(DCGM_FI_DEV_FB_FREE{node_name=\"$node_name\"},gpu)", - "refId": "ALL-gpu-Variable-Query" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - 
"tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-12h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ] - }, - "timezone": "", - "title": "k8s-vgpu-scheduler Dashboard", - "uid": "Oxed_c6Wz1", - "version": 3 -}