Skip to content

Commit

Permalink
Add metrics for rocksdb query latency (#1709) (#1722)
Browse files Browse the repository at this point in the history
* Add metrics for rocksdb query latency

* Add metrics for rocksdb write stalling

* Add metrics for rocksdb write stall histogram

* Fix tests

(cherry picked from commit 75c86a7)

Co-authored-by: Evgeniy Scherbina <evgeniy.shcherbina.es@gmail.com>
  • Loading branch information
mergify[bot] and evgeniy-scherbina authored Sep 15, 2023
1 parent 12d8077 commit 9b8d68b
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 1 deletion.
154 changes: 154 additions & 0 deletions cmd/kava/opendb/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,29 @@ type Metrics struct {
BlockCacheHit metrics.Gauge
BlockCacheAdd metrics.Gauge
BlockCacheAddFailures metrics.Gauge

// Latency
DBGetMicrosP50 metrics.Gauge
DBGetMicrosP95 metrics.Gauge
DBGetMicrosP99 metrics.Gauge
DBGetMicrosP100 metrics.Gauge
DBGetMicrosCount metrics.Gauge

DBWriteMicrosP50 metrics.Gauge
DBWriteMicrosP95 metrics.Gauge
DBWriteMicrosP99 metrics.Gauge
DBWriteMicrosP100 metrics.Gauge
DBWriteMicrosCount metrics.Gauge

// Write Stall
StallMicros metrics.Gauge

DBWriteStallP50 metrics.Gauge
DBWriteStallP95 metrics.Gauge
DBWriteStallP99 metrics.Gauge
DBWriteStallP100 metrics.Gauge
DBWriteStallCount metrics.Gauge
DBWriteStallSum metrics.Gauge
}

// registerMetrics registers metrics in prometheus and initializes rocksdbMetrics variable
Expand Down Expand Up @@ -137,6 +160,114 @@ func registerMetrics() {
Name: "block_cache_add_failures",
Help: "number of failures when adding blocks to block cache",
}, labels),

// Latency
DBGetMicrosP50: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "latency",
Name: "db_get_micros_p50",
Help: "",
}, labels),
DBGetMicrosP95: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "latency",
Name: "db_get_micros_p95",
Help: "",
}, labels),
DBGetMicrosP99: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "latency",
Name: "db_get_micros_p99",
Help: "",
}, labels),
DBGetMicrosP100: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "latency",
Name: "db_get_micros_p100",
Help: "",
}, labels),
DBGetMicrosCount: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "latency",
Name: "db_get_micros_count",
Help: "",
}, labels),

DBWriteMicrosP50: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "latency",
Name: "db_write_micros_p50",
Help: "",
}, labels),
DBWriteMicrosP95: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "latency",
Name: "db_write_micros_p95",
Help: "",
}, labels),
DBWriteMicrosP99: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "latency",
Name: "db_write_micros_p99",
Help: "",
}, labels),
DBWriteMicrosP100: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "latency",
Name: "db_write_micros_p100",
Help: "",
}, labels),
DBWriteMicrosCount: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "latency",
Name: "db_write_micros_count",
Help: "",
}, labels),

// Write Stall
StallMicros: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "stall",
Name: "stall_micros",
Help: "",
}, labels),

DBWriteStallP50: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "stall",
Name: "db_write_stall_p50",
Help: "",
}, labels),
DBWriteStallP95: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "stall",
Name: "db_write_stall_p95",
Help: "",
}, labels),
DBWriteStallP99: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "stall",
Name: "db_write_stall_p99",
Help: "",
}, labels),
DBWriteStallP100: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "stall",
Name: "db_write_stall_p100",
Help: "",
}, labels),
DBWriteStallCount: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "stall",
Name: "db_write_stall_count",
Help: "",
}, labels),
DBWriteStallSum: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: "rocksdb",
Subsystem: "stall",
Name: "db_write_stall_sum",
Help: "",
}, labels),
}
}

Expand All @@ -163,4 +294,27 @@ func (m *Metrics) report(props *properties, stats *stats) {
m.BlockCacheHit.Set(float64(stats.BlockCacheHit))
m.BlockCacheAdd.Set(float64(stats.BlockCacheAdd))
m.BlockCacheAddFailures.Set(float64(stats.BlockCacheAddFailures))

// Latency
m.DBGetMicrosP50.Set(stats.DBGetMicros.P50)
m.DBGetMicrosP95.Set(stats.DBGetMicros.P95)
m.DBGetMicrosP99.Set(stats.DBGetMicros.P99)
m.DBGetMicrosP100.Set(stats.DBGetMicros.P100)
m.DBGetMicrosCount.Set(stats.DBGetMicros.Count)

m.DBWriteMicrosP50.Set(stats.DBWriteMicros.P50)
m.DBWriteMicrosP95.Set(stats.DBWriteMicros.P95)
m.DBWriteMicrosP99.Set(stats.DBWriteMicros.P99)
m.DBWriteMicrosP100.Set(stats.DBWriteMicros.P100)
m.DBWriteMicrosCount.Set(stats.DBWriteMicros.Count)

// Write Stall
m.StallMicros.Set(float64(stats.StallMicros))

m.DBWriteStallP50.Set(stats.DBWriteStallHistogram.P50)
m.DBWriteStallP95.Set(stats.DBWriteStallHistogram.P95)
m.DBWriteStallP99.Set(stats.DBWriteStallHistogram.P99)
m.DBWriteStallP100.Set(stats.DBWriteStallHistogram.P100)
m.DBWriteStallCount.Set(stats.DBWriteStallHistogram.Count)
m.DBWriteStallSum.Set(stats.DBWriteStallHistogram.Sum)
}
4 changes: 3 additions & 1 deletion cmd/kava/opendb/stats_loader.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ type stats struct {
BytesRead int64

// Writer has to wait for compaction or flush to finish.
StallMicros int64
StallMicros int64
DBWriteStallHistogram *float64Histogram

// Last level and non-last level read statistics
LastLevelReadBytes int64
Expand Down Expand Up @@ -180,6 +181,7 @@ func (l *statLoader) load() (*stats, error) {
BytesWritten: l.getInt64StatValue("rocksdb.bytes.written", count),
BytesRead: l.getInt64StatValue("rocksdb.bytes.read", count),
StallMicros: l.getInt64StatValue("rocksdb.stall.micros", count),
DBWriteStallHistogram: l.getFloat64HistogramStatValue("rocksdb.db.write.stall"),
LastLevelReadBytes: l.getInt64StatValue("rocksdb.last.level.read.bytes", count),
LastLevelReadCount: l.getInt64StatValue("rocksdb.last.level.read.count", count),
NonLastLevelReadBytes: l.getInt64StatValue("rocksdb.non.last.level.read.bytes", count),
Expand Down
1 change: 1 addition & 0 deletions cmd/kava/opendb/stats_loader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ func TestStatsLoader(t *testing.T) {
"rocksdb.bytes.written": &defaultStat,
"rocksdb.bytes.read": &defaultStat,
"rocksdb.stall.micros": &defaultStat,
"rocksdb.db.write.stall": &defaultHistogramStat,
"rocksdb.last.level.read.bytes": &defaultStat,
"rocksdb.last.level.read.count": &defaultStat,
"rocksdb.non.last.level.read.bytes": &defaultStat,
Expand Down

0 comments on commit 9b8d68b

Please sign in to comment.