Skip to content

Commit

Permalink
Add a label with the server alias information for all Sentinel generated metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
roivaz committed Aug 9, 2024
1 parent a10ed06 commit 018a6f5
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 46 deletions.
32 changes: 20 additions & 12 deletions pkg/redis/events/watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,31 +38,31 @@ var (
Namespace: "saas_redis_sentinel",
Help: "+sdown (https://redis.io/topics/sentinel#sentinel-api)",
},
[]string{"sentinel", "shard", "redis_server"},
[]string{"sentinel", "shard", "redis_server_host", "redis_server_alias"},
)
sdownSentinelCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "sdown_sentinel_count",
Namespace: "saas_redis_sentinel",
Help: "+sdown (https://redis.io/topics/sentinel#sentinel-api)",
},
[]string{"sentinel", "shard", "redis_server"},
[]string{"sentinel", "shard", "redis_server_host", "redis_server_alias"},
)
sdownClearedCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "sdown_cleared_count",
Namespace: "saas_redis_sentinel",
Help: "-sdown (https://redis.io/topics/sentinel#sentinel-api)",
},
[]string{"sentinel", "shard", "redis_server"},
[]string{"sentinel", "shard", "redis_server_host", "redis_server_alias"},
)
sdownClearedSentinelCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "sdown_cleared_sentinel_count",
Namespace: "saas_redis_sentinel",
Help: "-sdown (https://redis.io/topics/sentinel#sentinel-api)",
},
[]string{"sentinel", "shard", "redis_server"},
[]string{"sentinel", "shard", "redis_server_host", "redis_server_alias"},
)
)

Expand Down Expand Up @@ -208,14 +208,16 @@ func (sew *SentinelEventWatcher) metricsFromEvent(rem RedisEventMessage) {
sdownSentinelCount.With(
prometheus.Labels{
"sentinel": sew.sentinelURI, "shard": rem.master.name,
"redis_server": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port),
"redis_server_host": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port),
"redis_server_alias": sew.topology.GetPool().GetServerAlias(fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port)),
},
).Add(1)
default:
sdownCount.With(
prometheus.Labels{
"sentinel": sew.sentinelURI, "shard": rem.master.name,
"redis_server": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port),
"redis_server_host": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port),
"redis_server_alias": sew.topology.GetPool().GetServerAlias(fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port)),
},
).Add(1)
}
Expand All @@ -225,14 +227,16 @@ func (sew *SentinelEventWatcher) metricsFromEvent(rem RedisEventMessage) {
sdownClearedSentinelCount.With(
prometheus.Labels{
"sentinel": sew.sentinelURI, "shard": rem.master.name,
"redis_server": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port),
"redis_server_host": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port),
"redis_server_alias": sew.topology.GetPool().GetServerAlias(fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port)),
},
).Add(1)
default:
sdownClearedCount.With(
prometheus.Labels{
"sentinel": sew.sentinelURI, "shard": rem.master.name,
"redis_server": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port),
"redis_server_host": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port),
"redis_server_alias": sew.topology.GetPool().GetServerAlias(fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port)),
},
).Add(1)
}
Expand All @@ -258,25 +262,29 @@ func (sew *SentinelEventWatcher) initCounters() {
sdownSentinelCount.With(
prometheus.Labels{
"sentinel": sew.sentinelURI, "shard": shard.Name,
"redis_server": server.ID(),
"redis_server_host": server.ID(),
"redis_server_alias": sew.topology.GetPool().GetServerAlias(server.ID()),
},
).Add(0)
sdownCount.With(
prometheus.Labels{
"sentinel": sew.sentinelURI, "shard": shard.Name,
"redis_server": server.ID(),
"redis_server_host": server.ID(),
"redis_server_alias": sew.topology.GetPool().GetServerAlias(server.ID()),
},
).Add(0)
sdownClearedSentinelCount.With(
prometheus.Labels{
"sentinel": sew.sentinelURI, "shard": shard.Name,
"redis_server": server.ID(),
"redis_server_host": server.ID(),
"redis_server_alias": sew.topology.GetPool().GetServerAlias(server.ID()),
},
).Add(0)
sdownClearedCount.With(
prometheus.Labels{
"sentinel": sew.sentinelURI, "shard": shard.Name,
"redis_server": server.ID(),
"redis_server_host": server.ID(),
"redis_server_alias": sew.topology.GetPool().GetServerAlias(server.ID()),
},
).Add(0)
}
Expand Down
112 changes: 78 additions & 34 deletions pkg/redis/metrics/sentinel_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,31 +22,31 @@ var (
Namespace: "saas_redis_sentinel",
Help: `"sentinel master <name> link-pending-commands"`,
},
[]string{"sentinel", "shard", "redis_server", "role"},
[]string{"sentinel", "shard", "redis_server_host", "redis_server_alias", "role"},
)
lastOkPingReply = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "last_ok_ping_reply",
Namespace: "saas_redis_sentinel",
Help: `"sentinel master <name> last-ok-ping-reply"`,
},
[]string{"sentinel", "shard", "redis_server", "role"},
[]string{"sentinel", "shard", "redis_server_host", "redis_server_alias", "role"},
)
roleReportedTime = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "role_reported_time",
Namespace: "saas_redis_sentinel",
Help: `"sentinel master <name> role-reported-time"`,
},
[]string{"sentinel", "shard", "redis_server", "role"},
[]string{"sentinel", "shard", "redis_server_host", "redis_server_alias", "role"},
)
numOtherSentinels = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "num_other_sentinels",
Namespace: "saas_redis_sentinel",
Help: `"sentinel master <name> num-other-sentinels"`,
},
[]string{"sentinel", "shard", "redis_server", "role"},
[]string{"sentinel", "shard", "redis_server_host", "redis_server_alias", "role"},
)

masterLinkDownTime = prometheus.NewGaugeVec(
Expand All @@ -55,7 +55,7 @@ var (
Namespace: "saas_redis_sentinel",
Help: `"sentinel slaves master-link-down-time"`,
},
[]string{"sentinel", "shard", "redis_server", "role"},
[]string{"sentinel", "shard", "redis_server_host", "redis_server_alias", "role"},
)

slaveReplOffset = prometheus.NewGaugeVec(
Expand All @@ -64,7 +64,7 @@ var (
Namespace: "saas_redis_sentinel",
Help: `"sentinel slaves slave-repl-offset"`,
},
[]string{"sentinel", "shard", "redis_server", "role"},
[]string{"sentinel", "shard", "redis_server_host", "redis_server_alias", "role"},
)
)

Expand All @@ -85,6 +85,7 @@ type SentinelMetricsGatherer struct {
refreshInterval time.Duration
sentinelURI string
sentinel *sharded.SentinelServer
serverPool *redis.ServerPool
started bool
cancel context.CancelFunc
}
Expand All @@ -99,6 +100,7 @@ func NewSentinelMetricsGatherer(sentinelURI string, refreshInterval time.Duratio
refreshInterval: refreshInterval,
sentinelURI: sentinelURI,
sentinel: sentinel,
serverPool: pool,
}, nil
}

Expand Down Expand Up @@ -177,21 +179,39 @@ func (smg *SentinelMetricsGatherer) gatherMetrics(ctx context.Context) error {
}

for _, master := range mresult {

linkPendingCommands.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port), "role": master.RoleReported,
masterServerHost := fmt.Sprintf("%s:%d", master.IP, master.Port)
masterServerAlias := smg.serverPool.GetServerAlias(masterServerHost)

linkPendingCommands.With(prometheus.Labels{
"sentinel": smg.sentinelURI,
"shard": master.Name,
"redis_server_host": masterServerHost,
"redis_server_alias": masterServerAlias,
"role": master.RoleReported,
}).Set(float64(master.LinkPendingCommands))

lastOkPingReply.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port), "role": master.RoleReported,
lastOkPingReply.With(prometheus.Labels{
"sentinel": smg.sentinelURI,
"shard": master.Name,
"redis_server_host": masterServerHost,
"redis_server_alias": masterServerAlias,
"role": master.RoleReported,
}).Set(float64(master.LastOkPingReply))

roleReportedTime.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port), "role": master.RoleReported,
roleReportedTime.With(prometheus.Labels{
"sentinel": smg.sentinelURI,
"shard": master.Name,
"redis_server_host": masterServerHost,
"redis_server_alias": masterServerAlias,
"role": master.RoleReported,
}).Set(float64(master.RoleReportedTime))

numOtherSentinels.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port), "role": master.RoleReported,
numOtherSentinels.With(prometheus.Labels{
"sentinel": smg.sentinelURI,
"shard": master.Name,
"redis_server_host": masterServerHost,
"redis_server_alias": masterServerAlias,
"role": master.RoleReported,
}).Set(float64(master.NumOtherSentinels))

sresult, err := smg.sentinel.SentinelSlaves(ctx, master.Name)
Expand All @@ -202,39 +222,63 @@ func (smg *SentinelMetricsGatherer) gatherMetrics(ctx context.Context) error {
// Cleanup any vector that corresponds to the same server but with a
// different role to avoid stale metrics after a role switch
cleanupMetrics(prometheus.Labels{
"sentinel": smg.sentinelURI,
"shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port),
"role": string(client.Slave),
"sentinel": smg.sentinelURI,
"shard": master.Name,
"redis_server_host": masterServerHost,
"redis_server_alias": masterServerAlias,
"role": string(client.Slave),
})

for _, slave := range sresult {

linkPendingCommands.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", slave.IP, slave.Port), "role": slave.RoleReported,
slaveServerHost := fmt.Sprintf("%s:%d", slave.IP, slave.Port)
slaveServerAlias := smg.serverPool.GetServerAlias(slaveServerHost)

linkPendingCommands.With(prometheus.Labels{
"sentinel": smg.sentinelURI,
"shard": master.Name,
"redis_server_host": slaveServerHost,
"redis_server_alias": slaveServerAlias,
"role": slave.RoleReported,
}).Set(float64(slave.LinkPendingCommands))

lastOkPingReply.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", slave.IP, slave.Port), "role": slave.RoleReported,
lastOkPingReply.With(prometheus.Labels{
"sentinel": smg.sentinelURI,
"shard": master.Name,
"redis_server_host": slaveServerHost,
"redis_server_alias": slaveServerAlias,
"role": slave.RoleReported,
}).Set(float64(slave.LastOkPingReply))

roleReportedTime.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", slave.IP, slave.Port), "role": slave.RoleReported,
roleReportedTime.With(prometheus.Labels{
"sentinel": smg.sentinelURI,
"shard": master.Name,
"redis_server_host": slaveServerHost,
"redis_server_alias": slaveServerAlias,
"role": slave.RoleReported,
}).Set(float64(slave.RoleReportedTime))

masterLinkDownTime.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", slave.IP, slave.Port), "role": slave.RoleReported,
masterLinkDownTime.With(prometheus.Labels{
"sentinel": smg.sentinelURI,
"shard": master.Name,
"redis_server_host": slaveServerHost,
"redis_server_alias": slaveServerAlias,
"role": slave.RoleReported,
}).Set(float64(slave.MasterLinkDownTime))

slaveReplOffset.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", slave.IP, slave.Port), "role": slave.RoleReported,
slaveReplOffset.With(prometheus.Labels{
"sentinel": smg.sentinelURI,
"shard": master.Name,
"redis_server_host": slaveServerHost,
"redis_server_alias": slaveServerAlias,
"role": slave.RoleReported,
}).Set(float64(slave.SlaveReplOffset))

cleanupMetrics(prometheus.Labels{
"sentinel": smg.sentinelURI,
"shard": master.Name,
"redis_server": fmt.Sprintf("%s:%d", slave.IP, slave.Port),
"role": string(client.Master),
"sentinel": smg.sentinelURI,
"shard": master.Name,
"redis_server_host": slaveServerHost,
"redis_server_alias": slaveServerAlias,
"role": string(client.Master),
})
}
}
Expand Down
8 changes: 8 additions & 0 deletions pkg/redis/server/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,11 @@ func (pool *ServerPool) indexByHostPort() map[string]*Server {

return index
}

// GetServerAlias returns the alias of the server that pool.GetServer resolves
// for connectionString. When GetServer reports an error, the empty string is
// returned instead, so callers can use the result directly (for example as a
// metric label value) without handling an error.
//
// NOTE(review): GetServer is defined outside this view; confirm whether calling
// it with a nil second argument can register a new server in the pool as a
// side effect of what looks like a read-only lookup.
func (pool *ServerPool) GetServerAlias(connectionString string) string {
	srv, err := pool.GetServer(connectionString, nil)
	if err != nil {
		// Lookup failed: fall back to an empty alias rather than propagating.
		return ""
	}
	return srv.alias
}
4 changes: 4 additions & 0 deletions pkg/redis/sharded/redis_sharded_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ func (cluster Cluster) LookupServerByID(hostport string) *RedisServer {
return nil
}

// GetPool returns the *redis.ServerPool stored in the cluster's pool field.
// The method has a value receiver, so it operates on a copy of the Cluster;
// the returned pointer is the same one held by the original value.
func (cluster Cluster) GetPool() *redis.ServerPool {
return cluster.pool
}

func (cluster *Cluster) Discover(ctx context.Context, options ...DiscoveryOption) error {
var merr operatorutils.MultiError

Expand Down

0 comments on commit 018a6f5

Please sign in to comment.