Skip to content

Commit

Permalink
feat(route_sync): add metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
aauren authored and mrueg committed Nov 21, 2024
1 parent da6ef9b commit 8c5cc3e
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 4 deletions.
2 changes: 1 addition & 1 deletion pkg/controllers/routing/network_routes_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1289,7 +1289,7 @@ func NewNetworkRoutingController(clientset kubernetes.Interface,
nrc.bgpServerStarted = false
nrc.disableSrcDstCheck = kubeRouterConfig.DisableSrcDstCheck
nrc.initSrcDstCheckDone = false
nrc.routeSyncer = routes.NewRouteSyncer(kubeRouterConfig.InjectedRoutesSyncPeriod)
nrc.routeSyncer = routes.NewRouteSyncer(kubeRouterConfig.InjectedRoutesSyncPeriod, kubeRouterConfig.MetricsEnabled)

nrc.bgpHoldtime = kubeRouterConfig.BGPHoldTime.Seconds()
if nrc.bgpHoldtime > 65536 || nrc.bgpHoldtime < 3 {
Expand Down
24 changes: 24 additions & 0 deletions pkg/metrics/metrics_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,30 @@ var (
Name: "controller_policy_ipsets",
Help: "Active policy ipsets",
})
// ControllerHostRoutesSyncTime Time it took for the host routes controller to sync to the system
ControllerHostRoutesSyncTime = prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: namespace,
Name: "host_routes_sync_time",
Help: "Time it took for the host routes controller to sync to the system",
})
// ControllerHostRoutesSynced Number of host routes currently synced to the system
ControllerHostRoutesSynced = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: namespace,
Name: "host_routes_synced",
Help: "Count of host routes currently synced to the system",
})
// ControllerHostRoutesAdded Total number of host routes added to the system
ControllerHostRoutesAdded = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Name: "host_routes_added",
Help: "Total count of host routes added to the system",
})
// ControllerHostRoutesRemoved Total number of host routes removed from the system
ControllerHostRoutesRemoved = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: namespace,
Name: "host_routes_removed",
Help: "Total count of host routes removed to the system",
})
)

// Controller Holds settings for the metrics controller
Expand Down
31 changes: 30 additions & 1 deletion pkg/routes/route_sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"time"

"github.com/cloudnativelabs/kube-router/v2/pkg/healthcheck"
"github.com/cloudnativelabs/kube-router/v2/pkg/metrics"
"github.com/prometheus/client_golang/prometheus"
"github.com/vishvananda/netlink"
"k8s.io/klog/v2"
)
Expand All @@ -26,6 +28,7 @@ type RouteSync struct {
injectedRoutesSyncPeriod time.Duration
mutex sync.Mutex
routeReplacer func(route *netlink.Route) error
metricsEnabled bool
}

// AddInjectedRoute adds a route to the route map that is regularly synced to the kernel's routing table
Expand All @@ -34,6 +37,10 @@ func (rs *RouteSync) AddInjectedRoute(dst *net.IPNet, route *netlink.Route) {
defer rs.mutex.Unlock()
klog.V(3).Infof("Adding route for destination: %s", dst)
rs.routeTableStateMap[dst.String()] = route
if rs.metricsEnabled {
metrics.ControllerHostRoutesAdded.Inc()
metrics.ControllerHostRoutesSynced.Set(float64(len(rs.routeTableStateMap)))
}
}

// DelInjectedRoute deletes a route from the route map that is regularly synced to the kernel's routing table
Expand All @@ -44,10 +51,21 @@ func (rs *RouteSync) DelInjectedRoute(dst *net.IPNet) {
klog.V(3).Infof("Removing route for destination: %s", dst)
delete(rs.routeTableStateMap, dst.String())
}
if rs.metricsEnabled {
metrics.ControllerHostRoutesRemoved.Inc()
metrics.ControllerHostRoutesSynced.Set(float64(len(rs.routeTableStateMap)))
}
}

// SyncLocalRouteTable iterates over the local route state map and syncs all routes to the kernel's routing table
func (rs *RouteSync) SyncLocalRouteTable() error {
if rs.metricsEnabled {
startSyncTime := time.Now()
defer func(startTime time.Time) {
runTime := time.Since(startTime)
metrics.ControllerHostRoutesSyncTime.Observe(runTime.Seconds())
}(startSyncTime)
}
rs.mutex.Lock()
defer rs.mutex.Unlock()
klog.V(2).Infof("Running local route table synchronization")
Expand All @@ -61,6 +79,9 @@ func (rs *RouteSync) SyncLocalRouteTable() error {
}
}
}
if rs.metricsEnabled {
metrics.ControllerHostRoutesSynced.Set(float64(len(rs.routeTableStateMap)))
}
return nil
}

Expand Down Expand Up @@ -94,12 +115,20 @@ func (rs *RouteSync) Run(healthChan chan<- *healthcheck.ControllerHeartbeat, sto

// NewRouteSyncer creates a new routeSyncer that, when run, will sync routes kept in its local state table every
// syncPeriod
func NewRouteSyncer(syncPeriod time.Duration) *RouteSync {
func NewRouteSyncer(syncPeriod time.Duration, registerMetrics bool) *RouteSync {
	// The route replacer is stored as a field (defaulting to netlink.RouteReplace) instead of being called
	// directly, so that unit tests can swap in their own implementation.
	syncer := &RouteSync{
		routeTableStateMap:       make(map[string]*netlink.Route),
		injectedRoutesSyncPeriod: syncPeriod,
		mutex:                    sync.Mutex{},
		routeReplacer:            netlink.RouteReplace,
		metricsEnabled:           registerMetrics,
	}

	// The host route collectors are only registered when the caller asked for metrics; the same flag is kept
	// on the syncer so that its methods know whether to update those collectors.
	if registerMetrics {
		prometheus.MustRegister(
			metrics.ControllerHostRoutesSynced,
			metrics.ControllerHostRoutesSyncTime,
			metrics.ControllerHostRoutesAdded,
			metrics.ControllerHostRoutesRemoved,
		)
	}

	return syncer
}
4 changes: 2 additions & 2 deletions pkg/routes/route_sync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func Test_syncLocalRouteTable(t *testing.T) {
myNetlink.pause = time.Millisecond * 200

// Create a route replacer and seed it with some routes to iterate over
syncer := NewRouteSyncer(15 * time.Second)
syncer := NewRouteSyncer(15*time.Second, false)
syncer.routeTableStateMap = generateTestRouteMap(testRoutes)

// Replace the netlink.RouteReplace function with our own mock function that includes a WaitGroup for syncing
Expand Down Expand Up @@ -143,7 +143,7 @@ func Test_routeSyncer_run(t *testing.T) {

t.Run("Ensure that run goroutine shuts down correctly on stop", func(t *testing.T) {
// Setup routeSyncer to run 10 times a second
syncer := NewRouteSyncer(100 * time.Millisecond)
syncer := NewRouteSyncer(100*time.Millisecond, false)
myNetLink := mockNetlink{}
syncer.routeReplacer = myNetLink.mockRouteReplace
syncer.routeTableStateMap = generateTestRouteMap(testRoutes)
Expand Down

0 comments on commit 8c5cc3e

Please sign in to comment.