From 8c5cc3e4cbc477a5243d4cc6d9eb2414d5d3962c Mon Sep 17 00:00:00 2001 From: Aaron U'Ren Date: Sun, 17 Nov 2024 13:54:59 -0600 Subject: [PATCH] feat(route_sync): add metrics --- .../routing/network_routes_controller.go | 2 +- pkg/metrics/metrics_controller.go | 24 ++++++++++++++ pkg/routes/route_sync.go | 31 ++++++++++++++++++- pkg/routes/route_sync_test.go | 4 +-- 4 files changed, 57 insertions(+), 4 deletions(-) diff --git a/pkg/controllers/routing/network_routes_controller.go b/pkg/controllers/routing/network_routes_controller.go index 4301629cf..356f5884c 100644 --- a/pkg/controllers/routing/network_routes_controller.go +++ b/pkg/controllers/routing/network_routes_controller.go @@ -1289,7 +1289,7 @@ func NewNetworkRoutingController(clientset kubernetes.Interface, nrc.bgpServerStarted = false nrc.disableSrcDstCheck = kubeRouterConfig.DisableSrcDstCheck nrc.initSrcDstCheckDone = false - nrc.routeSyncer = routes.NewRouteSyncer(kubeRouterConfig.InjectedRoutesSyncPeriod) + nrc.routeSyncer = routes.NewRouteSyncer(kubeRouterConfig.InjectedRoutesSyncPeriod, kubeRouterConfig.MetricsEnabled) nrc.bgpHoldtime = kubeRouterConfig.BGPHoldTime.Seconds() if nrc.bgpHoldtime > 65536 || nrc.bgpHoldtime < 3 { diff --git a/pkg/metrics/metrics_controller.go b/pkg/metrics/metrics_controller.go index 393b66362..1e08a49f4 100644 --- a/pkg/metrics/metrics_controller.go +++ b/pkg/metrics/metrics_controller.go @@ -209,6 +209,30 @@ var ( Name: "controller_policy_ipsets", Help: "Active policy ipsets", }) + // ControllerHostRoutesSyncTime Time it took for the host routes controller to sync to the system + ControllerHostRoutesSyncTime = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: namespace, + Name: "host_routes_sync_time", + Help: "Time it took for the host routes controller to sync to the system", + }) + // ControllerHostRoutesSynced Number of host routes currently synced to the system + ControllerHostRoutesSynced = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: 
namespace, + Name: "host_routes_synced", + Help: "Count of host routes currently synced to the system", + }) + // ControllerHostRoutesAdded Number of host routes added to the system + ControllerHostRoutesAdded = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: namespace, + Name: "host_routes_added", + Help: "Total count of host routes added to the system", + }) + // ControllerHostRoutesRemoved Number of host routes removed from the system + ControllerHostRoutesRemoved = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: namespace, + Name: "host_routes_removed", + Help: "Total count of host routes removed to the system", + }) ) // Controller Holds settings for the metrics controller diff --git a/pkg/routes/route_sync.go b/pkg/routes/route_sync.go index 1ad017c14..8436541d6 100644 --- a/pkg/routes/route_sync.go +++ b/pkg/routes/route_sync.go @@ -7,6 +7,8 @@ import ( "time" "github.com/cloudnativelabs/kube-router/v2/pkg/healthcheck" + "github.com/cloudnativelabs/kube-router/v2/pkg/metrics" + "github.com/prometheus/client_golang/prometheus" "github.com/vishvananda/netlink" "k8s.io/klog/v2" ) @@ -26,6 +28,7 @@ type RouteSync struct { injectedRoutesSyncPeriod time.Duration mutex sync.Mutex routeReplacer func(route *netlink.Route) error + metricsEnabled bool } // addInjectedRoute adds a route to the route map that is regularly synced to the kernel's routing table @@ -34,6 +37,10 @@ func (rs *RouteSync) AddInjectedRoute(dst *net.IPNet, route *netlink.Route) { defer rs.mutex.Unlock() klog.V(3).Infof("Adding route for destination: %s", dst) rs.routeTableStateMap[dst.String()] = route + if rs.metricsEnabled { + metrics.ControllerHostRoutesAdded.Inc() + metrics.ControllerHostRoutesSynced.Set(float64(len(rs.routeTableStateMap))) + } } // delInjectedRoute delete a route from the route map that is regularly synced to the kernel's routing table @@ -44,10 +51,21 @@ func (rs *RouteSync) DelInjectedRoute(dst *net.IPNet) { klog.V(3).Infof("Removing route for 
destination: %s", dst) delete(rs.routeTableStateMap, dst.String()) + if rs.metricsEnabled { // count only routes that were actually present and removed + metrics.ControllerHostRoutesRemoved.Inc() + metrics.ControllerHostRoutesSynced.Set(float64(len(rs.routeTableStateMap))) + } } } // syncLocalRouteTable iterates over the local route state map and syncs all routes to the kernel's routing table func (rs *RouteSync) SyncLocalRouteTable() error { + if rs.metricsEnabled { + startSyncTime := time.Now() + defer func(startTime time.Time) { + runTime := time.Since(startTime) + metrics.ControllerHostRoutesSyncTime.Observe(runTime.Seconds()) + }(startSyncTime) + } rs.mutex.Lock() defer rs.mutex.Unlock() klog.V(2).Infof("Running local route table synchronization") @@ -61,6 +79,9 @@ func (rs *RouteSync) SyncLocalRouteTable() error { } } } + if rs.metricsEnabled { + metrics.ControllerHostRoutesSynced.Set(float64(len(rs.routeTableStateMap))) + } return nil } @@ -94,12 +115,20 @@ func (rs *RouteSync) Run(healthChan chan<- *healthcheck.ControllerHeartbeat, sto // NewRouteSyncer creates a new routeSyncer that, when run, will sync routes kept in its local state table every // syncPeriod -func NewRouteSyncer(syncPeriod time.Duration) *RouteSync { +func NewRouteSyncer(syncPeriod time.Duration, registerMetrics bool) *RouteSync { rs := RouteSync{} rs.routeTableStateMap = make(map[string]*netlink.Route) rs.injectedRoutesSyncPeriod = syncPeriod rs.mutex = sync.Mutex{} // We substitute the RouteReplace function here so that we can easily monkey patch it in our unit tests rs.routeReplacer = netlink.RouteReplace + rs.metricsEnabled = registerMetrics + + // Register Metrics + if registerMetrics { + prometheus.MustRegister(metrics.ControllerHostRoutesSynced, metrics.ControllerHostRoutesSyncTime, + metrics.ControllerHostRoutesAdded, metrics.ControllerHostRoutesRemoved) + } + return &rs } diff --git a/pkg/routes/route_sync_test.go b/pkg/routes/route_sync_test.go index 47f1bf6c8..114294719 100644 --- a/pkg/routes/route_sync_test.go +++ 
b/pkg/routes/route_sync_test.go @@ -61,7 +61,7 @@ func Test_syncLocalRouteTable(t *testing.T) { myNetlink.pause = time.Millisecond * 200 // Create a route replacer and seed it with some routes to iterate over - syncer := NewRouteSyncer(15 * time.Second) + syncer := NewRouteSyncer(15*time.Second, false) syncer.routeTableStateMap = generateTestRouteMap(testRoutes) // Replace the netlink.RouteReplace function with our own mock function that includes a WaitGroup for syncing @@ -143,7 +143,7 @@ func Test_routeSyncer_run(t *testing.T) { t.Run("Ensure that run goroutine shuts down correctly on stop", func(t *testing.T) { // Setup routeSyncer to run 10 times a second - syncer := NewRouteSyncer(100 * time.Millisecond) + syncer := NewRouteSyncer(100*time.Millisecond, false) myNetLink := mockNetlink{} syncer.routeReplacer = myNetLink.mockRouteReplace syncer.routeTableStateMap = generateTestRouteMap(testRoutes)