From 2b934c08e68e8536105753fc0292dd531a398f2d Mon Sep 17 00:00:00 2001 From: Enrique Llorente Date: Thu, 13 Jun 2024 07:22:52 +0200 Subject: [PATCH 01/16] factory,control-manager: Add namespaces informer The namespace active-network annotation is handled at the NAD controller, since namespaces are common to the cluster they need to be updated at a common place, in the case of IC this is control-manager. This change adds the namespace informer to it so nad controller can check if the annotation is already set. Signed-off-by: Enrique Llorente --- go-controller/pkg/factory/factory.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index 4d2de8f2b7f..9bc6b06f4cc 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -764,6 +764,13 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset return nil, err } } + + if util.IsNetworkSegmentationSupportEnabled() { + wf.informers[NamespaceType], err = newInformer(NamespaceType, wf.iFactory.Core().V1().Namespaces().Informer()) + if err != nil { + return nil, err + } + } } if config.OVNKubernetesFeature.EnableMultiExternalGateway { From 56aaf25ea32fc89217972c0a6c19703add564211 Mon Sep 17 00:00:00 2001 From: Enrique Llorente Date: Thu, 13 Jun 2024 08:45:43 +0200 Subject: [PATCH 02/16] rbac: Allow controller manager to patch ns At IC the controller manager will be responsible for patching namespace with active-network, so it needs permissions to do so. 
Signed-off-by: Enrique Llorente --- dist/templates/rbac-ovnkube-cluster-manager.yaml.j2 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dist/templates/rbac-ovnkube-cluster-manager.yaml.j2 b/dist/templates/rbac-ovnkube-cluster-manager.yaml.j2 index 23ed2a7ee61..9ac84a9b693 100644 --- a/dist/templates/rbac-ovnkube-cluster-manager.yaml.j2 +++ b/dist/templates/rbac-ovnkube-cluster-manager.yaml.j2 @@ -100,3 +100,7 @@ rules: - adminnetworkpolicies/status - baselineadminnetworkpolicies/status verbs: [ "patch" ] + - apiGroups: [""] + resources: + - namespaces/status + verbs: [ "patch" ] From 8ccc653c8d3d5311701dc8bbe08be8c8470cda07 Mon Sep 17 00:00:00 2001 From: Enrique Llorente Date: Wed, 12 Jun 2024 12:23:42 +0200 Subject: [PATCH 03/16] util: Add namespace active-network updater Signed-off-by: Enrique Llorente --- .../pkg/util/namespace_annotation.go | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/util/namespace_annotation.go b/go-controller/pkg/util/namespace_annotation.go index 7b6fd8b5136..736e0b260dc 100644 --- a/go-controller/pkg/util/namespace_annotation.go +++ b/go-controller/pkg/util/namespace_annotation.go @@ -1,12 +1,18 @@ package util import ( + "context" + "encoding/json" "fmt" "net" "strings" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" ) @@ -20,7 +26,8 @@ const ( BfdAnnotation = "k8s.ovn.org/bfd-enabled" ExternalGatewayPodIPsAnnotation = "k8s.ovn.org/external-gw-pod-ips" // Annotation for enabling ACL logging to controller's log file - AclLoggingAnnotation = "k8s.ovn.org/acl-logging" + AclLoggingAnnotation = "k8s.ovn.org/acl-logging" + ActiveNetworkAnnotation = "k8s.ovn.org/active-network" ) func UpdateExternalGatewayPodIPsAnnotation(k kube.Interface, namespace string, exgwIPs []string) 
error { @@ -50,3 +57,29 @@ func ParseRoutingExternalGWAnnotation(annotation string) (sets.Set[string], erro } return ipTracker, nil } + +func UpdateNamespaceActiveNetwork(k kubernetes.Interface, namespace *corev1.Namespace, network string) error { + var err error + var patchData []byte + patch := struct { + Metadata map[string]interface{} `json:"metadata"` + }{ + Metadata: map[string]interface{}{ + "annotations": map[string]string{ + ActiveNetworkAnnotation: network, + }, + "resourceVersion": namespace.ResourceVersion, + }, + } + + patchData, err = json.Marshal(&patch) + if err != nil { + return fmt.Errorf("failed setting annotations on namespace %s: %w", namespace.Name, err) + } + + _, err = k.CoreV1().Namespaces().Patch(context.TODO(), namespace.Name, types.StrategicMergePatchType, patchData, metav1.PatchOptions{}, "status") + if err != nil { + return fmt.Errorf("failed setting annotation on namespace %s: %w", namespace.Name, err) + } + return nil +} From 11f542e3ec60f3df4c6cd2429e585ef945728f37 Mon Sep 17 00:00:00 2001 From: Enrique Llorente Date: Wed, 12 Jun 2024 14:52:29 +0200 Subject: [PATCH 04/16] nad, controller: Add manager system Add a net attach def manager system to react on creation/deletion of NADs; this will be used to attach a network segmentation namespace manager that reacts on NAD events. 
Signed-off-by: Enrique Llorente --- .../network_attach_def_controller.go | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/go-controller/pkg/network-attach-def-controller/network_attach_def_controller.go b/go-controller/pkg/network-attach-def-controller/network_attach_def_controller.go index f8a440bfb13..e18aa23811e 100644 --- a/go-controller/pkg/network-attach-def-controller/network_attach_def_controller.go +++ b/go-controller/pkg/network-attach-def-controller/network_attach_def_controller.go @@ -67,6 +67,11 @@ type NetworkControllerManager interface { CleanupDeletedNetworks(allControllers []NetworkController) error } +type NetAttachDefinitionManager interface { + OnAddNetAttachDef(*nettypes.NetworkAttachmentDefinition, util.NetInfo) error + OnDelNetAttachDef(nadName, netName string) error +} + type networkNADInfo struct { nadNames map[string]struct{} nc NetworkController @@ -84,6 +89,7 @@ type NetAttachDefinitionController struct { loopPeriod time.Duration stopChan chan struct{} wg sync.WaitGroup + managers map[string]NetAttachDefinitionManager // key is nadName, value is BasicNetInfo perNADNetInfo *syncmap.SyncMap[util.BasicNetInfo] @@ -114,6 +120,7 @@ func NewNetAttachDefinitionController(name string, ncm NetworkControllerManager, queue: workqueue.NewNamedRateLimitingQueue(rateLimiter, "net-attach-def"), loopPeriod: time.Second, stopChan: make(chan struct{}), + managers: map[string]NetAttachDefinitionManager{}, perNADNetInfo: syncmap.NewSyncMap[util.BasicNetInfo](), perNetworkNADInfo: syncmap.NewSyncMap[*networkNADInfo](), } @@ -130,6 +137,10 @@ func NewNetAttachDefinitionController(name string, ncm NetworkControllerManager, } +func (nadController *NetAttachDefinitionController) AddManager(key string, manager NetAttachDefinitionManager) { + nadController.managers[key] = manager +} + func (nadController *NetAttachDefinitionController) Start() error { klog.Infof("Starting %s NAD controller", nadController.name) g := errgroup.Group{} @@ -381,6 
+392,14 @@ func (nadController *NetAttachDefinitionController) AddNetAttachDef(ncm NetworkC nadController.perNADNetInfo.Delete(nadName) return nil } + + for managerName, manager := range nadController.managers { + if err := manager.OnAddNetAttachDef(netattachdef, nInfo); err != nil { + nadController.perNADNetInfo.Delete(nadName) + return fmt.Errorf("failed running OnAddNetAttachDef at nad manager '%s': %w", managerName, err) + } + } + klog.V(5).Infof("%s: net-attach-def %s network %s first seen", nadController.name, nadName, netName) err = nadController.addNADToController(ncm, nadName, nInfo, doStart) if err != nil { @@ -451,6 +470,14 @@ func (nadController *NetAttachDefinitionController) DeleteNetAttachDef(netAttach return nil } netName := existingNadNetConfInfo.GetNetworkName() + + for managerName, manager := range nadController.managers { + if err := manager.OnDelNetAttachDef(nadName, netName); err != nil { + nadController.perNADNetInfo.Delete(nadName) + return fmt.Errorf("failed running OnDelNetAttachDef at nad manager '%s': %w", managerName, err) + } + } + err := nadController.deleteNADFromController(netName, nadName) if err != nil { klog.Errorf("%s: Failed to delete net-attach-def %s from network %s: %v", nadController.name, nadName, netName, err) From cb57eb8fa95cde11366f09be58b017fa6d3a457c Mon Sep 17 00:00:00 2001 From: Enrique Llorente Date: Thu, 13 Jun 2024 09:40:08 +0200 Subject: [PATCH 05/16] gh: Add network-segmentation lane Signed-off-by: Enrique Llorente --- .github/workflows/test.yml | 6 +++++- test/scripts/e2e-cp.sh | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7905d869a6f..788251ef517 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -424,6 +424,7 @@ jobs: - {"target": "kv-live-migration", "ha": "noHA", "gateway-mode": "local", "ipfamily": "ipv4", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-disabled", 
"num-workers": "3"} - {"target": "kv-live-migration", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "dualstack", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "num-workers": "3"} - {"target": "control-plane", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "ipv4", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones", "forwarding": "disable-forwarding"} + - {"target": "network-segmentation", "ha": "noHA", "gateway-mode": "shared", "ipfamily": "dualstack", "disable-snat-multiple-gws": "SnatGW", "second-bridge": "1br", "ic": "ic-single-node-zones"} needs: [ build-pr ] env: JOB_NAME: "${{ matrix.target }}-${{ matrix.ha }}-${{ matrix.gateway-mode }}-${{ matrix.ipfamily }}-${{ matrix.disable-snat-multiple-gws }}-${{ matrix.second-bridge }}" @@ -437,7 +438,8 @@ jobs: OVN_SECOND_BRIDGE: "${{ matrix.second-bridge == '2br' }}" KIND_IPV4_SUPPORT: "${{ matrix.ipfamily == 'IPv4' || matrix.ipfamily == 'dualstack' }}" KIND_IPV6_SUPPORT: "${{ matrix.ipfamily == 'IPv6' || matrix.ipfamily == 'dualstack' }}" - ENABLE_MULTI_NET: "${{ matrix.target == 'multi-homing' || matrix.target == 'kv-live-migration' }}" + ENABLE_MULTI_NET: "${{ matrix.target == 'multi-homing' || matrix.target == 'kv-live-migration' || matrix.target == 'network-segmentation' }}" + ENABLE_NETWORK_SEGMENTATION: "${{ matrix.target == 'network-segmentation' }}" KIND_INSTALL_KUBEVIRT: "${{ matrix.target == 'kv-live-migration' }}" OVN_COMPACT_MODE: "${{ matrix.target == 'compact-mode' }}" OVN_DUMMY_GATEWAY_BRIDGE: "${{ matrix.target == 'compact-mode' }}" @@ -525,6 +527,8 @@ jobs: make -C test control-plane WHAT="Kubevirt Virtual Machines" elif [ "${{ matrix.target }}" == "control-plane-helm" ]; then make -C test control-plane + elif [ "${{ matrix.target }}" == "network-segmentation" ]; then + make -C test control-plane WHAT="Network Segmentation" else make -C test ${{ matrix.target }} if [ "${{ matrix.ipfamily }}" != "ipv6" 
]; then diff --git a/test/scripts/e2e-cp.sh b/test/scripts/e2e-cp.sh index 4f93075b422..f3abb093da2 100755 --- a/test/scripts/e2e-cp.sh +++ b/test/scripts/e2e-cp.sh @@ -161,6 +161,15 @@ if [ "${WHAT}" != "${KV_LIVE_MIGRATION_TESTS}" ]; then SKIPPED_TESTS+=$KV_LIVE_MIGRATION_TESTS fi +# Only run network segmentation tests if they are explicitly requested +KV_NETWORK_SEGMENTATION_TESTS="Network Segmentation" +if [ "${WHAT}" != "${KV_NETWORK_SEGMENTATION_TESTS}" ]; then + if [ "$SKIPPED_TESTS" != "" ]; then + SKIPPED_TESTS+="|" + fi + SKIPPED_TESTS+=$KV_NETWORK_SEGMENTATION_TESTS +fi + # setting these is required to make RuntimeClass tests work ... :/ export KUBE_CONTAINER_RUNTIME=remote export KUBE_CONTAINER_RUNTIME_ENDPOINT=unix:///run/containerd/containerd.sock From ffdd7a62215b6452c3c7f517de2dd5349db14f9e Mon Sep 17 00:00:00 2001 From: Enrique Llorente Date: Thu, 13 Jun 2024 11:15:49 +0200 Subject: [PATCH 06/16] nad: Ensure active-network annotation at namespace This change ensures that there are no multiple primary networks at the same namespace, that there are no pods before activating the primary network, and updates the namespace with active-network if everything is fine; otherwise it will annotate with "unknown". Also, the namespace is created with active-network="default". 
Signed-off-by: Enrique Llorente --- .../secondary_network_cluster_manager.go | 3 + .../network_segmentation_manager.go | 88 ++++++++++ go-controller/pkg/ovn/namespace.go | 9 + go-controller/pkg/types/const.go | 1 + test/e2e/multihoming_utils.go | 8 +- test/e2e/network_segmentation.go | 160 ++++++++++++++++++ test/e2e/util.go | 11 ++ 7 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 go-controller/pkg/network-attach-def-controller/network_segmentation_manager.go create mode 100644 test/e2e/network_segmentation.go diff --git a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go index 940ea7ad2f8..8efb2768b29 100644 --- a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go +++ b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go @@ -56,6 +56,9 @@ func newSecondaryNetworkClusterManager(ovnClient *util.OVNClusterManagerClientse if err != nil { return nil, err } + if util.IsNetworkSegmentationSupportEnabled() { + sncm.nadController.AddManager("network segmentation", nad.NewNetworkSegmentationManager(ovnClient.KubeClient, wf)) + } return sncm, nil } diff --git a/go-controller/pkg/network-attach-def-controller/network_segmentation_manager.go b/go-controller/pkg/network-attach-def-controller/network_segmentation_manager.go new file mode 100644 index 00000000000..828ca658930 --- /dev/null +++ b/go-controller/pkg/network-attach-def-controller/network_segmentation_manager.go @@ -0,0 +1,88 @@ +package networkAttachDefController + +import ( + "fmt" + + nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" +) + +var _ NetAttachDefinitionManager = 
&NetworkSegmentationManager{} + +type NetworkSegmentationManager struct { + kube kubernetes.Interface + watchFactory *factory.WatchFactory +} + +func NewNetworkSegmentationManager(kube kubernetes.Interface, watchFactory *factory.WatchFactory) *NetworkSegmentationManager { + return &NetworkSegmentationManager{ + kube: kube, + watchFactory: watchFactory, + } +} + +func (m *NetworkSegmentationManager) OnAddNetAttachDef(nad *nettypes.NetworkAttachmentDefinition, network util.NetInfo) error { + return m.ensureNamespaceActiveNetwork(nad.Namespace, network) +} + +func (m *NetworkSegmentationManager) OnDelNetAttachDef(nadName, netName string) error { + //TODO + return nil +} + +func (m *NetworkSegmentationManager) ensureNamespaceActiveNetwork(namespace string, network util.NetInfo) error { + if m.watchFactory == nil || m.kube == nil { + return nil + } + if !network.IsPrimaryNetwork() { + return nil + } + + networkNamespace, err := m.watchFactory.GetNamespace(namespace) + if err != nil { + return fmt.Errorf("failed looking for network namespace '%s': %w", namespace, err) + } + + currentActiveNetwork, ok := networkNamespace.Annotations[util.ActiveNetworkAnnotation] + if !ok { + return fmt.Errorf("missing active-network annotation at namespace %s", namespace) + } + + if currentActiveNetwork == network.GetNetworkName() { + return nil + } + + if currentActiveNetwork != types.DefaultNetworkName { + //TODO: Event + klog.Warningf("Active primary network %s already configured at namespace %s, marking namespace active network to unknown", currentActiveNetwork, networkNamespace.Name) + if err := util.UpdateNamespaceActiveNetwork(m.kube, networkNamespace, types.UnknownNetworkName); err != nil { + return fmt.Errorf("failed annotating namespace with active-network=unknown when a primary network was already configured: %w", err) + } + return nil + } + + pods, err := m.watchFactory.GetPods(networkNamespace.Name) + if err != nil { + return fmt.Errorf("failed ensuring namespace '%s' active 
network when listing pods: %w", networkNamespace.Name, err) + } + + // At this point all those pods exist before configuring the primary network, + // so we should mark the namespace and send and event + if len(pods) > 0 { + //TODO: Event + klog.Warningf("Pods present at namesapace %s before configuring primary network, marking namespace active network to unknown", networkNamespace.Name) + if err := util.UpdateNamespaceActiveNetwork(m.kube, networkNamespace, types.UnknownNetworkName); err != nil { + return fmt.Errorf("failed annotating namespace with active-network=unknown when namespace contains pods before configuring a primary network: %w", err) + } + return nil + } + + if err := util.UpdateNamespaceActiveNetwork(m.kube, networkNamespace, network.GetNetworkName()); err != nil { + return fmt.Errorf("failed annotating namespace with active-network=%s: %w", network.GetNetworkName(), err) + } + return nil +} diff --git a/go-controller/pkg/ovn/namespace.go b/go-controller/pkg/ovn/namespace.go index 6dddd2a85df..cb6210a1be8 100644 --- a/go-controller/pkg/ovn/namespace.go +++ b/go-controller/pkg/ovn/namespace.go @@ -100,6 +100,15 @@ func (oc *DefaultNetworkController) AddNamespace(ns *kapi.Namespace) error { klog.Infof("[%s] adding namespace", ns.Name) // Keep track of how long syncs take. 
start := time.Now() + + if util.IsNetworkSegmentationSupportEnabled() { + if _, ok := ns.Annotations[util.ActiveNetworkAnnotation]; !ok { + if err := util.UpdateNamespaceActiveNetwork(oc.kube.KClient, ns, types.DefaultNetworkName); err != nil { + return fmt.Errorf("failed annotating namesspace with active-network=default: %w", err) + } + } + } + defer func() { klog.Infof("[%s] adding namespace took %v", ns.Name, time.Since(start)) }() diff --git a/go-controller/pkg/types/const.go b/go-controller/pkg/types/const.go index d3de716b03a..92343058faa 100644 --- a/go-controller/pkg/types/const.go +++ b/go-controller/pkg/types/const.go @@ -5,6 +5,7 @@ import "time" const ( // Default network name DefaultNetworkName = "default" + UnknownNetworkName = "unknown" K8sPrefix = "k8s-" HybridOverlayPrefix = "int-" HybridOverlayGRSubfix = "-gr" diff --git a/test/e2e/multihoming_utils.go b/test/e2e/multihoming_utils.go index b241c271664..0c3586719b7 100644 --- a/test/e2e/multihoming_utils.go +++ b/test/e2e/multihoming_utils.go @@ -44,6 +44,7 @@ type networkAttachmentConfigParams struct { networkName string vlanID int allowPersistentIPs bool + primaryNetwork bool } type networkAttachmentConfig struct { @@ -78,7 +79,8 @@ func generateNAD(config networkAttachmentConfig) *nadapi.NetworkAttachmentDefini "mtu": 1300, "netAttachDefName": %q, "vlanID": %d, - "allowPersistentIPs": %t + "allowPersistentIPs": %t, + "primaryNetwork": %t } `, config.networkName, @@ -88,6 +90,7 @@ func generateNAD(config networkAttachmentConfig) *nadapi.NetworkAttachmentDefini namespacedName(config.namespace, config.name), config.vlanID, config.allowPersistentIPs, + config.primaryNetwork, ) return generateNetAttachDef(config.namespace, config.name, nadSpec) } @@ -126,6 +129,9 @@ func generatePodSpec(config podConfiguration) *v1.Pod { } func networkSelectionElements(elements ...nadapi.NetworkSelectionElement) map[string]string { + if len(elements) == 0 { + return map[string]string{} + } marshalledElements, err := 
json.Marshal(elements) if err != nil { panic(fmt.Errorf("programmer error: you've provided wrong input to the test data: %v", err)) diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go new file mode 100644 index 00000000000..5fe68b90f69 --- /dev/null +++ b/test/e2e/network_segmentation.go @@ -0,0 +1,160 @@ +package e2e + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + nadclient "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/typed/k8s.cni.cncf.io/v1" +) + +var _ = Describe("Network Segmentation", func() { + const ( + activeNetworkAnnotation = "k8s.ovn.org/active-network" + ) + + f := wrappedTestFramework("network-segmentation") + + type activeNetworkTest struct { + nads []networkAttachmentConfigParams + podsBeforeNADs []podConfiguration + expectedActiveNetwork string + } + DescribeTable("should annotate namespace with proper active-network", func(td activeNetworkTest) { + nadClient, err := nadclient.NewForConfig(f.ClientConfig()) + Expect(err).NotTo(HaveOccurred()) + + By("Create pods before network attachment definition") + podsBeforeNADs := []*corev1.Pod{} + for _, pod := range td.podsBeforeNADs { + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create( + context.Background(), + generatePodSpec(pod), + metav1.CreateOptions{}, + ) + Expect(err).NotTo(HaveOccurred()) + podsBeforeNADs = append(podsBeforeNADs, pod) + } + + By("Create network attachment definitions") + for _, nad := range td.nads { + netConfig := newNetworkAttachmentConfig(nad) + netConfig.namespace = f.Namespace.Name + + _, err = nadClient.NetworkAttachmentDefinitions(netConfig.namespace).Create( + context.Background(), + generateNAD(netConfig), + metav1.CreateOptions{}, + ) + Expect(err).NotTo(HaveOccurred()) + } + + By("Wait for active-network annotation") + 
Eventually(thisNamespace(f.ClientSet, f.Namespace)). + WithPolling(time.Second / 2). + WithTimeout(5 * time.Second). + Should(WithTransform(getAnnotations, + HaveKeyWithValue(activeNetworkAnnotation, td.expectedActiveNetwork))) + + }, + Entry("without primary network nads to 'default'", activeNetworkTest{ + nads: []networkAttachmentConfigParams{}, + expectedActiveNetwork: "default", + }), + Entry("with one primaryNetwork nad on layer2 to network name", activeNetworkTest{ + nads: []networkAttachmentConfigParams{{ + name: "tenant-blue-l2", + networkName: "net-l2", + cidr: "10.128.0.0/24", + topology: "layer2", + primaryNetwork: true, + }}, + expectedActiveNetwork: "net-l2", + }), + Entry("with one primaryNetwork nad on layer3 to network name", activeNetworkTest{ + nads: []networkAttachmentConfigParams{{ + name: "tenant-blue-l3", + networkName: "net-l3", + cidr: "10.128.0.0/16/24", + topology: "layer3", + primaryNetwork: true, + }}, + expectedActiveNetwork: "net-l3", + }), + Entry("with two primaryNetwork nads on layer3 and same network with network name", activeNetworkTest{ + nads: []networkAttachmentConfigParams{ + { + name: "tenant-blue-l3", + networkName: "net-l3", + cidr: "10.128.0.0/16/24", + topology: "layer3", + primaryNetwork: true, + }, + { + name: "tenant-red-l3", + networkName: "net-l3", + cidr: "10.128.0.0/16/24", + topology: "layer3", + primaryNetwork: true, + }, + }, + expectedActiveNetwork: "net-l3", + }), + Entry("with two primaryNetwork nads on layer2 and same network with network name", activeNetworkTest{ + nads: []networkAttachmentConfigParams{ + { + name: "tenant-blue-l2", + networkName: "net-l2", + cidr: "10.128.0.0/24", + topology: "layer2", + primaryNetwork: true, + }, + { + name: "tenant-red-l2", + networkName: "net-l2", + cidr: "10.128.0.0/24", + topology: "layer2", + primaryNetwork: true, + }, + }, + expectedActiveNetwork: "net-l2", + }), + Entry("with two primaryNetwork nads and different network with 'unknown'", activeNetworkTest{ + nads: 
[]networkAttachmentConfigParams{ + { + name: "tenant-blue-l3", + networkName: "net-l3", + cidr: "10.128.0.0/16/24", + topology: "layer3", + primaryNetwork: true, + }, + { + name: "tenant-blue-l2", + networkName: "net-l2", + cidr: "10.128.0.0/24", + topology: "layer2", + primaryNetwork: true, + }, + }, + expectedActiveNetwork: "unknown", + }), + Entry("with one primaryNetwork nad pods at the namespace with 'unknown'", activeNetworkTest{ + podsBeforeNADs: []podConfiguration{{ + name: "pod1", + }}, + nads: []networkAttachmentConfigParams{{ + name: "tenant-blue-l2", + networkName: "net-l2", + cidr: "10.128.0.0/24", + topology: "layer2", + primaryNetwork: true, + }}, + expectedActiveNetwork: "unknown", + }), + ) +}) diff --git a/test/e2e/util.go b/test/e2e/util.go index 361cde5d6b1..3f920575f52 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -32,6 +32,7 @@ import ( testutils "k8s.io/kubernetes/test/utils" admissionapi "k8s.io/pod-security-admission/api" utilnet "k8s.io/utils/net" + "sigs.k8s.io/controller-runtime/pkg/client" ) const ( @@ -1240,3 +1241,13 @@ func getGatewayMTUSupport(node *v1.Node) bool { } return false } + +func thisNamespace(cli kubernetes.Interface, namespace *v1.Namespace) func() (*v1.Namespace, error) { + return func() (*v1.Namespace, error) { + return cli.CoreV1().Namespaces().Get(context.Background(), namespace.Name, metav1.GetOptions{}) + } +} + +func getAnnotations(obj client.Object) map[string]string { + return obj.GetAnnotations() +} From 81fc42462ffc80539dfffe1151d42287b07ec7bc Mon Sep 17 00:00:00 2001 From: Enrique Llorente Date: Mon, 17 Jun 2024 08:01:37 +0200 Subject: [PATCH 07/16] nad: Handler primary network deletion This change add the following scenarios: - Delete a duplicate primary network nad - Delete primary network nad with no pods - Delete primary network nad with pods Signed-off-by: Enrique Llorente --- .../secondary_network_cluster_manager.go | 2 +- .../network_segmentation_manager.go | 67 ++++++++++++++-- 
test/e2e/network_segmentation.go | 78 ++++++++++++++++++- 3 files changed, 138 insertions(+), 9 deletions(-) diff --git a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go index 8efb2768b29..5f0a81348df 100644 --- a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go +++ b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go @@ -57,7 +57,7 @@ func newSecondaryNetworkClusterManager(ovnClient *util.OVNClusterManagerClientse return nil, err } if util.IsNetworkSegmentationSupportEnabled() { - sncm.nadController.AddManager("network segmentation", nad.NewNetworkSegmentationManager(ovnClient.KubeClient, wf)) + sncm.nadController.AddManager("network segmentation", nad.NewNetworkSegmentationManager(ovnClient.KubeClient, wf, ovnClient.NetworkAttchDefClient)) } return sncm, nil } diff --git a/go-controller/pkg/network-attach-def-controller/network_segmentation_manager.go b/go-controller/pkg/network-attach-def-controller/network_segmentation_manager.go index 828ca658930..c7f046cbec5 100644 --- a/go-controller/pkg/network-attach-def-controller/network_segmentation_manager.go +++ b/go-controller/pkg/network-attach-def-controller/network_segmentation_manager.go @@ -1,14 +1,20 @@ package networkAttachDefController import ( + "context" "fmt" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" + nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + nadclientset "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" - "k8s.io/client-go/kubernetes" - "k8s.io/klog/v2" ) var _ NetAttachDefinitionManager 
= &NetworkSegmentationManager{} @@ -16,12 +22,14 @@ var _ NetAttachDefinitionManager = &NetworkSegmentationManager{} type NetworkSegmentationManager struct { kube kubernetes.Interface watchFactory *factory.WatchFactory + nadClient nadclientset.Interface } -func NewNetworkSegmentationManager(kube kubernetes.Interface, watchFactory *factory.WatchFactory) *NetworkSegmentationManager { +func NewNetworkSegmentationManager(kube kubernetes.Interface, watchFactory *factory.WatchFactory, nadClient nadclientset.Interface) *NetworkSegmentationManager { return &NetworkSegmentationManager{ kube: kube, watchFactory: watchFactory, + nadClient: nadClient, } } @@ -30,7 +38,56 @@ func (m *NetworkSegmentationManager) OnAddNetAttachDef(nad *nettypes.NetworkAtta } func (m *NetworkSegmentationManager) OnDelNetAttachDef(nadName, netName string) error { - //TODO + namespace, _, err := cache.SplitMetaNamespaceKey(nadName) + if err != nil { + return err + } + nadList, err := m.nadClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(namespace).List(context.Background(), metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed listing network attachment definitions after nad '%s' deletion: %w", nadName, err) + } + + if len(nadList.Items) == 0 { + pods, err := m.watchFactory.GetPods(namespace) + if err != nil { + return fmt.Errorf("failed ensuring namespace '%s' active network when listing pods: %w", namespace, err) + } + networkNamespace, err := m.watchFactory.GetNamespace(namespace) + if err != nil { + return fmt.Errorf("failed looking for network namespace '%s': %w", namespace, err) + } + if len(pods) == 0 { + if err := util.UpdateNamespaceActiveNetwork(m.kube, networkNamespace, types.DefaultNetworkName); err != nil { + return fmt.Errorf("failed annotating namespace with active-network=%s: %w", types.DefaultNetworkName, err) + } + return nil + } else { + currentActiveNetwork, ok := networkNamespace.Annotations[util.ActiveNetworkAnnotation] + if !ok { + return fmt.Errorf("missing 
active-network annotation at namespace %s", namespace) + } + if currentActiveNetwork != types.DefaultNetworkName { + //TODO: Event + klog.Warningf("Active primary network %s deleted from namespace '%s' with pods, marking namespace active network to unknown", currentActiveNetwork, networkNamespace.Name) + if err := util.UpdateNamespaceActiveNetwork(m.kube, networkNamespace, types.UnknownNetworkName); err != nil { + return fmt.Errorf("failed annotating namespace with active-network=unknown when a primary network was already configured: %w", err) + } + return nil + } + + } + } + + for _, nad := range nadList.Items { + network, err := util.ParseNADInfo(&nad) + if err != nil { + return fmt.Errorf("failed parsing nads after nad '%s' deletion: %w", nadName, err) + } + if err := m.ensureNamespaceActiveNetwork(namespace, network); err != nil { + return fmt.Errorf("failed ensuring namespace active network after nad '%s' deletion: %w", nadName, err) + } + } + return nil } @@ -56,7 +113,7 @@ func (m *NetworkSegmentationManager) ensureNamespaceActiveNetwork(namespace stri return nil } - if currentActiveNetwork != types.DefaultNetworkName { + if currentActiveNetwork != types.DefaultNetworkName && currentActiveNetwork != types.UnknownNetworkName { //TODO: Event klog.Warningf("Active primary network %s already configured at namespace %s, marking namespace active network to unknown", currentActiveNetwork, networkNamespace.Name) if err := util.UpdateNamespaceActiveNetwork(m.kube, networkNamespace, types.UnknownNetworkName); err != nil { diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index 5fe68b90f69..ef51cb46ab7 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -23,7 +23,9 @@ var _ = Describe("Network Segmentation", func() { type activeNetworkTest struct { nads []networkAttachmentConfigParams podsBeforeNADs []podConfiguration + podsAfterNADs []podConfiguration expectedActiveNetwork string + deleteNADs []string 
} DescribeTable("should annotate namespace with proper active-network", func(td activeNetworkTest) { nadClient, err := nadclient.NewForConfig(f.ClientConfig()) @@ -54,12 +56,35 @@ var _ = Describe("Network Segmentation", func() { Expect(err).NotTo(HaveOccurred()) } - By("Wait for active-network annotation") + By("Create pods after network attachment definition") + podsAfterNADs := []*corev1.Pod{} + for _, pod := range td.podsAfterNADs { + pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create( + context.Background(), + generatePodSpec(pod), + metav1.CreateOptions{}, + ) + Expect(err).NotTo(HaveOccurred()) + podsAfterNADs = append(podsAfterNADs, pod) + } + for _, nadNameToDelete := range td.deleteNADs { + Expect(nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Delete( + context.Background(), + nadNameToDelete, + metav1.DeleteOptions{}, + )).To(Succeed()) + } + + By("Wait for expected active-network annotation") + activeNetworkMatch := HaveKeyWithValue(activeNetworkAnnotation, td.expectedActiveNetwork) + if td.expectedActiveNetwork == "" { + activeNetworkMatch = Not(HaveKey(activeNetworkAnnotation)) + } + Eventually(thisNamespace(f.ClientSet, f.Namespace)). WithPolling(time.Second / 2). WithTimeout(5 * time.Second). 
- Should(WithTransform(getAnnotations, - HaveKeyWithValue(activeNetworkAnnotation, td.expectedActiveNetwork))) + Should(WithTransform(getAnnotations, activeNetworkMatch)) }, Entry("without primary network nads to 'default'", activeNetworkTest{ @@ -156,5 +181,52 @@ var _ = Describe("Network Segmentation", func() { }}, expectedActiveNetwork: "unknown", }), + Entry("with two primaryNetwork nads and different network created and then one delete with networkName", activeNetworkTest{ + nads: []networkAttachmentConfigParams{ + { + name: "tenant-blue-l3", + networkName: "net-l3", + cidr: "10.128.0.0/16/24", + topology: "layer3", + primaryNetwork: true, + }, + { + name: "tenant-blue-l2", + networkName: "net-l2", + cidr: "10.128.0.0/24", + topology: "layer2", + primaryNetwork: true, + }, + }, + deleteNADs: []string{"tenant-blue-l3"}, + expectedActiveNetwork: "net-l2", + }), + Entry("with one primaryNetwork nad deleted and no pods should annotate back to 'default'", activeNetworkTest{ + nads: []networkAttachmentConfigParams{ + { + name: "tenant-blue-l3", + networkName: "net-l3", + cidr: "10.128.0.0/16/24", + topology: "layer3", + primaryNetwork: true, + }, + }, + deleteNADs: []string{"tenant-blue-l3"}, + expectedActiveNetwork: "default", + }), + Entry("with one primaryNetwork nad deleted and pods should annotate to 'unknown'", activeNetworkTest{ + nads: []networkAttachmentConfigParams{ + { + name: "tenant-blue-l3", + networkName: "net-l3", + cidr: "10.128.0.0/16/24", + topology: "layer3", + primaryNetwork: true, + }, + }, + podsAfterNADs: []podConfiguration{{name: "pod1"}}, + deleteNADs: []string{"tenant-blue-l3"}, + expectedActiveNetwork: "unknown", + }), ) }) From 615bffc8f8e55ac831be014c8a3ef8be81d31de7 Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Sat, 15 Jun 2024 19:14:25 +0200 Subject: [PATCH 08/16] Add primary field to pod annotation This commit adds the primary network boolean field to a pod's annotation info so that this information can be used to tell if 
the pod is managed by the default network OR secondary network. This will be useful in future commits and also for other feature controller's to leverage in the future. k8s.ovn.org/pod-networks: '{ "default":{ "ip_addresses":["10.244.1.4/24"], "mac_address":"0a:58:0a:f4:01:04", "routes":[{"dest":"10.244.0.0/16","nextHop":"10.244.1.1"},{"dest":"100.64.0.0/16","nextHop":"10.244.1.1"}], "ip_address":"10.244.1.4/24", "primary":false }, "ns2/l2-network":{ "ip_addresses":["10.100.200.9/24"], "mac_address":"0a:58:0a:64:c8:09", "ip_address":"10.100.200.9/24", "tunnel_id":2, "primary":true } }' Signed-off-by: Surya Seetharaman --- .../pkg/allocator/pod/pod_annotation.go | 33 +++++- .../pkg/allocator/pod/pod_annotation_test.go | 49 ++++++-- .../network_cluster_controller.go | 1 + .../pkg/clustermanager/pod/allocator_test.go | 2 + .../pkg/ovn/base_network_controller_pods.go | 21 +++- .../ovn/base_network_controller_pods_test.go | 105 ++++++++++++++++++ go-controller/pkg/ovn/pods_test.go | 2 + .../secondary_layer2_network_controller.go | 1 + .../secondary_layer3_network_controller.go | 1 + .../secondary_localnet_network_controller.go | 1 + .../listers/core/v1/NamespaceLister.go | 89 +++++++++++++++ .../pkg/util/namespace_annotation.go | 10 ++ go-controller/pkg/util/pod_annotation.go | 12 +- 13 files changed, 310 insertions(+), 17 deletions(-) create mode 100644 go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/NamespaceLister.go diff --git a/go-controller/pkg/allocator/pod/pod_annotation.go b/go-controller/pkg/allocator/pod/pod_annotation.go index 83a6b761ef8..a6970cd797c 100644 --- a/go-controller/pkg/allocator/pod/pod_annotation.go +++ b/go-controller/pkg/allocator/pod/pod_annotation.go @@ -22,8 +22,9 @@ import ( // PodAnnotationAllocator is a utility to handle allocation of the PodAnnotation to Pods. 
type PodAnnotationAllocator struct { - podLister listers.PodLister - kube kube.InterfaceOVN + podLister listers.PodLister + namespaceLister listers.NamespaceLister + kube kube.InterfaceOVN netInfo util.NetInfo ipamClaimsReconciler persistentips.PersistentAllocations @@ -32,11 +33,13 @@ type PodAnnotationAllocator struct { func NewPodAnnotationAllocator( netInfo util.NetInfo, podLister listers.PodLister, + nsLister listers.NamespaceLister, kube kube.InterfaceOVN, claimsReconciler persistentips.PersistentAllocations, ) *PodAnnotationAllocator { return &PodAnnotationAllocator{ podLister: podLister, + namespaceLister: nsLister, kube: kube, netInfo: netInfo, ipamClaimsReconciler: claimsReconciler, @@ -63,6 +66,7 @@ func (allocator *PodAnnotationAllocator) AllocatePodAnnotation( return allocatePodAnnotation( allocator.podLister, + allocator.namespaceLister, allocator.kube, ipAllocator, allocator.netInfo, @@ -75,6 +79,7 @@ func (allocator *PodAnnotationAllocator) AllocatePodAnnotation( func allocatePodAnnotation( podLister listers.PodLister, + namespaceLister listers.NamespaceLister, kube kube.Interface, ipAllocator subnet.NamedAllocator, netInfo util.NetInfo, @@ -92,6 +97,7 @@ func allocatePodAnnotation( allocateToPodWithRollback := func(pod *v1.Pod) (*v1.Pod, func(), error) { var rollback func() pod, podAnnotation, rollback, err = allocatePodAnnotationWithRollback( + namespaceLister, ipAllocator, idAllocator, netInfo, @@ -137,6 +143,7 @@ func (allocator *PodAnnotationAllocator) AllocatePodAnnotationWithTunnelID( return allocatePodAnnotationWithTunnelID( allocator.podLister, + allocator.namespaceLister, allocator.kube, ipAllocator, idAllocator, @@ -150,6 +157,7 @@ func (allocator *PodAnnotationAllocator) AllocatePodAnnotationWithTunnelID( func allocatePodAnnotationWithTunnelID( podLister listers.PodLister, + namespaceLister listers.NamespaceLister, kube kube.Interface, ipAllocator subnet.NamedAllocator, idAllocator id.NamedAllocator, @@ -165,6 +173,7 @@ func 
allocatePodAnnotationWithTunnelID( allocateToPodWithRollback := func(pod *v1.Pod) (*v1.Pod, func(), error) { var rollback func() pod, podAnnotation, rollback, err = allocatePodAnnotationWithRollback( + namespaceLister, ipAllocator, idAllocator, netInfo, @@ -207,6 +216,7 @@ func allocatePodAnnotationWithTunnelID( // implementations. Use an inlined implementation if you want to extract // information from it as a side-effect. func allocatePodAnnotationWithRollback( + namespaceLister listers.NamespaceLister, ipAllocator subnet.NamedAllocator, idAllocator id.NamedAllocator, netInfo util.NetInfo, @@ -265,6 +275,25 @@ func allocatePodAnnotationWithRollback( IPs: podAnnotation.IPs, MAC: podAnnotation.MAC, TunnelID: podAnnotation.TunnelID, + Primary: false, + } + if util.IsNetworkSegmentationSupportEnabled() { + tentative.Primary = netInfo.IsSecondary() && netInfo.IsPrimaryNetwork() + if !netInfo.IsSecondary() { // only do this extra bit if needed + var podNamespace *v1.Namespace + podNamespace, err = namespaceLister.Get(pod.Namespace) + if err != nil { + return + } + activeNetworkForPod := util.GetNamespaceActiveNetwork(podNamespace) + if activeNetworkForPod == nadName { + tentative.Primary = true + } + } + } else if !netInfo.IsSecondary() { + // if user defined network segmentation is not enabled + // then we know pod's primary network is "default" + tentative.Primary = true } hasIDAllocation := util.DoesNetworkRequireTunnelIDs(netInfo) diff --git a/go-controller/pkg/allocator/pod/pod_annotation_test.go b/go-controller/pkg/allocator/pod/pod_annotation_test.go index 408c44ab283..a906e6b240d 100644 --- a/go-controller/pkg/allocator/pod/pod_annotation_test.go +++ b/go-controller/pkg/allocator/pod/pod_annotation_test.go @@ -10,6 +10,8 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" cnitypes "github.com/containernetworking/cni/pkg/types" + mocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1" + 
"github.com/stretchr/testify/mock" ipamclaimsapi "github.com/k8snetworkplumbingwg/ipamclaims/pkg/crd/ipamclaims/v1alpha1" nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" @@ -115,6 +117,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { ipam bool idAllocation bool persistentIPAllocation bool + primaryNetwork bool podAnnotation *util.PodAnnotation invalidNetworkAnnotation bool wantUpdatedPod bool @@ -157,9 +160,11 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { }, }, wantUpdatedPod: true, + primaryNetwork: true, wantPodAnnotation: &util.PodAnnotation{ - IPs: ovntest.MustParseIPNets("192.168.0.4/24"), - MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.4/24")[0].IP), + IPs: ovntest.MustParseIPNets("192.168.0.4/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.4/24")[0].IP), + Primary: true, }, }, { @@ -175,11 +180,13 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { netxtIPs: ovntest.MustParseIPNets("192.168.0.3/24"), }, }, + primaryNetwork: true, wantUpdatedPod: true, wantPodAnnotation: &util.PodAnnotation{ IPs: ovntest.MustParseIPNets("192.168.0.4/24"), MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.4/24")[0].IP), Gateways: ovntest.MustParseIPs("192.168.0.1"), + Primary: true, }, }, { @@ -216,6 +223,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { NextHop: ovntest.MustParseIP("192.168.0.1").To4(), }, }, + Primary: true, // default network's netInfo }, wantReleasedIPsOnRollback: ovntest.MustParseIPNets("192.168.0.3/24"), }, @@ -225,15 +233,17 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { name: "expect no updates, annotated, IPAM", ipam: true, podAnnotation: &util.PodAnnotation{ - IPs: ovntest.MustParseIPNets("192.168.0.3/24"), - MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + IPs: ovntest.MustParseIPNets("192.168.0.3/24"), + MAC: 
util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + Primary: true, // default network netInfo, }, args: args{ ipAllocator: &ipAllocatorStub{}, }, wantPodAnnotation: &util.PodAnnotation{ - IPs: ovntest.MustParseIPNets("192.168.0.3/24"), - MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + IPs: ovntest.MustParseIPNets("192.168.0.3/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + Primary: true, // default network netInfo, }, wantReleasedIPsOnRollback: ovntest.MustParseIPNets("192.168.0.3/24"), }, @@ -243,8 +253,9 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { name: "expect no updates, annotated, already allocated, IPAM", ipam: true, podAnnotation: &util.PodAnnotation{ - IPs: ovntest.MustParseIPNets("192.168.0.3/24"), - MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + IPs: ovntest.MustParseIPNets("192.168.0.3/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + Primary: true, // default network netInfo }, args: args{ ipAllocator: &ipAllocatorStub{ @@ -252,8 +263,9 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { }, }, wantPodAnnotation: &util.PodAnnotation{ - IPs: ovntest.MustParseIPNets("192.168.0.3/24"), - MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + IPs: ovntest.MustParseIPNets("192.168.0.3/24"), + MAC: util.IPAddrToHWAddr(ovntest.MustParseIPNets("192.168.0.3/24")[0].IP), + Primary: true, // default network netInfo }, }, { @@ -297,6 +309,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { NextHop: ovntest.MustParseIP("192.168.0.1").To4(), }, }, + Primary: true, // default network's netInfo }, wantReleasedIPsOnRollback: ovntest.MustParseIPNets("192.168.0.4/24"), }, @@ -326,6 +339,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { NextHop: ovntest.MustParseIP("192.168.0.1").To4(), }, }, + Primary: true, // default network netInfo }, }, { @@ 
-354,6 +368,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { NextHop: ovntest.MustParseIP("192.168.0.1").To4(), }, }, + Primary: true, // default network netInfo }, wantReleasedIPsOnRollback: ovntest.MustParseIPNets("192.168.0.3/24"), }, @@ -410,6 +425,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { NextHop: ovntest.MustParseIP("192.168.0.1").To4(), }, }, + Primary: true, // default network netInfo }, wantReleasedIPsOnRollback: ovntest.MustParseIPNets("192.168.0.3/24"), }, @@ -600,7 +616,9 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var err error - + config.OVNKubernetesFeature.EnableMultiNetwork = true + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + namespaceListerMock := &mocks.NamespaceLister{} g := gomega.NewWithT(t) network := tt.args.network @@ -627,6 +645,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { NADName: nadName, Subnets: subnets, AllowPersistentIPs: tt.persistentIPAllocation, + PrimaryNetwork: tt.primaryNetwork, }) if err != nil { t.Fatalf("failed to create NetInfo: %v", err) @@ -634,7 +653,12 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { } config.OVNKubernetesFeature.EnableInterconnect = tt.idAllocation - + namespaceListerMock.On("Get", mock.AnythingOfType("string")).Return(&v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "namespace", + Annotations: map[string]string{util.ActiveNetworkAnnotation: types.DefaultNetworkName}, + }, + }, nil) pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "pod", @@ -666,6 +690,7 @@ func Test_allocatePodAnnotationWithRollback(t *testing.T) { } pod, podAnnotation, rollback, err := allocatePodAnnotationWithRollback( + namespaceListerMock, tt.args.ipAllocator, tt.args.idAllocator, netInfo, diff --git a/go-controller/pkg/clustermanager/network_cluster_controller.go b/go-controller/pkg/clustermanager/network_cluster_controller.go index 
2dc67275a95..c761164f733 100644 --- a/go-controller/pkg/clustermanager/network_cluster_controller.go +++ b/go-controller/pkg/clustermanager/network_cluster_controller.go @@ -181,6 +181,7 @@ func (ncc *networkClusterController) init() error { podAllocationAnnotator = annotationalloc.NewPodAnnotationAllocator( ncc.NetInfo, ncc.watchFactory.PodCoreInformer().Lister(), + ncc.watchFactory.NamespaceCoreInformer().Lister(), ncc.kube, ipamClaimsReconciler, ) diff --git a/go-controller/pkg/clustermanager/pod/allocator_test.go b/go-controller/pkg/clustermanager/pod/allocator_test.go index d383b8c22e2..8d076e9dbc6 100644 --- a/go-controller/pkg/clustermanager/pod/allocator_test.go +++ b/go-controller/pkg/clustermanager/pod/allocator_test.go @@ -493,6 +493,7 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { idallocator := &idAllocatorStub{} podListerMock := &v1mocks.PodLister{} + namespaceListerMock := &v1mocks.NamespaceLister{} kubeMock := &kubemocks.InterfaceOVN{} podNamespaceLister := &v1mocks.PodNamespaceLister{} @@ -541,6 +542,7 @@ func TestPodAllocator_reconcileForNAD(t *testing.T) { podAnnotationAllocator := pod.NewPodAnnotationAllocator( netInfo, podListerMock, + namespaceListerMock, kubeMock, ipamClaimsReconciler, ) diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index cdb4aaf8c71..79d95e371a6 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -864,14 +864,31 @@ func (bnc *BaseNetworkController) allocatePodAnnotation(pod *kapi.Pod, existingL } } podAnnotation = &util.PodAnnotation{ - IPs: podIfAddrs, - MAC: podMac, + IPs: podIfAddrs, + MAC: podMac, + Primary: false, } var nodeSubnets []*net.IPNet if nodeSubnets = bnc.lsManager.GetSwitchSubnets(switchName); nodeSubnets == nil && bnc.doesNetworkRequireIPAM() { return nil, false, fmt.Errorf("cannot retrieve subnet for assigning gateway routes for pod %s, switch: 
%s", podDesc, switchName) } + + if util.IsNetworkSegmentationSupportEnabled() { + podNamespace, err := bnc.watchFactory.GetNamespace(pod.Namespace) + if err != nil { + return nil, false, err + } + activeNetworkForPod := util.GetNamespaceActiveNetwork(podNamespace) + if activeNetworkForPod == nadName { + podAnnotation.Primary = true + } + } else { + // if user defined network segmentation is not enabled + // then we know pod's primary network is "default" + podAnnotation.Primary = true + } + err = util.AddRoutesGatewayIP(bnc.NetInfo, pod, podAnnotation, network) if err != nil { return nil, false, err diff --git a/go-controller/pkg/ovn/base_network_controller_pods_test.go b/go-controller/pkg/ovn/base_network_controller_pods_test.go index 221efbc74a1..8aa1b9f159b 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods_test.go +++ b/go-controller/pkg/ovn/base_network_controller_pods_test.go @@ -1,16 +1,25 @@ package ovn import ( + "fmt" "net" "testing" "time" + nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" "github.com/onsi/gomega" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" + lsm "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/logical_switch_manager" ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/client-go/kubernetes/fake" ) func TestBaseNetworkController_trackPodsReleasedBeforeStartup(t *testing.T) { @@ -232,3 +241,99 @@ func TestBaseNetworkController_trackPodsReleasedBeforeStartup(t *testing.T) { }) } } + +func 
TestBaseNetworkController_DefaultNetwork_PodAnnotationPrimaryField(t *testing.T) { + config.OVNKubernetesFeature.EnableMultiNetwork = true + tests := []struct { + name string + namespaceAnnotaton map[string]string + expectedPrimaryVal bool + }{ + { + name: "no annotation on namespace", + namespaceAnnotaton: map[string]string{}, + expectedPrimaryVal: true, + }, + { + name: "default network annotation on namespace", + namespaceAnnotaton: map[string]string{ + "k8s.ovn.org/active-network": "default", + }, + expectedPrimaryVal: true, + }, + { + name: "secondary l3-network annotation on namespace", + namespaceAnnotaton: map[string]string{ + "k8s.ovn.org/active-network": "l3-network", + }, + expectedPrimaryVal: false, + }, + } + for i := 1; i <= 2; i++ { + if i%2 == 0 { + config.OVNKubernetesFeature.EnableNetworkSegmentation = true + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := gomega.NewWithT(t) + var err error + netInfo := &util.DefaultNetInfo{} + bnc := &BaseNetworkController{ + NetInfo: netInfo, + lsManager: lsm.NewLogicalSwitchManager(), + } + bnc.lsManager.AddOrUpdateSwitch("node1", []*net.IPNet{ovntest.MustParseIPNet("10.128.1.0/24")}) + namespace := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "namespace", + Annotations: tt.namespaceAnnotaton, + }, + } + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod", + Namespace: namespace.Name, + }, + Spec: corev1.PodSpec{ + NodeName: "node1", + }, + } + kubeClient := fake.NewSimpleClientset(namespace, pod) + bnc.kube = &kube.KubeOVN{ + Kube: kube.Kube{KClient: kubeClient}, + } + bnc.watchFactory, err = factory.NewMasterWatchFactory(&util.OVNMasterClientset{KubeClient: kubeClient}) + g.Expect(err).To(gomega.BeNil()) + g.Expect(bnc.watchFactory.Start()).To(gomega.Succeed()) + nadName := types.DefaultNetworkName + podDesc := fmt.Sprintf("%s/%s/%s", nadName, pod.Namespace, pod.Name) + portName := bnc.GetLogicalPortName(pod, nadName) + existingLSP := 
&nbdb.LogicalSwitchPort{ + UUID: "lsp1", + Addresses: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + ExternalIDs: map[string]string{ + "pod": "true", + "namespace": pod.Namespace, + }, + Name: portName, + Options: map[string]string{ + "iface-id-ver": "myPod", + "requested-chassis": "node1", + }, + PortSecurity: []string{"0a:58:0a:80:01:03 10.128.1.3"}, + } + network := &nadapi.NetworkSelectionElement{ + Name: "boo", + } + podAnnotation, _, err := bnc.allocatePodAnnotation(pod, existingLSP, podDesc, nadName, network) + g.Expect(err).To(gomega.BeNil()) + if config.OVNKubernetesFeature.EnableNetworkSegmentation { + g.Expect(podAnnotation.Primary).To(gomega.Equal(tt.expectedPrimaryVal)) + } else { + // if feature is not enabled then default network is always true + g.Expect(podAnnotation.Primary).To(gomega.BeTrue()) + } + }) + } + } +} diff --git a/go-controller/pkg/ovn/pods_test.go b/go-controller/pkg/ovn/pods_test.go index e0f65811cce..8b2a8bb1238 100644 --- a/go-controller/pkg/ovn/pods_test.go +++ b/go-controller/pkg/ovn/pods_test.go @@ -299,6 +299,7 @@ func (p testPod) getAnnotationsJson() string { Gateways []string `json:"gateway_ips,omitempty"` Routes []podRoute `json:"routes,omitempty"` TunnelID int `json:"tunnel_id,omitempty"` + Primary bool `json:"primary"` } var address string @@ -334,6 +335,7 @@ func (p testPod) getAnnotationsJson() string { Gateway: nodeGWIP, Gateways: nodeGWIPs, Routes: routes, + Primary: true, // all tests here run with network-segmentation disabled }, } diff --git a/go-controller/pkg/ovn/secondary_layer2_network_controller.go b/go-controller/pkg/ovn/secondary_layer2_network_controller.go index e6954fcee1d..3e5d5999c55 100644 --- a/go-controller/pkg/ovn/secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer2_network_controller.go @@ -74,6 +74,7 @@ func NewSecondaryLayer2NetworkController(cnci *CommonNetworkControllerInfo, netI oc.podAnnotationAllocator = pod.NewPodAnnotationAllocator( netInfo, 
cnci.watchFactory.PodCoreInformer().Lister(), + cnci.watchFactory.NamespaceCoreInformer().Lister(), cnci.kube, claimsReconciler) } diff --git a/go-controller/pkg/ovn/secondary_layer3_network_controller.go b/go-controller/pkg/ovn/secondary_layer3_network_controller.go index 0e821bff687..041ccfbb135 100644 --- a/go-controller/pkg/ovn/secondary_layer3_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer3_network_controller.go @@ -282,6 +282,7 @@ func NewSecondaryLayer3NetworkController(cnci *CommonNetworkControllerInfo, netI podAnnotationAllocator := pod.NewPodAnnotationAllocator( netInfo, cnci.watchFactory.PodCoreInformer().Lister(), + cnci.watchFactory.NamespaceCoreInformer().Lister(), cnci.kube, nil) oc.podAnnotationAllocator = podAnnotationAllocator diff --git a/go-controller/pkg/ovn/secondary_localnet_network_controller.go b/go-controller/pkg/ovn/secondary_localnet_network_controller.go index 8c2059b37b5..3fe7296b7d7 100644 --- a/go-controller/pkg/ovn/secondary_localnet_network_controller.go +++ b/go-controller/pkg/ovn/secondary_localnet_network_controller.go @@ -69,6 +69,7 @@ func NewSecondaryLocalnetNetworkController(cnci *CommonNetworkControllerInfo, ne oc.podAnnotationAllocator = pod.NewPodAnnotationAllocator( netInfo, cnci.watchFactory.PodCoreInformer().Lister(), + cnci.watchFactory.NamespaceCoreInformer().Lister(), cnci.kube, claimsReconciler) } diff --git a/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/NamespaceLister.go b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/NamespaceLister.go new file mode 100644 index 00000000000..1a4d63ef7fe --- /dev/null +++ b/go-controller/pkg/testing/mocks/k8s.io/client-go/listers/core/v1/NamespaceLister.go @@ -0,0 +1,89 @@ +// Code generated by mockery v2.43.2. DO NOT EDIT. 
+ +package mocks + +import ( + mock "github.com/stretchr/testify/mock" + labels "k8s.io/apimachinery/pkg/labels" + + v1 "k8s.io/api/core/v1" +) + +// NamespaceLister is an autogenerated mock type for the NamespaceLister type +type NamespaceLister struct { + mock.Mock +} + +// Get provides a mock function with given fields: name +func (_m *NamespaceLister) Get(name string) (*v1.Namespace, error) { + ret := _m.Called(name) + + if len(ret) == 0 { + panic("no return value specified for Get") + } + + var r0 *v1.Namespace + var r1 error + if rf, ok := ret.Get(0).(func(string) (*v1.Namespace, error)); ok { + return rf(name) + } + if rf, ok := ret.Get(0).(func(string) *v1.Namespace); ok { + r0 = rf(name) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*v1.Namespace) + } + } + + if rf, ok := ret.Get(1).(func(string) error); ok { + r1 = rf(name) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// List provides a mock function with given fields: selector +func (_m *NamespaceLister) List(selector labels.Selector) ([]*v1.Namespace, error) { + ret := _m.Called(selector) + + if len(ret) == 0 { + panic("no return value specified for List") + } + + var r0 []*v1.Namespace + var r1 error + if rf, ok := ret.Get(0).(func(labels.Selector) ([]*v1.Namespace, error)); ok { + return rf(selector) + } + if rf, ok := ret.Get(0).(func(labels.Selector) []*v1.Namespace); ok { + r0 = rf(selector) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]*v1.Namespace) + } + } + + if rf, ok := ret.Get(1).(func(labels.Selector) error); ok { + r1 = rf(selector) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// NewNamespaceLister creates a new instance of NamespaceLister. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. +// The first argument is typically a *testing.T value. 
+func NewNamespaceLister(t interface { + mock.TestingT + Cleanup(func()) +}) *NamespaceLister { + mock := &NamespaceLister{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/go-controller/pkg/util/namespace_annotation.go b/go-controller/pkg/util/namespace_annotation.go index 736e0b260dc..1d2edd1356a 100644 --- a/go-controller/pkg/util/namespace_annotation.go +++ b/go-controller/pkg/util/namespace_annotation.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" + ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -83,3 +84,12 @@ func UpdateNamespaceActiveNetwork(k kubernetes.Interface, namespace *corev1.Name } return nil } + +func GetNamespaceActiveNetwork(namespace *corev1.Namespace) string { + activeNetwork, ok := namespace.Annotations[ActiveNetworkAnnotation] + if !ok { + return ovntypes.DefaultNetworkName + } + + return activeNetwork +} diff --git a/go-controller/pkg/util/pod_annotation.go b/go-controller/pkg/util/pod_annotation.go index a3969426453..be4ba1124f3 100644 --- a/go-controller/pkg/util/pod_annotation.go +++ b/go-controller/pkg/util/pod_annotation.go @@ -71,6 +71,13 @@ type PodAnnotation struct { // TunnelID assigned to each pod for layer2 secondary networks TunnelID int + + // Set to true if this network is asked to be the default network + // for the pod. By default, "default" network is the primary network + // unless user-defined-network-segmentation feature has been activated + // At a given time a pod can have only 1 network with this flag set + // to true. 
+ Primary bool } // PodRoute describes any routes to be added to the pod's network namespace @@ -95,7 +102,8 @@ type podAnnotation struct { IP string `json:"ip_address,omitempty"` Gateway string `json:"gateway_ip,omitempty"` - TunnelID int `json:"tunnel_id,omitempty"` + TunnelID int `json:"tunnel_id,omitempty"` + Primary bool `json:"primary"` } // Internal struct used to marshal PodRoute to the pod annotation @@ -116,6 +124,7 @@ func MarshalPodAnnotation(annotations map[string]string, podInfo *PodAnnotation, pa := podAnnotation{ TunnelID: podInfo.TunnelID, MAC: podInfo.MAC.String(), + Primary: podInfo.Primary, } if len(podInfo.IPs) == 1 { @@ -191,6 +200,7 @@ func UnmarshalPodAnnotation(annotations map[string]string, nadName string) (*Pod podAnnotation := &PodAnnotation{ TunnelID: a.TunnelID, + Primary: a.Primary, } podAnnotation.MAC, err = net.ParseMAC(a.MAC) if err != nil { From 8d19a5235c41f69868b614cd0c62800609583efd Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Mon, 17 Jun 2024 20:58:26 +0200 Subject: [PATCH 09/16] Steer default and service traffic via primary secondary network This commit ensures that for L3 and L2 by default all traffic should leave via the default gatewayIP of the podsubnet that is part of the user defined secondary network which is the delegated primary network of the pod. 
L3: surya@fedora:~/user-defined-networks$ oc rsh -n ns1 tinypod / # ip r default via 10.128.0.1 dev net1 10.96.0.0/16 via 10.128.0.1 dev net1 10.128.0.0/24 dev net1 proto kernel scope link src 10.128.0.4 10.128.0.0/16 via 10.128.0.1 dev net1 10.244.0.0/16 via 10.244.1.1 dev eth0 10.244.1.0/24 dev eth0 proto kernel scope link src 10.244.1.3 100.64.0.0/16 via 10.244.1.1 dev eth0 L2: / # ip r 10.96.0.0/16 via 10.100.200.1 dev net1 10.100.200.0/24 dev net1 proto kernel scope link src 10.100.200.9 10.244.0.0/16 via 10.244.1.1 dev eth0 10.244.1.0/24 dev eth0 proto kernel scope link src 10.244.1.4 100.64.0.0/16 via 10.244.1.1 dev eth0 Signed-off-by: Surya Seetharaman --- go-controller/pkg/util/pod_annotation.go | 67 ++++++++++++++++++------ 1 file changed, 52 insertions(+), 15 deletions(-) diff --git a/go-controller/pkg/util/pod_annotation.go b/go-controller/pkg/util/pod_annotation.go index be4ba1124f3..0a8a21ebf87 100644 --- a/go-controller/pkg/util/pod_annotation.go +++ b/go-controller/pkg/util/pod_annotation.go @@ -468,9 +468,30 @@ func AddRoutesGatewayIP( podAnnotation.Gateways = append(podAnnotation.Gateways, network.GatewayRequest...) 
topoType := netinfo.TopologyType() switch topoType { - case types.Layer2Topology, types.LocalnetTopology: + case types.LocalnetTopology: // no route needed for directly connected subnets return nil + case types.Layer2Topology: + if IsNetworkSegmentationSupportEnabled() && netinfo.IsPrimaryNetwork() { + for _, podIfAddr := range podAnnotation.IPs { + isIPv6 := utilnet.IsIPv6CIDR(podIfAddr) + nodeSubnet, err := MatchFirstIPNetFamily(isIPv6, nodeSubnets) + if err != nil { + return err + } + gatewayIPnet := GetNodeGatewayIfAddr(nodeSubnet) + // Ensure default service network traffic always goes to OVN + for _, serviceSubnet := range config.Kubernetes.ServiceCIDRs { + if isIPv6 == utilnet.IsIPv6CIDR(serviceSubnet) { + podAnnotation.Routes = append(podAnnotation.Routes, PodRoute{ + Dest: serviceSubnet, + NextHop: gatewayIPnet.IP, + }) + } + } + } + } + return nil case types.Layer3Topology: for _, podIfAddr := range podAnnotation.IPs { isIPv6 := utilnet.IsIPv6CIDR(podIfAddr) @@ -487,6 +508,20 @@ func AddRoutesGatewayIP( }) } } + if IsNetworkSegmentationSupportEnabled() && netinfo.IsPrimaryNetwork() { + // Ensure default service network traffic always goes to OVN + for _, serviceSubnet := range config.Kubernetes.ServiceCIDRs { + if isIPv6 == utilnet.IsIPv6CIDR(serviceSubnet) { + podAnnotation.Routes = append(podAnnotation.Routes, PodRoute{ + Dest: serviceSubnet, + NextHop: gatewayIPnet.IP, + }) + } + } + if len(network.GatewayRequest) == 0 { // if specific default route for pod was not requested then add gatewayIP + podAnnotation.Gateways = append(podAnnotation.Gateways, gatewayIPnet.IP) + } + } } return nil } @@ -532,22 +567,24 @@ func AddRoutesGatewayIP( } } - // Ensure default service network traffic always goes to OVN - for _, serviceSubnet := range config.Kubernetes.ServiceCIDRs { - if isIPv6 == utilnet.IsIPv6CIDR(serviceSubnet) { - podAnnotation.Routes = append(podAnnotation.Routes, PodRoute{ - Dest: serviceSubnet, - NextHop: gatewayIPnet.IP, - }) + if 
podAnnotation.Primary { + // Ensure default service network traffic always goes to OVN + for _, serviceSubnet := range config.Kubernetes.ServiceCIDRs { + if isIPv6 == utilnet.IsIPv6CIDR(serviceSubnet) { + podAnnotation.Routes = append(podAnnotation.Routes, PodRoute{ + Dest: serviceSubnet, + NextHop: gatewayIPnet.IP, + }) + } } - } - otherDefaultRoute := otherDefaultRouteV4 - if isIPv6 { - otherDefaultRoute = otherDefaultRouteV6 - } - if !otherDefaultRoute { - podAnnotation.Gateways = append(podAnnotation.Gateways, gatewayIPnet.IP) + otherDefaultRoute := otherDefaultRouteV4 + if isIPv6 { + otherDefaultRoute = otherDefaultRouteV6 + } + if !otherDefaultRoute { + podAnnotation.Gateways = append(podAnnotation.Gateways, gatewayIPnet.IP) + } } // Ensure default join subnet traffic always goes to OVN From a6d6703ee8173547ecde8975b383a093863b12e6 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Thu, 20 Jun 2024 09:15:47 +0200 Subject: [PATCH 10/16] user-def-nets: patch OVN annotations Signed-off-by: Miguel Duarte Barroso --- .../network_cluster_controller.go | 26 +++++++++++++++- .../pkg/clustermanager/pod/allocator.go | 30 +++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/clustermanager/network_cluster_controller.go b/go-controller/pkg/clustermanager/network_cluster_controller.go index c761164f733..e9f199675fb 100644 --- a/go-controller/pkg/clustermanager/network_cluster_controller.go +++ b/go-controller/pkg/clustermanager/network_cluster_controller.go @@ -16,6 +16,7 @@ import ( "k8s.io/klog/v2" ipamclaimsapi "github.com/k8snetworkplumbingwg/ipamclaims/pkg/crd/ipamclaims/v1alpha1" + networkattchmentdefclientset "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned" idallocator "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/id" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/ip/subnet" @@ -62,6 +63,9 @@ type networkClusterController 
struct { subnetAllocator subnet.Allocator util.NetInfo + + // TODO: this should be a watch grabbed from the watch factory + nadClient networkattchmentdefclientset.Interface } func newNetworkClusterController(networkIDAllocator idallocator.NamedAllocator, netInfo util.NetInfo, ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory) *networkClusterController { @@ -81,6 +85,7 @@ func newNetworkClusterController(networkIDAllocator idallocator.NamedAllocator, stopChan: make(chan struct{}), wg: wg, networkIDAllocator: networkIDAllocator, + nadClient: ovnClient.NetworkAttchDefClient, } return ncc @@ -186,7 +191,26 @@ func (ncc *networkClusterController) init() error { ipamClaimsReconciler, ) - ncc.podAllocator = pod.NewPodAllocator(ncc.NetInfo, podAllocationAnnotator, ipAllocator, ipamClaimsReconciler) + // TODO: get rid of these logs below + klog.Infof( + "DEBUG| creating pod allocator for netInfo: %q. Is it primary ? %t", + ncc.NetInfo.GetNetworkName(), + ncc.NetInfo.IsPrimaryNetwork(), + ) + + klog.Infof( + "DEBUG| creating pod allocator for netInfo: %q. Is it secondary ? 
%t", + ncc.NetInfo.GetNetworkName(), + ncc.NetInfo.IsSecondary(), + ) + + ncc.podAllocator = pod.NewPodAllocator( + ncc.NetInfo, + podAllocationAnnotator, + ipAllocator, + ipamClaimsReconciler, + ncc.watchFactory.NamespaceCoreInformer().Lister(), + ) if err := ncc.podAllocator.Init(); err != nil { return fmt.Errorf("failed to initialize pod ip allocator: %w", err) } diff --git a/go-controller/pkg/clustermanager/pod/allocator.go b/go-controller/pkg/clustermanager/pod/allocator.go index af3df9cc24b..713e53c5deb 100644 --- a/go-controller/pkg/clustermanager/pod/allocator.go +++ b/go-controller/pkg/clustermanager/pod/allocator.go @@ -2,6 +2,7 @@ package pod import ( "fmt" + "k8s.io/client-go/listers/core/v1" "sync" corev1 "k8s.io/api/core/v1" @@ -41,6 +42,8 @@ type PodAllocator struct { // release more than once releasedPods map[string]sets.Set[string] releasedPodsMutex sync.Mutex + + namespaceLister v1.NamespaceLister } // NewPodAllocator builds a new PodAllocator @@ -49,12 +52,14 @@ func NewPodAllocator( podAnnotationAllocator *pod.PodAnnotationAllocator, ipAllocator subnet.Allocator, claimsReconciler persistentips.PersistentAllocations, + namespaceLister v1.NamespaceLister, ) *PodAllocator { podAllocator := &PodAllocator{ netInfo: netInfo, releasedPods: map[string]sets.Set[string]{}, releasedPodsMutex: sync.Mutex{}, podAnnotationAllocator: podAnnotationAllocator, + namespaceLister: namespaceLister, } // this network might not have IPAM, we will just allocate MAC addresses @@ -131,6 +136,31 @@ func (a *PodAllocator) reconcile(old, new *corev1.Pod, releaseFromAllocator bool pod = new } + ns, err := a.namespaceLister.Get(pod.Namespace) + if err != nil { + return err + } + activeNetworkName, hasAnActiveNetwork := ns.Annotations["k8s.ovn.org/active-network"] + + // TODO: get rid of this log below + klog.Infof( + "DEBUG| reconciling pod %q; has an active network ? 
%t; Active net name: %q", + pod.Name, + hasAnActiveNetwork, + activeNetworkName, + ) + if hasAnActiveNetwork && activeNetworkName != "default" { + network := &nettypes.NetworkSelectionElement{ + // TODO: below we need to have the NAD name, *not* the network name + Name: activeNetworkName, + Namespace: pod.Namespace, + } + nadName := fmt.Sprintf("%s/%s", pod.Namespace, activeNetworkName) + if err := a.reconcileForNAD(old, new, nadName, network, releaseFromAllocator); err != nil { + return err + } + } + podScheduled := util.PodScheduled(pod) podWantsHostNetwork := util.PodWantsHostNetwork(pod) From 9d408161acf36f44c6fd3d14c7e5d60b07933f41 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Thu, 20 Jun 2024 09:20:28 +0200 Subject: [PATCH 11/16] e2e: remove unused function Signed-off-by: Miguel Duarte Barroso --- test/e2e/e2e.go | 53 ------------------------------------------------- 1 file changed, 53 deletions(-) diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index 41a9591c075..c4830c623a0 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -203,59 +203,6 @@ func checkConnectivityPingToHost(f *framework.Framework, nodeName, podName, host return err } -// Place the workload on the specified node and return pod gw route -func getPodGWRoute(f *framework.Framework, nodeName string, podName string) net.IP { - command := []string{"bash", "-c", "sleep 20000"} - contName := fmt.Sprintf("%s-container", podName) - pod := &v1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: podName, - }, - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Name: contName, - Image: agnhostImage, - Command: command, - }, - }, - NodeName: nodeName, - RestartPolicy: v1.RestartPolicyNever, - }, - } - podClient := f.ClientSet.CoreV1().Pods(f.Namespace.Name) - _, err := podClient.Create(context.Background(), pod, metav1.CreateOptions{}) - if err != nil { - framework.Failf("Error trying to create pod") - } - - // Wait for pod network setup to be almost ready - wait.PollImmediate(1*time.Second, 
30*time.Second, func() (bool, error) { - podGet, err := podClient.Get(context.Background(), podName, metav1.GetOptions{}) - if err != nil { - return false, nil - } - if podGet.Annotations != nil && podGet.Annotations[podNetworkAnnotation] != "" { - return true, nil - } - return false, nil - }) - if err != nil { - framework.Failf("Error trying to get the pod annotations") - } - - podGet, err := podClient.Get(context.Background(), podName, metav1.GetOptions{}) - if err != nil { - framework.Failf("Error trying to get the pod object") - } - annotation, err := unmarshalPodAnnotation(podGet.Annotations) - if err != nil { - framework.Failf("Error trying to unmarshal pod annotations") - } - - return annotation.Gateways[0] -} - // Create a pod on the specified node using the agnostic host image func createGenericPod(f *framework.Framework, podName, nodeSelector, namespace string, command []string) (*v1.Pod, error) { return createPod(f, podName, nodeSelector, namespace, command, nil) From 21c64864d742542d468507fb2d05ad32167d5aba Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Thu, 20 Jun 2024 10:01:45 +0200 Subject: [PATCH 12/16] e2e, user-defined-nets: add east/west test Signed-off-by: Miguel Duarte Barroso --- test/e2e/network_segmentation.go | 186 +++++++++++++++++++++++++++++++ test/e2e/util.go | 4 +- 2 files changed, 188 insertions(+), 2 deletions(-) diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index ef51cb46ab7..205daa6034b 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -2,13 +2,17 @@ package e2e import ( "context" + "fmt" + "strings" "time" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clientset "k8s.io/client-go/kubernetes" nadclient "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/typed/k8s.cni.cncf.io/v1" ) @@ -229,4 +233,186 @@ var _ = Describe("Network Segmentation", func() { expectedActiveNetwork: "unknown", }), ) + + Context("a user defined primary network", func() { + const ( + externalServiceIPv4IP = "10.128.0.1" + nodeHostnameKey = "kubernetes.io/hostname" + port = 9000 + userDefinedNetworkIPv4Subnet = "10.128.0.0/16" + userDefinedNetworkIPv6Subnet = "2014:100:200::0/60" + userDefinedNetworkName = "tenantblue" + workerOneNodeName = "ovn-worker" + workerTwoNodeName = "ovn-worker2" + ) + + var ( + cs clientset.Interface + nadClient nadclient.K8sCniCncfIoV1Interface + ) + + BeforeEach(func() { + cs = f.ClientSet + + var err error + nadClient, err = nadclient.NewForConfig(f.ClientConfig()) + Expect(err).NotTo(HaveOccurred()) + }) + + DescribeTable( + "can perform east/west traffic between nodes", + func( + netConfigParams networkAttachmentConfigParams, + clientPodConfig podConfiguration, + serverPodConfig podConfiguration, + ) { + netConfig := newNetworkAttachmentConfig(netConfigParams) + + netConfig.namespace = f.Namespace.Name + clientPodConfig.namespace = f.Namespace.Name + serverPodConfig.namespace = f.Namespace.Name + + By("creating the attachment configuration") + _, err := nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Create( + context.Background(), + generateNAD(netConfig), + metav1.CreateOptions{}, + ) + Expect(err).NotTo(HaveOccurred()) + + By("instantiating the server pod") + serverPod, err := cs.CoreV1().Pods(serverPodConfig.namespace).Create( + context.Background(), + generatePodSpec(serverPodConfig), + metav1.CreateOptions{}, + ) + Expect(err).NotTo(HaveOccurred()) + Expect(serverPod).NotTo(BeNil()) + + By("asserting the server pod reaches 
the `Ready` state") + Eventually(func() v1.PodPhase { + updatedPod, err := cs.CoreV1().Pods(f.Namespace.Name).Get(context.Background(), serverPod.GetName(), metav1.GetOptions{}) + if err != nil { + return v1.PodFailed + } + return updatedPod.Status.Phase + }, 2*time.Minute, 6*time.Second).Should(Equal(v1.PodRunning)) + + By("instantiating the *client* pod") + clientPod, err := cs.CoreV1().Pods(clientPodConfig.namespace).Create( + context.Background(), + generatePodSpec(clientPodConfig), + metav1.CreateOptions{}, + ) + Expect(err).NotTo(HaveOccurred()) + + By("asserting the client pod reaches the `Ready` state") + Eventually(func() v1.PodPhase { + updatedPod, err := cs.CoreV1().Pods(f.Namespace.Name).Get(context.Background(), clientPod.GetName(), metav1.GetOptions{}) + if err != nil { + return v1.PodFailed + } + return updatedPod.Status.Phase + }, 2*time.Minute, 6*time.Second).Should(Equal(v1.PodRunning)) + + serverIP := "" + for _, cidr := range strings.Split(netConfig.cidr, ",") { + if cidr != "" { + By("asserting the server pod has an IP from the configured range") + serverIP, err = podIPForUserDefinedPrimaryNetwork( + cs, + f.Namespace.Name, + serverPod.GetName(), + namespacedName(serverPod.Namespace, netConfig.name), + ) + Expect(err).NotTo(HaveOccurred()) + const netPrefixLengthPerNode = 24 + By(fmt.Sprintf("asserting the server pod IP %v is from the configured range %v/%v", serverIP, cidr, netPrefixLengthPerNode)) + subnet, err := getNetCIDRSubnet(cidr) + Expect(err).NotTo(HaveOccurred()) + Expect(inRange(subnet, serverIP)).To(Succeed()) + } + + By("asserting the *client* pod can contact the server pod exposed endpoint") + Eventually(func() error { + return reachToServerPodFromClient(cs, serverPodConfig, clientPodConfig, serverIP, port) + }, 2*time.Minute, 6*time.Second).Should(Succeed()) + } + }, + Entry( + "two pods connected over an IPv4 network", + networkAttachmentConfigParams{ + name: userDefinedNetworkName, + networkName: userDefinedNetworkName, + 
topology: "layer2", + cidr: userDefinedNetworkIPv4Subnet, + excludeCIDRs: []string{externalServiceIPv4IP + "/32"}, + primaryNetwork: true, + }, + *podConfig( + "client-pod", + withNodeSelector(map[string]string{nodeHostnameKey: workerOneNodeName}), + ), + *podConfig( + "server-pod", + withCommand(func() []string { + return httpServerContainerCmd(port) + }), + withNodeSelector(map[string]string{nodeHostnameKey: workerTwoNodeName}), + ), + ), + ) + }) }) + +type podOption func(*podConfiguration) + +func podConfig(podName string, opts ...podOption) *podConfiguration { + pod := &podConfiguration{ + name: podName, + } + for _, opt := range opts { + opt(pod) + } + return pod +} + +func withCommand(cmdGenerationFn func() []string) podOption { + return func(pod *podConfiguration) { + pod.containerCmd = cmdGenerationFn() + } +} + +func withNodeSelector(nodeSelector map[string]string) podOption { + return func(pod *podConfiguration) { + pod.nodeSelector = nodeSelector + } +} + +func podIPForUserDefinedPrimaryNetwork(k8sClient clientset.Interface, podNamespace string, podName string, attachmentName string) (string, error) { + pod, err := k8sClient.CoreV1().Pods(podNamespace).Get(context.Background(), podName, metav1.GetOptions{}) + if err != nil { + return "", err + } + netStatus, err := userDefinedNetworkStatus(pod, attachmentName) + if err != nil { + return "", err + } + + if len(netStatus.IPs) == 0 { + return "", fmt.Errorf("attachment for network %q without IPs", attachmentName) + } + if len(netStatus.IPs) > 1 { + return "", fmt.Errorf("attachment for network %q with more than one IP", attachmentName) + } + return netStatus.IPs[0].IP.String(), nil +} + +func userDefinedNetworkStatus(pod *v1.Pod, networkName string) (PodAnnotation, error) { + netStatus, err := unmarshalPodAnnotation(pod.Annotations, networkName) + if err != nil { + return PodAnnotation{}, fmt.Errorf("failed to unmarshall annotations for pod %q: %v", pod.Name, err) + } + + return *netStatus, nil +} diff 
--git a/test/e2e/util.go b/test/e2e/util.go index 3f920575f52..8c60a85b365 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -167,7 +167,7 @@ func newAnnotationNotSetError(format string, args ...interface{}) error { } // UnmarshalPodAnnotation returns the default network info from pod.Annotations -func unmarshalPodAnnotation(annotations map[string]string) (*PodAnnotation, error) { +func unmarshalPodAnnotation(annotations map[string]string, networkName string) (*PodAnnotation, error) { ovnAnnotation, ok := annotations[podNetworkAnnotation] if !ok { return nil, newAnnotationNotSetError("could not find OVN pod annotation in %v", annotations) @@ -178,7 +178,7 @@ func unmarshalPodAnnotation(annotations map[string]string) (*PodAnnotation, erro return nil, fmt.Errorf("failed to unmarshal ovn pod annotation %q: %v", ovnAnnotation, err) } - tempA := podNetworks["default"] + tempA := podNetworks[networkName] a := &tempA podAnnotation := &PodAnnotation{} From c0d160957ac3329cf78662e9febc7e5b8f9f95ac Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Thu, 20 Jun 2024 12:51:59 +0200 Subject: [PATCH 13/16] user-def-nets, node: create LSP for primary net Signed-off-by: Miguel Duarte Barroso --- .../pkg/ovn/base_network_controller_pods.go | 4 ++- .../ovn/base_network_controller_secondary.go | 34 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index 79d95e371a6..f0b2d8b9a1e 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -538,8 +538,10 @@ func (bnc *BaseNetworkController) addLogicalPortToNetwork(pod *kapi.Pod, nadName // it for the default network as well. If at all possible, keep them // functionally equivalent going forward. 
var annotationUpdated bool - if bnc.IsSecondary() { + if bnc.IsSecondary() || bnc.IsPrimaryNetwork() { + klog.Infof("DEBUG| allocating logical port %s for pod %q on switch %s", portName, pod.Name, switchName) podAnnotation, annotationUpdated, err = bnc.allocatePodAnnotationForSecondaryNetwork(pod, existingLSP, nadName, network) + klog.Infof("DEBUG| err: %v; annotation: %v", err, podAnnotation) } else { podAnnotation, annotationUpdated, err = bnc.allocatePodAnnotation(pod, existingLSP, podDesc, nadName, network) } diff --git a/go-controller/pkg/ovn/base_network_controller_secondary.go b/go-controller/pkg/ovn/base_network_controller_secondary.go index d34f388a28b..6bfc71dddc1 100644 --- a/go-controller/pkg/ovn/base_network_controller_secondary.go +++ b/go-controller/pkg/ovn/base_network_controller_secondary.go @@ -235,6 +235,15 @@ func (bsnc *BaseSecondaryNetworkController) ensurePodForSecondaryNetwork(pod *ka return err } + if bsnc.IsPrimaryNetwork() && (bsnc.TopologyType() == "layer2" || bsnc.TopologyType() == "layer3") { + network := &nadapi.NetworkSelectionElement{ + // TODO: we need to get the NAD name here, not the network name. 
+ Name: bsnc.GetNetworkName(), + Namespace: pod.Namespace, + } + return bsnc.ensurePodForUserDefinedPrimaryNet(pod, network) + } + on, networkMap, err := util.GetPodNADToNetworkMapping(pod, bsnc.NetInfo) if err != nil { // configuration error, no need to retry, do not return error @@ -680,3 +689,28 @@ func (oc *BaseSecondaryNetworkController) allowPersistentIPs() bool { util.DoesNetworkRequireIPAM(oc.NetInfo) && (oc.NetInfo.TopologyType() == types.Layer2Topology || oc.NetInfo.TopologyType() == types.LocalnetTopology) } + +func (bsnc *BaseSecondaryNetworkController) ensurePodForUserDefinedPrimaryNet(pod *kapi.Pod, network *nadapi.NetworkSelectionElement) error { + klog.Infof("DEBUG| invoking the port builder for the defined network") + switchName, err := bsnc.getExpectedSwitchName(pod) + if err != nil { + return err + } + + nadName := fmt.Sprintf("%s/%s", pod.Namespace, network.Name) + var errs []error + if err := bsnc.addLogicalPortToNetworkForNAD(pod, nadName, switchName, network); err != nil { + errs = append( + errs, + fmt.Errorf( + "failed to add logical port of Pod %s/%s for NAD %s: %w", + pod.Namespace, + pod.Name, + nadName, + err, + ), + ) + } + + return nil +} From b2ad9069f4914f3ca2d4b69df0114dcb7a2b77ab Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Thu, 20 Jun 2024 17:02:07 +0200 Subject: [PATCH 14/16] user-def-nets, cni: configure the additional net iface Signed-off-by: Miguel Duarte Barroso --- go-controller/pkg/cni/cni.go | 99 +++++++++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/cni/cni.go b/go-controller/pkg/cni/cni.go index 5a9a1d6212e..6ac7ae34fd7 100644 --- a/go-controller/pkg/cni/cni.go +++ b/go-controller/pkg/cni/cni.go @@ -1,8 +1,12 @@ package cni import ( + "context" "fmt" + "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + types2 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni/types" "net" + "time" kapi 
"k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" @@ -131,8 +135,39 @@ func (pr *PodRequest) cmdAdd(kubeAuth *KubeAPIAuth, clientset *ClientSet) (*Resp } // Get the IP address and MAC address of the pod // for DPU, ensure connection-details is present + + var ( + hasUserDefinedPrimaryNetwork bool + userDefinedPrimaryNetworkAnnotation *util.PodAnnotation + userDefinedNetworkName string + ) pod, annotations, podNADAnnotation, err := GetPodWithAnnotations(pr.ctx, clientset, namespace, podName, - pr.nadName, annotCondFn) + pr.nadName, func(annotations map[string]string, nadName string) (*util.PodAnnotation, bool) { + // TODO: we need to simplify this function. + annotation, isReady := annotCondFn(annotations, nadName) + if nadName == "default" { + if annotation.Primary { + return annotation, isReady // on the default network case we return here, so we don't impact too much + } + podNetworks, err := util.UnmarshalPodAnnotationAllNetworks(annotations) + if err != nil { + return nil, false + } + for netName, podNetwork := range podNetworks { + if podNetwork.Primary { + hasUserDefinedPrimaryNetwork = true + userDefinedNetworkName = netName + userDefinedPrimaryNetworkAnnotation, err = util.UnmarshalPodAnnotation(annotations, netName) + if err != nil { + return nil, false + } + return annotation, true + } + } + return nil, false + } + return annotation, isReady + }) if err != nil { return nil, fmt.Errorf("failed to get pod annotation: %v", err) } @@ -148,12 +183,45 @@ func (pr *PodRequest) cmdAdd(kubeAuth *KubeAPIAuth, clientset *ClientSet) (*Resp podInterfaceInfo.SkipIPConfig = kubevirt.IsPodLiveMigratable(pod) + var userDefinedNetPodInfo *PodInterfaceInfo + if hasUserDefinedPrimaryNetwork { + klog.Infof("About to plumb the user defined NET for %q on pod %q", userDefinedNetworkName, podName) + userDefinedNetPodInfo, err = PodAnnotation2PodInfo( + annotations, + userDefinedPrimaryNetworkAnnotation, + pr.PodUID, + "", + userDefinedNetworkName, // TODO: we need to 
find a way to have here access to the network name
+			userDefinedNetworkName, // TODO: we can have the NAD name from the annotations. But the network name is missing. maybe we can add that to the annotations
+			pr.CNIConf.MTU,
+		)
+		if err != nil {
+			return nil, err
+		}
+
+	}
+
 	response := &Response{KubeAuth: kubeAuth}
 	if !config.UnprivilegedMode {
 		response.Result, err = pr.getCNIResult(clientset, podInterfaceInfo)
 		if err != nil {
 			return nil, err
 		}
+		if hasUserDefinedPrimaryNetwork && userDefinedNetPodInfo != nil {
+			klog.Infof("creating the user defined net request for %q on pod %q", userDefinedNetworkName, podName)
+			userDefinedRequest := newUserDefinedPrimaryNetworkPodRequest(
+				pr.Command,
+				pod,
+				pr.SandboxID,
+				pr.Netns,
+				"sdn1", // TODO: need to find a name here. This one is OK I guess
+				userDefinedNetworkName,
+				userDefinedNetPodInfo.NADName,
+			)
+			if _, err := userDefinedRequest.getCNIResult(clientset, userDefinedNetPodInfo); err != nil {
+				return nil, err
+			}
+		}
 	} else {
 		response.PodIFInfo = podInterfaceInfo
 	}
@@ -329,3 +397,32 @@ func (pr *PodRequest) getCNIResult(getter PodInfoGetter, podInterfaceInfo *PodIn
 		IPs:   ips,
 	}, nil
 }
+
+// TODO: if we're actually taking this route, maybe what we want is to build a REQ from the default network REQ since half of it can be copied (pod UID / netns path / ...)
+func newUserDefinedPrimaryNetworkPodRequest(
+	cmd command,
+	pod *kapi.Pod,
+	containerID string,
+	netns string,
+	ifaceName string,
+	networkName string,
+	nadName string,
+) *PodRequest {
+	req := &PodRequest{
+		Command:      cmd,
+		PodNamespace: pod.Namespace,
+		PodName:      pod.Name,
+		PodUID:       string(pod.UID),
+		SandboxID:    containerID,
+		Netns:        netns,
+		IfName:       ifaceName,
+		CNIConf:      &types2.NetConf{}, // TODO: I think we will really need to have the NAD's contents here ...
+ timestamp: time.Now(), + IsVFIO: false, + netName: networkName, + nadName: nadName, + deviceInfo: v1.DeviceInfo{}, + } + req.ctx, req.cancel = context.WithTimeout(context.Background(), 2*time.Minute) + return req +} From 707485d993f4d32d3bfa8ac8c30a3b85ee704bc6 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Thu, 20 Jun 2024 18:56:01 +0200 Subject: [PATCH 15/16] user-def-nets, control-plane: set default GW on the user defined net Signed-off-by: Miguel Duarte Barroso --- go-controller/pkg/clustermanager/pod/allocator.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/go-controller/pkg/clustermanager/pod/allocator.go b/go-controller/pkg/clustermanager/pod/allocator.go index 713e53c5deb..f280ea840b6 100644 --- a/go-controller/pkg/clustermanager/pod/allocator.go +++ b/go-controller/pkg/clustermanager/pod/allocator.go @@ -3,6 +3,7 @@ package pod import ( "fmt" "k8s.io/client-go/listers/core/v1" + "net" "sync" corev1 "k8s.io/api/core/v1" @@ -150,10 +151,17 @@ func (a *PodAllocator) reconcile(old, new *corev1.Pod, releaseFromAllocator bool activeNetworkName, ) if hasAnActiveNetwork && activeNetworkName != "default" { + var gws []net.IP + for _, networkCIDR := range a.netInfo.Subnets() { + subnetGW := util.GetNodeGatewayIfAddr(networkCIDR.CIDR).IP + gws = append(gws, subnetGW) + } + network := &nettypes.NetworkSelectionElement{ // TODO: below we need to have the NAD name, *not* the network name - Name: activeNetworkName, - Namespace: pod.Namespace, + Name: activeNetworkName, + Namespace: pod.Namespace, + GatewayRequest: gws, } nadName := fmt.Sprintf("%s/%s", pod.Namespace, activeNetworkName) if err := a.reconcileForNAD(old, new, nadName, network, releaseFromAllocator); err != nil { From cb92150078df72d78b5bcb66fa6f67ee074eb514 Mon Sep 17 00:00:00 2001 From: Miguel Duarte Barroso Date: Fri, 21 Jun 2024 12:25:51 +0200 Subject: [PATCH 16/16] e2e, user-defined-nets: add egress test In this test, we start a container running an HTTP 
service outside the Kubernetes cluster. We then start a client pod in the
Kubernetes cluster, with a user defined primary network attachment, and try
to access the service located outside the cluster from it.

Signed-off-by: Miguel Duarte Barroso
---
 test/e2e/network_segmentation.go | 73 ++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go
index 205daa6034b..7227836256a 100644
--- a/test/e2e/network_segmentation.go
+++ b/test/e2e/network_segmentation.go
@@ -362,6 +362,75 @@ var _ = Describe("Network Segmentation", func() {
 				),
 			),
 		)
+
+		Context("an HTTP service which is deployed outside the Kubernetes cluster", func() {
+			const externalContainerName = "ovn-k-egress-test-helper"
+			var externalIpv4 string
+			BeforeEach(func() {
+				externalIpv4, _ = createClusterExternalContainer(
+					externalContainerName,
+					"registry.k8s.io/e2e-test-images/agnhost:2.45",
+					runExternalContainerCmd(),
+					httpServerContainerCmd(port),
+				)
+
+				DeferCleanup(func() {
+					deleteClusterExternalContainer(externalContainerName)
+				})
+			})
+
+			DescribeTable(
+				"can be accessed from the pods running in the Kubernetes cluster",
+				func(netConfigParams networkAttachmentConfigParams, clientPodConfig podConfiguration) {
+					netConfig := newNetworkAttachmentConfig(netConfigParams)
+
+					netConfig.namespace = f.Namespace.Name
+					clientPodConfig.namespace = f.Namespace.Name
+
+					By("creating the attachment configuration")
+					_, err := nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Create(
+						context.Background(),
+						generateNAD(netConfig),
+						metav1.CreateOptions{},
+					)
+					Expect(err).NotTo(HaveOccurred())
+
+					By("instantiating the client pod")
+					clientPod, err := cs.CoreV1().Pods(clientPodConfig.namespace).Create(
+						context.Background(),
+						generatePodSpec(clientPodConfig),
+						metav1.CreateOptions{},
+					)
+					Expect(err).NotTo(HaveOccurred())
+					Expect(clientPod).NotTo(BeNil())
+
+					By("asserting the client pod reaches the 
`Ready` state") + Eventually(func() v1.PodPhase { + updatedPod, err := cs.CoreV1().Pods(f.Namespace.Name).Get(context.Background(), clientPod.GetName(), metav1.GetOptions{}) + if err != nil { + return v1.PodFailed + } + return updatedPod.Status.Phase + }, 2*time.Minute, 6*time.Second).Should(Equal(v1.PodRunning)) + + By("asserting the *client* pod can contact the server located outside the cluster") + Eventually(func() error { + return connectToServer(clientPodConfig, externalIpv4, port) + }, 2*time.Minute, 6*time.Second).Should(Succeed()) + }, + Entry("by one pod with a single IPv4 address", + networkAttachmentConfigParams{ + name: userDefinedNetworkName, + networkName: userDefinedNetworkName, + topology: "layer2", + cidr: userDefinedNetworkIPv4Subnet, + excludeCIDRs: []string{externalServiceIPv4IP + "/32"}, + primaryNetwork: true, + }, + *podConfig("client-pod"), + ), + ) + }) }) }) @@ -416,3 +485,7 @@ func userDefinedNetworkStatus(pod *v1.Pod, networkName string) (PodAnnotation, e return *netStatus, nil } + +func runExternalContainerCmd() []string { + return []string{"--network", "kind"} +}