Skip to content

Commit

Permalink
northd: Commit all traffic when there is stateful NAT/LB.
Browse files Browse the repository at this point in the history
Commit all traffic that is not already commit by either NAT or LB. This
ensures that the traffic is tracked, and we don't erroneously commit
reply traffic, or reply traffic is not marked as invalid.

To achieve the commit we need to perform lookup on every packet
that goes through LR pipeline whenever there is stateful NAT.

The SNAT lookup requires additional flag as the unSNAT is happening
in ingress pipeline and at that point we need to know if the packet
is reply or not. This is not required for DNAT, because unDNAT stage
happens in egress.

Signed-off-by: Ales Musil <amusil@redhat.com>
---
There is one failing system test with userspace datapath, that's due
to the recirculation limit that is being hit due to additional
lookups.
  • Loading branch information
almusil committed Nov 20, 2024
1 parent c00ba9f commit 2fe6258
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 94 deletions.
4 changes: 4 additions & 0 deletions include/ovn/logical-fields.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ enum mff_log_flags_bits {
MLF_LOCALNET_BIT = 15,
MLF_RX_FROM_TUNNEL_BIT = 16,
MLF_ICMP_SNAT_BIT = 17,
MLF_UNSNAT_NEW_BIT = 18,
};

/* MFF_LOG_FLAGS_REG flag assignments */
Expand Down Expand Up @@ -141,6 +142,9 @@ enum mff_log_flags {
MLF_RX_FROM_TUNNEL = (1 << MLF_RX_FROM_TUNNEL_BIT),

MLF_ICMP_SNAT = (1 << MLF_ICMP_SNAT_BIT),

/* Indicate that the packet didn't go through unSNAT. */
MLF_UNSNAT_NEW = (1 << MLF_UNSNAT_NEW_BIT),
};

/* OVN logical fields
Expand Down
4 changes: 4 additions & 0 deletions lib/logical-fields.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,10 @@ ovn_init_symtab(struct shash *symtab)
flags_str);
snprintf(flags_str, sizeof flags_str, "flags[%d]", MLF_RX_FROM_TUNNEL_BIT);
expr_symtab_add_subfield(symtab, "flags.tunnel_rx", NULL, flags_str);
snprintf(flags_str, sizeof flags_str, "flags[%d]",
MLF_UNSNAT_NEW_BIT);
expr_symtab_add_subfield(symtab, "flags.unsnat_new", NULL,
flags_str);

/* Connection tracking state. */
expr_symtab_add_field_scoped(symtab, "ct_mark", MFF_CT_MARK, NULL, false,
Expand Down
80 changes: 46 additions & 34 deletions northd/northd.c
Original file line number Diff line number Diff line change
Expand Up @@ -16207,8 +16207,7 @@ build_lrouter_out_snat_flow(struct lflow_table *lflows,
struct ds *actions, bool distributed_nat,
struct eth_addr mac, int cidr_bits, bool is_v6,
struct ovn_port *l3dgw_port,
struct lflow_ref *lflow_ref,
const struct chassis_features *features)
struct lflow_ref *lflow_ref)
{
if (!(nat_entry->type == SNAT || nat_entry->type == DNAT_AND_SNAT)) {
return;
Expand Down Expand Up @@ -16239,34 +16238,6 @@ build_lrouter_out_snat_flow(struct lflow_table *lflows,
priority, ds_cstr(match),
ds_cstr(actions), &nat->header_,
lflow_ref);

/* For the SNAT networks, we need to make sure that connections are
* properly tracked so we can decide whether to perform SNAT on traffic
* exiting the network. */
if (features->ct_commit_to_zone && features->ct_next_zone &&
nat_entry->type == SNAT && !od->is_gw_router) {
/* For traffic that comes from SNAT network, initiate CT state before
* entering S_ROUTER_OUT_SNAT to allow matching on various CT states.
*/
ovn_lflow_add(lflows, od, S_ROUTER_OUT_POST_UNDNAT, 70,
ds_cstr(match), "ct_next(snat);",
lflow_ref);

build_lrouter_out_snat_match(lflows, od, nat, match,
distributed_nat, cidr_bits, is_v6,
l3dgw_port, lflow_ref, true);

/* New traffic that goes into SNAT network is committed to CT to avoid
* SNAT-ing replies.*/
ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, priority,
ds_cstr(match), "ct_snat;",
lflow_ref);

ds_put_cstr(match, " && ct.new");
ovn_lflow_add(lflows, od, S_ROUTER_OUT_POST_SNAT, priority,
ds_cstr(match), "ct_commit_to_zone(snat);",
lflow_ref);
}
}

static void
Expand Down Expand Up @@ -16548,6 +16519,8 @@ static void build_lr_nat_defrag_and_lb_default_flows(
/* Packets are allowed by default. */
ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 0, "1", "next;", lflow_ref);
ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;", lflow_ref);
ovn_lflow_add(lflows, od, S_ROUTER_IN_POST_UNSNAT, 0, "1", "next;",
lflow_ref);
ovn_lflow_add(lflows, od, S_ROUTER_OUT_CHECK_DNAT_LOCAL, 0, "1",
REGBIT_DST_NAT_IP_LOCAL" = 0; next;", lflow_ref);
ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;", lflow_ref);
Expand Down Expand Up @@ -16659,9 +16632,6 @@ build_lrouter_nat_defrag_and_lb(
ovn_lflow_add(lflows, od, S_ROUTER_OUT_UNDNAT, 50,
"ip", "flags.loopback = 1; ct_dnat;",
lflow_ref);
ovn_lflow_add(lflows, od, S_ROUTER_OUT_POST_UNDNAT, 50,
"ip && ct.new", "ct_commit { } ; next; ",
lflow_ref);
}

/* NAT rules are only valid on Gateway routers and routers with
Expand All @@ -16679,6 +16649,9 @@ build_lrouter_nat_defrag_and_lb(
!lport_addresses_is_empty(&lrnat_rec->dnat_force_snat_addrs);
bool lb_force_snat_ip =
!lport_addresses_is_empty(&lrnat_rec->lb_force_snat_addrs);
bool stateful_dnat = lr_stateful_rec->has_lb_vip;
bool stateful_snat = (dnat_force_snat_ip || lb_force_snat_ip ||
lrnat_rec->lb_force_snat_router_ip);

for (size_t i = 0; i < lrnat_rec->n_nat_entries; i++) {
struct ovn_nat *nat_entry = &lrnat_rec->nat_entries[i];
Expand All @@ -16697,6 +16670,21 @@ build_lrouter_nat_defrag_and_lb(
continue;
}

if (!stateless) {
switch (nat_entry->type) {
case DNAT:
stateful_dnat = true;
break;
case SNAT:
stateful_snat = true;
break;
case DNAT_AND_SNAT:
stateful_snat = true;
stateful_dnat = true;
break;
}
}

/* S_ROUTER_IN_UNSNAT
* Ingress UNSNAT table: It is for already established connections'
* reverse traffic. i.e., SNAT has already been done in egress
Expand Down Expand Up @@ -16819,7 +16807,7 @@ build_lrouter_nat_defrag_and_lb(
} else {
build_lrouter_out_snat_flow(lflows, od, nat_entry, match, actions,
distributed_nat, mac, cidr_bits, is_v6,
l3dgw_port, lflow_ref, features);
l3dgw_port, lflow_ref);
}

/* S_ROUTER_IN_ADMISSION - S_ROUTER_IN_IP_INPUT */
Expand Down Expand Up @@ -16909,6 +16897,30 @@ build_lrouter_nat_defrag_and_lb(
}
}


bool can_commit = features->ct_commit_to_zone && features->ct_next_zone;
if (can_commit && stateful_dnat) {
ovn_lflow_add(lflows, od, S_ROUTER_IN_DEFRAG, 10,
"ip && (!ct.trk || !ct.rpl)",
"ct_next(dnat);", lflow_ref);
ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 10,
"ip && ct.new", "ct_commit_to_zone(dnat);", lflow_ref);
}

if (can_commit && stateful_snat) {
/* XXX Add comment about the flag and why is it needed. */
ovn_lflow_add(lflows, od, S_ROUTER_IN_POST_UNSNAT, 10,
"ip && (!ct.trk || ct.new)",
"flags.unsnat_new = 1; next;", lflow_ref);
ovn_lflow_add(lflows, od, S_ROUTER_OUT_POST_UNDNAT, 10,
"ip && (!ct.trk || !ct.rpl) && "
"flags.unsnat_new == 1", "ct_next(snat);",
lflow_ref);
ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 10,
"ip && ct.new && flags.unsnat_new == 1",
"ct_commit_to_zone(snat);", lflow_ref);
}

if (use_common_zone && od->nbr->n_nat) {
ds_clear(match);
ds_put_cstr(match, "ip && ct_mark.natted == 1");
Expand Down
39 changes: 20 additions & 19 deletions northd/northd.h
Original file line number Diff line number Diff line change
Expand Up @@ -481,27 +481,28 @@ enum ovn_stage {
PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 3, "lr_in_ip_input") \
PIPELINE_STAGE(ROUTER, IN, DHCP_RELAY_REQ, 4, "lr_in_dhcp_relay_req") \
PIPELINE_STAGE(ROUTER, IN, UNSNAT, 5, "lr_in_unsnat") \
PIPELINE_STAGE(ROUTER, IN, DEFRAG, 6, "lr_in_defrag") \
PIPELINE_STAGE(ROUTER, IN, LB_AFF_CHECK, 7, "lr_in_lb_aff_check") \
PIPELINE_STAGE(ROUTER, IN, DNAT, 8, "lr_in_dnat") \
PIPELINE_STAGE(ROUTER, IN, LB_AFF_LEARN, 9, "lr_in_lb_aff_learn") \
PIPELINE_STAGE(ROUTER, IN, ECMP_STATEFUL, 10, "lr_in_ecmp_stateful") \
PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 11, "lr_in_nd_ra_options") \
PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 12, "lr_in_nd_ra_response") \
PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_PRE, 13, "lr_in_ip_routing_pre") \
PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 14, "lr_in_ip_routing") \
PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_ECMP, 15, "lr_in_ip_routing_ecmp") \
PIPELINE_STAGE(ROUTER, IN, POLICY, 16, "lr_in_policy") \
PIPELINE_STAGE(ROUTER, IN, POLICY_ECMP, 17, "lr_in_policy_ecmp") \
PIPELINE_STAGE(ROUTER, IN, DHCP_RELAY_RESP_CHK, 18, \
PIPELINE_STAGE(ROUTER, IN, POST_UNSNAT, 6, "lr_in_post_unsnat") \
PIPELINE_STAGE(ROUTER, IN, DEFRAG, 7, "lr_in_defrag") \
PIPELINE_STAGE(ROUTER, IN, LB_AFF_CHECK, 8, "lr_in_lb_aff_check") \
PIPELINE_STAGE(ROUTER, IN, DNAT, 9, "lr_in_dnat") \
PIPELINE_STAGE(ROUTER, IN, LB_AFF_LEARN, 10, "lr_in_lb_aff_learn") \
PIPELINE_STAGE(ROUTER, IN, ECMP_STATEFUL, 11, "lr_in_ecmp_stateful") \
PIPELINE_STAGE(ROUTER, IN, ND_RA_OPTIONS, 12, "lr_in_nd_ra_options") \
PIPELINE_STAGE(ROUTER, IN, ND_RA_RESPONSE, 13, "lr_in_nd_ra_response") \
PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_PRE, 14, "lr_in_ip_routing_pre") \
PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 15, "lr_in_ip_routing") \
PIPELINE_STAGE(ROUTER, IN, IP_ROUTING_ECMP, 16, "lr_in_ip_routing_ecmp") \
PIPELINE_STAGE(ROUTER, IN, POLICY, 17, "lr_in_policy") \
PIPELINE_STAGE(ROUTER, IN, POLICY_ECMP, 18, "lr_in_policy_ecmp") \
PIPELINE_STAGE(ROUTER, IN, DHCP_RELAY_RESP_CHK, 19, \
"lr_in_dhcp_relay_resp_chk") \
PIPELINE_STAGE(ROUTER, IN, DHCP_RELAY_RESP, 19, \
PIPELINE_STAGE(ROUTER, IN, DHCP_RELAY_RESP, 20, \
"lr_in_dhcp_relay_resp") \
PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 20, "lr_in_arp_resolve") \
PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN, 21, "lr_in_chk_pkt_len") \
PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 22, "lr_in_larger_pkts") \
PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 23, "lr_in_gw_redirect") \
PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 24, "lr_in_arp_request") \
PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 21, "lr_in_arp_resolve") \
PIPELINE_STAGE(ROUTER, IN, CHK_PKT_LEN, 22, "lr_in_chk_pkt_len") \
PIPELINE_STAGE(ROUTER, IN, LARGER_PKTS, 23, "lr_in_larger_pkts") \
PIPELINE_STAGE(ROUTER, IN, GW_REDIRECT, 24, "lr_in_gw_redirect") \
PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 25, "lr_in_arp_request") \
\
/* Logical router egress stages. */ \
PIPELINE_STAGE(ROUTER, OUT, CHECK_DNAT_LOCAL, 0, \
Expand Down
Loading

0 comments on commit 2fe6258

Please sign in to comment.