Skip to content

Commit

Permalink
controller: Migrate from ct zone UUID name to component name
Browse files Browse the repository at this point in the history
There are two scenarios that could cause unwanted
CT zone flush:

1) The SB DB is destroyed and recreated. The new
database will end up with different UUIDs for
various components.

2) Upgrade of existing SB DB to ovn-ic.
The components are the same as before, but scattered
between multiple SB DBs. This again leads to different
UUIDs in SB DB.

The CT zone name was based on datapath UUID which
causes flush when the UUID changes. Even if
the datapath is the same.

To prevent the unwanted flush migrate from UUID
to component name (LR/LS name). This allows
the CT zones to be stable across the before mentioned
scenarios.

For the migration to be "flush less" itself we need
to make sure to start the restoration process only
after controller is connected to the SB DB e.g. the
restoration can happen only during engine run and not
init as it was done previously.

Reported-at: https://bugzilla.redhat.com/2224199
Tested-by: Surya Seetharaman <sseethar@redhat.com>
Signed-off-by: Ales Musil <amusil@redhat.com>
Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Numan Siddique <numans@ovn.org>
  • Loading branch information
almusil authored and numansiddique committed Jul 26, 2023
1 parent f5638ee commit 5a1d82c
Show file tree
Hide file tree
Showing 6 changed files with 267 additions and 34 deletions.
106 changes: 89 additions & 17 deletions controller/ovn-controller.c
Original file line number Diff line number Diff line change
Expand Up @@ -744,8 +744,17 @@ update_ct_zones(const struct sset *local_lports,
HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
/* XXX Add method to limit zone assignment to logical router
* datapaths with NAT */
char *dnat = alloc_nat_zone_key(&ld->datapath->header_.uuid, "dnat");
char *snat = alloc_nat_zone_key(&ld->datapath->header_.uuid, "snat");
const char *name = smap_get(&ld->datapath->external_ids, "name");
if (!name) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' "
"skipping zone assignment.",
UUID_ARGS(&ld->datapath->header_.uuid));
continue;
}

char *dnat = alloc_nat_zone_key(name, "dnat");
char *snat = alloc_nat_zone_key(name, "snat");
sset_add(&all_users, dnat);
sset_add(&all_users, snat);

Expand Down Expand Up @@ -882,9 +891,66 @@ struct ed_type_ct_zones {
bool recomputed;
};

/* Replaces a UUID prefix from 'uuid_zone' (if any) with the
* corresponding Datapath_Binding.external_ids.name. Returns it
* as a new string that will be owned by the caller. */
static char *
ct_zone_name_from_uuid(const struct sbrec_datapath_binding_table *dp_table,
const char *uuid_zone)
{
struct uuid uuid;
if (!uuid_from_string_prefix(&uuid, uuid_zone)) {
return NULL;
}

const struct sbrec_datapath_binding *dp =
sbrec_datapath_binding_table_get_for_uuid(dp_table, &uuid);
if (!dp) {
return NULL;
}

const char *entity_name = smap_get(&dp->external_ids, "name");
if (!entity_name) {
return NULL;
}

return xasprintf("%s%s", entity_name, uuid_zone + UUID_LEN);
}

static void
ct_zone_restore(const struct sbrec_datapath_binding_table *dp_table,
struct ed_type_ct_zones *ct_zones_data, const char *name,
int zone)
{
VLOG_DBG("restoring ct zone %"PRId32" for '%s'", zone, name);

char *new_name = ct_zone_name_from_uuid(dp_table, name);
const char *current_name = name;
if (new_name) {
VLOG_DBG("ct zone %"PRId32" replace uuid name '%s' with '%s'",
zone, name, new_name);

/* Make sure we remove the uuid one in the next OvS DB commit without
* flush. */
add_pending_ct_zone_entry(&ct_zones_data->pending, CT_ZONE_DB_QUEUED,
zone, false, name);
/* Store the zone in OvS DB with name instead of uuid without flush.
* */
add_pending_ct_zone_entry(&ct_zones_data->pending, CT_ZONE_DB_QUEUED,
zone, true, new_name);
current_name = new_name;
}

simap_put(&ct_zones_data->current, current_name, zone);
bitmap_set1(ct_zones_data->bitmap, zone);

free(new_name);
}

static void
restore_ct_zones(const struct ovsrec_bridge_table *bridge_table,
const struct ovsrec_open_vswitch_table *ovs_table,
const struct sbrec_datapath_binding_table *dp_table,
struct ed_type_ct_zones *ct_zones_data)
{
memset(ct_zones_data->bitmap, 0, sizeof ct_zones_data->bitmap);
Expand All @@ -895,10 +961,8 @@ restore_ct_zones(const struct ovsrec_bridge_table *bridge_table,
struct ct_zone_pending_entry *ctpe = pending_node->data;

if (ctpe->add) {
VLOG_DBG("restoring ct zone %"PRId32" for '%s'", ctpe->zone,
pending_node->name);
bitmap_set1(ct_zones_data->bitmap, ctpe->zone);
simap_put(&ct_zones_data->current, pending_node->name, ctpe->zone);
ct_zone_restore(dp_table, ct_zones_data,
pending_node->name, ctpe->zone);
}
}

Expand Down Expand Up @@ -936,9 +1000,7 @@ restore_ct_zones(const struct ovsrec_bridge_table *bridge_table,
continue;
}

VLOG_DBG("restoring ct zone %"PRId32" for '%s'", zone, user);
bitmap_set1(ct_zones_data->bitmap, zone);
simap_put(&ct_zones_data->current, user, zone);
ct_zone_restore(dp_table, ct_zones_data, user, zone);
}
}

Expand Down Expand Up @@ -1089,6 +1151,10 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
ovsdb_idl_track_add_column(ovs_idl,
&ovsrec_flow_sample_collector_set_col_id);
mirror_register_ovs_idl(ovs_idl);
/* XXX: There is a potential bug in CT zone I-P node,
* the fact that we have to call recompute for the change of
* OVS.bridge.external_ids be reflected. Currently, we don't
* track that column which should be addressed in the future. */
}

#define SB_NODES \
Expand Down Expand Up @@ -2416,18 +2482,14 @@ port_groups_runtime_data_handler(struct engine_node *node, void *data)
}

static void *
en_ct_zones_init(struct engine_node *node, struct engine_arg *arg OVS_UNUSED)
en_ct_zones_init(struct engine_node *node OVS_UNUSED,
struct engine_arg *arg OVS_UNUSED)
{
struct ed_type_ct_zones *data = xzalloc(sizeof *data);
const struct ovsrec_open_vswitch_table *ovs_table =
EN_OVSDB_GET(engine_get_input("OVS_open_vswitch", node));
const struct ovsrec_bridge_table *bridge_table =
EN_OVSDB_GET(engine_get_input("OVS_bridge", node));

shash_init(&data->pending);
simap_init(&data->current);

restore_ct_zones(bridge_table, ovs_table, data);
return data;
}

Expand Down Expand Up @@ -2458,8 +2520,10 @@ en_ct_zones_run(struct engine_node *node, void *data)
EN_OVSDB_GET(engine_get_input("OVS_open_vswitch", node));
const struct ovsrec_bridge_table *bridge_table =
EN_OVSDB_GET(engine_get_input("OVS_bridge", node));
const struct sbrec_datapath_binding_table *dp_table =
EN_OVSDB_GET(engine_get_input("SB_datapath_binding", node));

restore_ct_zones(bridge_table, ovs_table, ct_zones_data);
restore_ct_zones(bridge_table, ovs_table, dp_table, ct_zones_data);
update_ct_zones(&rt_data->local_lports, &rt_data->local_datapaths,
&ct_zones_data->current, ct_zones_data->bitmap,
&ct_zones_data->pending);
Expand Down Expand Up @@ -2507,7 +2571,15 @@ ct_zones_datapath_binding_handler(struct engine_node *node, void *data)
/* Check if the requested snat zone has changed for the datapath
* or not. If so, then fall back to full recompute of
* ct_zone engine. */
char *snat_dp_zone_key = alloc_nat_zone_key(&dp->header_.uuid, "snat");
const char *name = smap_get(&dp->external_ids, "name");
if (!name) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"' skipping"
"zone check.", UUID_ARGS(&dp->header_.uuid));
continue;
}

char *snat_dp_zone_key = alloc_nat_zone_key(name, "snat");
struct simap_node *simap_node = simap_find(&ct_zones_data->current,
snat_dp_zone_key);
free(snat_dp_zone_key);
Expand Down
17 changes: 12 additions & 5 deletions controller/physical.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,17 +209,24 @@ static struct zone_ids
get_zone_ids(const struct sbrec_port_binding *binding,
const struct simap *ct_zones)
{
struct zone_ids zone_ids;
struct zone_ids zone_ids = {
.ct = simap_get(ct_zones, binding->logical_port)
};

zone_ids.ct = simap_get(ct_zones, binding->logical_port);
const char *name = smap_get(&binding->datapath->external_ids, "name");
if (!name) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
VLOG_ERR_RL(&rl, "Missing name for datapath '"UUID_FMT"'",
UUID_ARGS(&binding->datapath->header_.uuid));

const struct uuid *key = &binding->datapath->header_.uuid;
return zone_ids;
}

char *dnat = alloc_nat_zone_key(key, "dnat");
char *dnat = alloc_nat_zone_key(name, "dnat");
zone_ids.dnat = simap_get(ct_zones, dnat);
free(dnat);

char *snat = alloc_nat_zone_key(key, "snat");
char *snat = alloc_nat_zone_key(name, "snat");
zone_ids.snat = simap_get(ct_zones, snat);
free(snat);

Expand Down
4 changes: 2 additions & 2 deletions lib/ovn-util.c
Original file line number Diff line number Diff line change
Expand Up @@ -485,9 +485,9 @@ split_addresses(const char *addresses, struct svec *ipv4_addrs,
*
* It is the caller's responsibility to free the allocated memory. */
char *
alloc_nat_zone_key(const struct uuid *key, const char *type)
alloc_nat_zone_key(const char *name, const char *type)
{
return xasprintf(UUID_FMT"_%s", UUID_ARGS(key), type);
return xasprintf("%s_%s", name, type);
}

const char *
Expand Down
2 changes: 1 addition & 1 deletion lib/ovn-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ const char *find_lport_address(const struct lport_addresses *laddrs,
void split_addresses(const char *addresses, struct svec *ipv4_addrs,
struct svec *ipv6_addrs);

char *alloc_nat_zone_key(const struct uuid *key, const char *type);
char *alloc_nat_zone_key(const char *name, const char *type);

const char *default_nb_db(void);
const char *default_sb_db(void);
Expand Down
16 changes: 7 additions & 9 deletions tests/ovn-controller.at
Original file line number Diff line number Diff line change
Expand Up @@ -2446,8 +2446,7 @@ echo "$ct_zones"
port1_zone=$(get_zone_num "$ct_zones" ls0-hv1)
port2_zone=$(get_zone_num "$ct_zones" ls0-hv2)

lr_uuid=$(fetch_column Datapath_Binding _uuid external_ids:name=lr0)
snat_zone=$(get_zone_num "$ct_zones" ${lr_uuid}_snat)
snat_zone=$(get_zone_num "$ct_zones" lr0_snat)
echo "snat_zone is $snat_zone"

check test "$port1_zone" -ne "$port2_zone"
Expand All @@ -2456,7 +2455,7 @@ check test "$port1_zone" -ne "$snat_zone"

OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv1 $port1_zone])
OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv2 $port2_zone])
OVS_WAIT_UNTIL([check_ovsdb_zone ${lr_uuid}_snat $snat_zone])
OVS_WAIT_UNTIL([check_ovsdb_zone lr0_snat $snat_zone])

# Now purposely request an SNAT zone for lr0 that conflicts with a zone
# currently assigned to a logical port
Expand All @@ -2470,7 +2469,7 @@ echo "$ct_zones"

port1_zone=$(get_zone_num "$ct_zones" ls0-hv1)
port2_zone=$(get_zone_num "$ct_zones" ls0-hv2)
snat_zone=$(get_zone_num "$ct_zones" ${lr_uuid}_snat)
snat_zone=$(get_zone_num "$ct_zones" lr0_snat)

check test "$snat_zone" -eq "$snat_req_zone"
check test "$port1_zone" -ne "$port2_zone"
Expand All @@ -2479,7 +2478,7 @@ check test "$port1_zone" -ne "$snat_zone"

OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv1 $port1_zone])
OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv2 $port2_zone])
OVS_WAIT_UNTIL([check_ovsdb_zone ${lr_uuid}_snat $snat_zone])
OVS_WAIT_UNTIL([check_ovsdb_zone lr0_snat $snat_zone])

# Now create a conflict in the OVSDB and restart ovn-controller.

Expand All @@ -2493,7 +2492,7 @@ echo "$ct_zones"

port1_zone=$(get_zone_num "$ct_zones" ls0-hv1)
port2_zone=$(get_zone_num "$ct_zones" ls0-hv2)
snat_zone=$(get_zone_num "$ct_zones" ${lr_uuid}_snat)
snat_zone=$(get_zone_num "$ct_zones" lr0_snat)

check test "$snat_zone" -eq "$snat_req_zone"
check test "$port1_zone" -ne "$port2_zone"
Expand All @@ -2502,7 +2501,7 @@ check test "$port1_zone" -ne "$snat_zone"

OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv1 $port1_zone])
OVS_WAIT_UNTIL([check_ovsdb_zone ls0-hv2 $port2_zone])
OVS_WAIT_UNTIL([check_ovsdb_zone ${lr_uuid}_snat $snat_zone])
OVS_WAIT_UNTIL([check_ovsdb_zone lr0_snat $snat_zone])

OVN_CLEANUP([hv1])
AT_CLEANUP
Expand Down Expand Up @@ -2575,9 +2574,8 @@ check ovn-nbctl lrp-set-gateway-chassis lrp-gw hv1

check ovn-nbctl --wait=hv sync

lr_uuid=$(fetch_column Datapath_Binding _uuid external_ids:name=lr0)
ct_zones=$(ovn-appctl -t ovn-controller ct-zone-list)
zone_num=$(printf "$ct_zones" | grep ${lr_uuid}_snat | cut -d ' ' -f 2)
zone_num=$(printf "$ct_zones" | grep lr0_snat | cut -d ' ' -f 2)

check test "$zone_num" -eq 666

Expand Down
Loading

0 comments on commit 5a1d82c

Please sign in to comment.