[PATCH net-next 4/4] net: dsa: sja1105: implement management routes for cascaded switches

From: Vladimir Oltean
Date: Fri Sep 13 2024 - 09:17:22 EST


The SJA1105 management route concept was previously explained in commits
227d07a07ef1 ("net: dsa: sja1105: Add support for traffic through
standalone ports") and 0a51826c6e05 ("net: dsa: sja1105: Always send
through management routes in slot 0").

In a daisy-chained topology with at least 2 switches, sending link-local
frames towards a port of the downstream switch requires programming 2
management routes: one on the upstream switch and one on the leaf switch.
In the general case, each switch along the TX path of the packet, starting
from the CPU, needs a one-shot management route installed over SPI.

The driver currently does not handle this, but instead limits link-local
traffic support to a single switch, due to 2 major blockers:

1. There was no way up until now to calculate the path (the management
route itself) between the CPU and a leaf user port. Sure, we can start
with dp->cpu_dp and use dsa_routing_port() to figure out the cascade
port that targets the next switch. But we cannot make the jump from
one switch to the next. The dst->rtable is fundamentally flawed by
construction. It contains not only directly-connected link_dp entries,
but links to _all_ other cascade ports in the tree. For trees with 3
or more switches, this means that we don't know, by following
dst->rtable, if the link_dp that we pick is really one hop away, or
more than one hop away. So we might skip programming some switches
along the packet's path.

2. The current priv->mgmt_lock does not serialize enough code to work in
a cross-chip scenario. When sending a packet across a tree, we want
to block updates to the management route tables for all switches
along that path, not just for the leaf port (because link-local
traffic might be transmitted concurrently towards other ports).
Keeping this lock where it is (in struct sja1105_private, which is
per switch) will not work, because sja1105_port_deferred_xmit() would
have to acquire and then release N locks, and that's simply
impossible to do without risking AB/BA deadlocks.

To solve 1, recent changes have introduced struct dsa_port::link_dp in
the DSA core, to make the hop-by-hop traversal of the DSA tree possible.
Using that information, we statically compute management routes for each
user port at switch setup time.

To solve 2, we go for the much more complex scheme of allocating a
tree-wide structure for managing the management routes, which holds a
single lock.

Signed-off-by: Vladimir Oltean <vladimir.oltean@xxxxxxx>
---
drivers/net/dsa/sja1105/sja1105.h | 43 ++++-
drivers/net/dsa/sja1105/sja1105_main.c | 253 ++++++++++++++++++++++---
2 files changed, 263 insertions(+), 33 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 8c66d3bf61f0..7753b4d62bc6 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -245,6 +245,43 @@ struct sja1105_flow_block {
int num_virtual_links;
};

+/**
+ * struct sja1105_mgmt_route_port - Representation of one port in a management route
+ * @dp: DSA user or cascade port
+ * @list: List node element for the mgmt_route->ports list membership
+ */
+struct sja1105_mgmt_route_port {
+ struct dsa_port *dp;
+ struct list_head list;
+};
+
+/**
+ * struct sja1105_mgmt_route - Structure to represent a SJA1105 management route
+ * @ports: List of ports on which the management route needs to be installed,
+ * starting with the downstream-facing cascade port of the switch which
+ * has the CPU connection, and ending with the user port of the leaf
+ * switch.
+ * @list: List node element for the mgmt_tree->routes list membership.
+ */
+struct sja1105_mgmt_route {
+ struct list_head ports;
+ struct list_head list;
+};
+
+/**
+ * struct sja1105_mgmt_tree - DSA switch tree-level structure for management routes
+ * @lock: Serializes transmission of management frames across the tree, so that
+ * the switches don't confuse them with one another.
+ * @routes: List of sja1105_mgmt_route structures, one for each user port in
+ * the tree.
+ * @refcount: Reference count.
+ */
+struct sja1105_mgmt_tree {
+ struct mutex lock;
+ struct list_head routes;
+ refcount_t refcount;
+};
+
struct sja1105_private {
struct sja1105_static_config static_config;
int rgmii_rx_delay_ps[SJA1105_MAX_NUM_PORTS];
@@ -259,13 +296,11 @@ struct sja1105_private {
size_t max_xfer_len;
struct spi_device *spidev;
struct dsa_switch *ds;
+ struct sja1105_mgmt_tree *mgmt_tree;
+ struct sja1105_mgmt_route **mgmt_routes;
u16 bridge_pvid[SJA1105_MAX_NUM_PORTS];
u16 tag_8021q_pvid[SJA1105_MAX_NUM_PORTS];
struct sja1105_flow_block flow_block;
- /* Serializes transmission of management frames so that
- * the switch doesn't confuse them with one another.
- */
- struct mutex mgmt_lock;
/* Serializes accesses to the FDB */
struct mutex fdb_lock;
/* PTP two-step TX timestamp ID, and its serialization lock */
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index bc7e50dcb57c..81e380ff8a56 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2302,8 +2302,8 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
int rc, i;
s64 now;

+ mutex_lock(&priv->mgmt_tree->lock);
mutex_lock(&priv->fdb_lock);
- mutex_lock(&priv->mgmt_lock);

mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries;

@@ -2414,8 +2414,8 @@ int sja1105_static_config_reload(struct sja1105_private *priv,
if (rc < 0)
goto out;
out:
- mutex_unlock(&priv->mgmt_lock);
mutex_unlock(&priv->fdb_lock);
+ mutex_unlock(&priv->mgmt_tree->lock);

return rc;
}
@@ -2668,39 +2668,41 @@ static int sja1105_prechangeupper(struct dsa_switch *ds, int port,
return 0;
}

-static int sja1105_mgmt_xmit(struct dsa_switch *ds, int port, int slot,
- struct sk_buff *skb, bool takets)
+static int sja1105_mgmt_route_install(struct dsa_port *dp, const u8 *addr,
+ bool takets, int slot)
{
- struct sja1105_mgmt_entry mgmt_route = {0};
- struct sja1105_private *priv = ds->priv;
- struct ethhdr *hdr;
- int timeout = 10;
- int rc;
-
- hdr = eth_hdr(skb);
+ struct sja1105_mgmt_entry mgmt_route = {};
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;

- mgmt_route.macaddr = ether_addr_to_u64(hdr->h_dest);
+ mgmt_route.macaddr = ether_addr_to_u64(addr);
mgmt_route.destports = BIT(port);
mgmt_route.enfport = 1;
mgmt_route.tsreg = 0;
- mgmt_route.takets = takets;
+ /* Only the leaf port takes the TX timestamp, the cascade ports just
+ * route the packet towards the leaf switch
+ */
+ mgmt_route.takets = dsa_port_is_user(dp) ? takets : false;

- rc = sja1105_dynamic_config_write(priv, BLK_IDX_MGMT_ROUTE,
- slot, &mgmt_route, true);
- if (rc < 0) {
- kfree_skb(skb);
- return rc;
- }
+ return sja1105_dynamic_config_write(ds->priv, BLK_IDX_MGMT_ROUTE,
+ slot, &mgmt_route, true);
+}

- /* Transfer skb to the host port. */
- dsa_enqueue_skb(skb, dsa_to_port(ds, port)->user);
+static void sja1105_mgmt_route_poll(struct dsa_port *dp, int slot)
+{
+ struct sja1105_mgmt_entry mgmt_route = {};
+ struct dsa_switch *ds = dp->ds;
+ struct sja1105_private *priv;
+ int timeout = 10;
+ int rc;
+
+ priv = ds->priv;

- /* Wait until the switch has processed the frame */
do {
rc = sja1105_dynamic_config_read(priv, BLK_IDX_MGMT_ROUTE,
slot, &mgmt_route);
if (rc < 0) {
- dev_err_ratelimited(priv->ds->dev,
+ dev_err_ratelimited(ds->dev,
"failed to poll for mgmt route\n");
continue;
}
@@ -2720,8 +2722,36 @@ static int sja1105_mgmt_xmit(struct dsa_switch *ds, int port, int slot,
*/
sja1105_dynamic_config_write(priv, BLK_IDX_MGMT_ROUTE,
slot, &mgmt_route, false);
- dev_err_ratelimited(priv->ds->dev, "xmit timed out\n");
+ dev_err_ratelimited(ds->dev, "xmit timed out\n");
}
+}
+
+static int sja1105_mgmt_xmit(struct dsa_switch *ds, int port, int slot,
+ struct sk_buff *skb, bool takets)
+{
+ struct sja1105_mgmt_route_port *route_port;
+ struct sja1105_private *priv = ds->priv;
+ struct ethhdr *hdr = eth_hdr(skb);
+ struct sja1105_mgmt_route *route;
+ int rc;
+
+ route = priv->mgmt_routes[port];
+
+ list_for_each_entry(route_port, &route->ports, list) {
+ rc = sja1105_mgmt_route_install(route_port->dp, hdr->h_dest,
+ takets, slot);
+ if (rc) {
+ kfree_skb(skb);
+ return rc;
+ }
+ }
+
+ /* Transfer skb to the host port. */
+ dsa_enqueue_skb(skb, dsa_to_port(ds, port)->user);
+
+ /* Wait until the switches have processed the frame */
+ list_for_each_entry(route_port, &route->ports, list)
+ sja1105_mgmt_route_poll(route_port->dp, slot);

return NETDEV_TX_OK;
}
@@ -2743,7 +2773,7 @@ static void sja1105_port_deferred_xmit(struct kthread_work *work)

clone = SJA1105_SKB_CB(skb)->clone;

- mutex_lock(&priv->mgmt_lock);
+ mutex_lock(&priv->mgmt_tree->lock);

sja1105_mgmt_xmit(ds, port, 0, skb, !!clone);

@@ -2751,7 +2781,7 @@ static void sja1105_port_deferred_xmit(struct kthread_work *work)
if (clone)
sja1105_ptp_txtstamp_skb(ds, port, clone);

- mutex_unlock(&priv->mgmt_lock);
+ mutex_unlock(&priv->mgmt_tree->lock);

kfree(xmit_work);
}
@@ -3078,6 +3108,165 @@ static int sja1105_port_bridge_flags(struct dsa_switch *ds, int port,
return 0;
}

+static struct sja1105_mgmt_tree *sja1105_mgmt_tree_find(struct dsa_switch *ds)
+{
+ struct dsa_switch_tree *dst = ds->dst;
+ struct sja1105_private *priv;
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list) {
+ priv = dp->ds->priv;
+ if (priv->mgmt_tree)
+ return priv->mgmt_tree;
+ }
+
+ return NULL;
+}
+
+static struct sja1105_mgmt_tree *sja1105_mgmt_tree_get(struct dsa_switch *ds)
+{
+ struct sja1105_mgmt_tree *mgmt_tree = sja1105_mgmt_tree_find(ds);
+
+ if (mgmt_tree) {
+ refcount_inc(&mgmt_tree->refcount);
+ return mgmt_tree;
+ }
+
+ mgmt_tree = kzalloc(sizeof(*mgmt_tree), GFP_KERNEL);
+ if (!mgmt_tree)
+ return NULL;
+
+ INIT_LIST_HEAD(&mgmt_tree->routes);
+ refcount_set(&mgmt_tree->refcount, 1);
+ mutex_init(&mgmt_tree->lock);
+
+ return mgmt_tree;
+}
+
+static void sja1105_mgmt_tree_put(struct sja1105_mgmt_tree *mgmt_tree)
+{
+ if (!refcount_dec_and_test(&mgmt_tree->refcount))
+ return;
+
+ WARN_ON(!list_empty(&mgmt_tree->routes));
+ kfree(mgmt_tree);
+}
+
+static void sja1105_mgmt_route_destroy(struct sja1105_mgmt_route *mgmt_route)
+{
+ struct sja1105_mgmt_route_port *mgmt_route_port, *next;
+
+ list_for_each_entry_safe(mgmt_route_port, next, &mgmt_route->ports,
+ list) {
+ list_del(&mgmt_route_port->list);
+ kfree(mgmt_route_port);
+ }
+
+ kfree(mgmt_route);
+}
+
+static int sja1105_mgmt_route_create(struct dsa_port *dp)
+{
+ struct sja1105_mgmt_route_port *mgmt_route_port;
+ struct sja1105_mgmt_route *mgmt_route;
+ struct dsa_switch *ds = dp->ds;
+ struct sja1105_private *priv;
+ struct dsa_port *upstream_dp;
+ int upstream, rc;
+
+ priv = ds->priv;
+
+ mgmt_route = kzalloc(sizeof(*mgmt_route), GFP_KERNEL);
+ if (!mgmt_route)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&mgmt_route->ports);
+
+ priv->mgmt_routes[dp->index] = mgmt_route;
+
+ while (dp) {
+ mgmt_route_port = kzalloc(sizeof(*mgmt_route_port), GFP_KERNEL);
+ if (!mgmt_route_port) {
+ rc = -ENOMEM;
+ goto err_free_route;
+ }
+
+ mgmt_route_port->dp = dp;
+ list_add(&mgmt_route_port->list, &mgmt_route->ports);
+
+ upstream = dsa_upstream_port(dp->ds, dp->index);
+ upstream_dp = dsa_to_port(dp->ds, upstream);
+ if (dsa_port_is_cpu(upstream_dp))
+ break;
+
+ /* upstream_dp is a cascade port. Jump hop by hop towards the
+ * CPU port using the dp->link_dp adjacency information.
+ */
+ dp = upstream_dp->link_dp;
+ }
+
+ list_add_tail(&mgmt_route->list, &priv->mgmt_tree->routes);
+
+ return 0;
+
+err_free_route:
+ sja1105_mgmt_route_destroy(mgmt_route);
+
+ return rc;
+}
+
+static int sja1105_mgmt_setup(struct dsa_switch *ds)
+{
+ struct sja1105_private *priv = ds->priv;
+ struct dsa_port *dp;
+ int rc;
+
+ if (priv->info->tag_proto != DSA_TAG_PROTO_SJA1105)
+ return 0;
+
+ priv->mgmt_tree = sja1105_mgmt_tree_get(ds);
+ if (!priv->mgmt_tree)
+ return -ENOMEM;
+
+ priv->mgmt_routes = kcalloc(ds->num_ports, sizeof(*priv->mgmt_routes),
+ GFP_KERNEL);
+ if (!priv->mgmt_routes) {
+ rc = -ENOMEM;
+ goto err_put_tree;
+ }
+
+ dsa_switch_for_each_user_port(dp, ds) {
+ rc = sja1105_mgmt_route_create(dp);
+ if (rc)
+ goto err_destroy_routes;
+ }
+
+ return 0;
+
+err_destroy_routes:
+ dsa_switch_for_each_user_port_continue_reverse(dp, ds)
+ sja1105_mgmt_route_destroy(priv->mgmt_routes[dp->index]);
+err_put_tree:
+ sja1105_mgmt_tree_put(priv->mgmt_tree);
+
+ return rc;
+}
+
+static void sja1105_mgmt_teardown(struct dsa_switch *ds)
+{
+ struct sja1105_private *priv = ds->priv;
+ struct dsa_port *dp;
+
+ if (priv->info->tag_proto != DSA_TAG_PROTO_SJA1105)
+ return;
+
+ dsa_switch_for_each_user_port(dp, ds)
+ sja1105_mgmt_route_destroy(priv->mgmt_routes[dp->index]);
+
+ kfree(priv->mgmt_routes);
+ sja1105_mgmt_tree_put(priv->mgmt_tree);
+}
+
/* The programming model for the SJA1105 switch is "all-at-once" via static
* configuration tables. Some of these can be dynamically modified at runtime,
* but not the xMII mode parameters table.
@@ -3095,13 +3284,17 @@ static int sja1105_setup(struct dsa_switch *ds)
struct sja1105_private *priv = ds->priv;
int rc;

+ rc = sja1105_mgmt_setup(ds);
+ if (rc)
+ return rc;
+
if (priv->info->disable_microcontroller) {
rc = priv->info->disable_microcontroller(priv);
if (rc < 0) {
dev_err(ds->dev,
"Failed to disable microcontroller: %pe\n",
ERR_PTR(rc));
- return rc;
+ goto out_mgmt_teardown;
}
}

@@ -3109,7 +3302,7 @@ static int sja1105_setup(struct dsa_switch *ds)
rc = sja1105_static_config_load(priv);
if (rc < 0) {
dev_err(ds->dev, "Failed to load static config: %d\n", rc);
- return rc;
+ goto out_mgmt_teardown;
}

/* Configure the CGU (PHY link modes and speeds) */
@@ -3181,6 +3374,8 @@ static int sja1105_setup(struct dsa_switch *ds)
sja1105_tas_teardown(ds);
out_static_config_free:
sja1105_static_config_free(&priv->static_config);
+out_mgmt_teardown:
+ sja1105_mgmt_teardown(ds);

return rc;
}
@@ -3199,6 +3394,7 @@ static void sja1105_teardown(struct dsa_switch *ds)
sja1105_flower_teardown(ds);
sja1105_tas_teardown(ds);
sja1105_static_config_free(&priv->static_config);
+ sja1105_mgmt_teardown(ds);
}

static const struct phylink_mac_ops sja1105_phylink_mac_ops = {
@@ -3388,7 +3584,6 @@ static int sja1105_probe(struct spi_device *spi)

mutex_init(&priv->ptp_data.lock);
mutex_init(&priv->dynamic_config_lock);
- mutex_init(&priv->mgmt_lock);
mutex_init(&priv->fdb_lock);
spin_lock_init(&priv->ts_id_lock);

--
2.34.1