Re: [net-next PATCH v2] net: dsa: qca8k: pack driver struct and improve cache use
From: Vladimir Oltean
Date: Wed Mar 02 2022 - 20:53:42 EST
On Mon, Feb 28, 2022 at 12:04:08PM +0100, Ansuel Smith wrote:
> Pack qca8k priv and other struct using pahole and set the first priv
> struct entry to mgmt_master and mgmt_eth_data to speedup access.
> While at it also rework pcs struct and move it qca8k_ports_config
> following other configuration set for the cpu ports.
>
> Signed-off-by: Ansuel Smith <ansuelsmth@xxxxxxxxx>
> ---
How did you "pack" struct qca8k_priv exactly?
Before:
struct qca8k_priv {
u8 switch_id; /* 0 1 */
u8 switch_revision; /* 1 1 */
u8 mirror_rx; /* 2 1 */
u8 mirror_tx; /* 3 1 */
u8 lag_hash_mode; /* 4 1 */
bool legacy_phy_port_mapping; /* 5 1 */
struct qca8k_ports_config ports_config; /* 6 7 */
/* XXX 3 bytes hole, try to pack */
struct regmap * regmap; /* 16 8 */
struct mii_bus * bus; /* 24 8 */
struct ar8xxx_port_status port_sts[7]; /* 32 28 */
/* XXX 4 bytes hole, try to pack */
/* --- cacheline 1 boundary (64 bytes) --- */
struct dsa_switch * ds; /* 64 8 */
struct mutex reg_mutex; /* 72 160 */
/* --- cacheline 3 boundary (192 bytes) was 40 bytes ago --- */
struct device * dev; /* 232 8 */
struct dsa_switch_ops ops; /* 240 864 */
/* --- cacheline 17 boundary (1088 bytes) was 16 bytes ago --- */
struct gpio_desc * reset_gpio; /* 1104 8 */
unsigned int port_mtu[7]; /* 1112 28 */
/* XXX 4 bytes hole, try to pack */
struct net_device * mgmt_master; /* 1144 8 */
/* --- cacheline 18 boundary (1152 bytes) --- */
struct qca8k_mgmt_eth_data mgmt_eth_data; /* 1152 280 */
/* --- cacheline 22 boundary (1408 bytes) was 24 bytes ago --- */
struct qca8k_mib_eth_data mib_eth_data; /* 1432 272 */
/* --- cacheline 26 boundary (1664 bytes) was 40 bytes ago --- */
struct qca8k_mdio_cache mdio_cache; /* 1704 6 */
/* XXX 2 bytes hole, try to pack */
struct qca8k_pcs pcs_port_0; /* 1712 32 */
/* XXX last struct has 4 bytes of padding */
/* --- cacheline 27 boundary (1728 bytes) was 16 bytes ago --- */
struct qca8k_pcs pcs_port_6; /* 1744 32 */
/* XXX last struct has 4 bytes of padding */
/* size: 1776, cachelines: 28, members: 22 */
/* sum members: 1763, holes: 4, sum holes: 13 */
/* paddings: 2, sum paddings: 8 */
/* last cacheline: 48 bytes */
};
After:
struct qca8k_priv {
struct net_device * mgmt_master; /* 0 8 */
struct qca8k_mgmt_eth_data mgmt_eth_data; /* 8 280 */
/* --- cacheline 4 boundary (256 bytes) was 32 bytes ago --- */
struct qca8k_mdio_cache mdio_cache; /* 288 6 */
u8 switch_id; /* 294 1 */
u8 switch_revision; /* 295 1 */
u8 mirror_rx; /* 296 1 */
u8 mirror_tx; /* 297 1 */
u8 lag_hash_mode; /* 298 1 */
bool legacy_phy_port_mapping; /* 299 1 */
/* XXX 4 bytes hole, try to pack */
struct qca8k_ports_config ports_config; /* 304 72 */
/* --- cacheline 5 boundary (320 bytes) was 56 bytes ago --- */
struct regmap * regmap; /* 376 8 */
/* --- cacheline 6 boundary (384 bytes) --- */
struct mii_bus * bus; /* 384 8 */
struct ar8xxx_port_status port_sts[7]; /* 392 28 */
/* XXX 4 bytes hole, try to pack */
struct dsa_switch * ds; /* 424 8 */
struct mutex reg_mutex; /* 432 160 */
/* --- cacheline 9 boundary (576 bytes) was 16 bytes ago --- */
struct device * dev; /* 592 8 */
struct gpio_desc * reset_gpio; /* 600 8 */
struct dsa_switch_ops ops; /* 608 864 */
/* --- cacheline 23 boundary (1472 bytes) --- */
struct qca8k_mib_eth_data mib_eth_data; /* 1472 280 */
/* XXX last struct has 4 bytes of padding */
/* --- cacheline 27 boundary (1728 bytes) was 24 bytes ago --- */
unsigned int port_mtu[7]; /* 1752 28 */
/* size: 1784, cachelines: 28, members: 20 */
/* sum members: 1772, holes: 2, sum holes: 8 */
/* padding: 4 */
/* paddings: 1, sum paddings: 4 */
/* last cacheline: 56 bytes */
};
1776 vs 1784. That's... larger?!
Also, struct qca8k_priv is so large because the "ops" member is a full
copy of qca8k_switch_ops. I understand why commit db460c54b67f ("net:
dsa: qca8k: extend slave-bus implementations") did this, but I wonder,
is there no better way?
> drivers/net/dsa/qca8k.c | 8 ++++----
> drivers/net/dsa/qca8k.h | 33 ++++++++++++++++-----------------
> 2 files changed, 20 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
> index ee0dbf324268..8d059da5f0ca 100644
> --- a/drivers/net/dsa/qca8k.c
> +++ b/drivers/net/dsa/qca8k.c
> @@ -1685,11 +1685,11 @@ qca8k_phylink_mac_select_pcs(struct dsa_switch *ds, int port,
> case PHY_INTERFACE_MODE_1000BASEX:
> switch (port) {
> case 0:
> - pcs = &priv->pcs_port_0.pcs;
> + pcs = &priv->ports_config.qpcs[QCA8K_CPU_PORT0].pcs;
> break;
>
> case 6:
> - pcs = &priv->pcs_port_6.pcs;
> + pcs = &priv->ports_config.qpcs[QCA8K_CPU_PORT6].pcs;
> break;
> }
> break;
> @@ -2889,8 +2889,8 @@ qca8k_setup(struct dsa_switch *ds)
> if (ret)
> return ret;
>
> - qca8k_setup_pcs(priv, &priv->pcs_port_0, 0);
> - qca8k_setup_pcs(priv, &priv->pcs_port_6, 6);
> + qca8k_setup_pcs(priv, &priv->ports_config.qpcs[QCA8K_CPU_PORT0], 0);
> + qca8k_setup_pcs(priv, &priv->ports_config.qpcs[QCA8K_CPU_PORT6], 6);
>
> /* Make sure MAC06 is disabled */
> ret = regmap_clear_bits(priv->regmap, QCA8K_REG_PORT0_PAD_CTRL,
> diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
> index f375627174c8..611dc2335dbe 100644
> --- a/drivers/net/dsa/qca8k.h
> +++ b/drivers/net/dsa/qca8k.h
> @@ -341,18 +341,24 @@ enum {
>
> struct qca8k_mgmt_eth_data {
> struct completion rw_done;
> - struct mutex mutex; /* Enforce one mdio read/write at time */
> + u32 data[4];
> bool ack;
> u32 seq;
> - u32 data[4];
> + struct mutex mutex; /* Enforce one mdio read/write at time */
> };
>
> struct qca8k_mib_eth_data {
> struct completion rw_done;
> + u64 *data; /* pointer to ethtool data */
> + u8 req_port;
> struct mutex mutex; /* Process one command at time */
> refcount_t port_parsed; /* Counter to track parsed port */
> - u8 req_port;
> - u64 *data; /* pointer to ethtool data */
> +};
> +
> +struct qca8k_pcs {
> + struct phylink_pcs pcs;
> + struct qca8k_priv *priv;
> + int port;
> };
>
> struct qca8k_ports_config {
> @@ -361,6 +367,7 @@ struct qca8k_ports_config {
> bool sgmii_enable_pll;
> u8 rgmii_rx_delay[QCA8K_NUM_CPU_PORTS]; /* 0: CPU port0, 1: CPU port6 */
> u8 rgmii_tx_delay[QCA8K_NUM_CPU_PORTS]; /* 0: CPU port0, 1: CPU port6 */
> + struct qca8k_pcs qpcs[QCA8K_NUM_CPU_PORTS];
> };
>
> struct qca8k_mdio_cache {
> @@ -376,13 +383,10 @@ struct qca8k_mdio_cache {
> u16 hi;
> };
>
> -struct qca8k_pcs {
> - struct phylink_pcs pcs;
> - struct qca8k_priv *priv;
> - int port;
> -};
> -
> struct qca8k_priv {
> + struct net_device *mgmt_master; /* Track if mdio/mib Ethernet is available */
> + struct qca8k_mgmt_eth_data mgmt_eth_data;
> + struct qca8k_mdio_cache mdio_cache;
> u8 switch_id;
> u8 switch_revision;
> u8 mirror_rx;
> @@ -396,15 +400,10 @@ struct qca8k_priv {
> struct dsa_switch *ds;
> struct mutex reg_mutex;
> struct device *dev;
> - struct dsa_switch_ops ops;
> struct gpio_desc *reset_gpio;
> - unsigned int port_mtu[QCA8K_NUM_PORTS];
> - struct net_device *mgmt_master; /* Track if mdio/mib Ethernet is available */
> - struct qca8k_mgmt_eth_data mgmt_eth_data;
> + struct dsa_switch_ops ops;
> struct qca8k_mib_eth_data mib_eth_data;
> - struct qca8k_mdio_cache mdio_cache;
> - struct qca8k_pcs pcs_port_0;
> - struct qca8k_pcs pcs_port_6;
> + unsigned int port_mtu[QCA8K_NUM_PORTS];
> };
>
> struct qca8k_mib_desc {
> --
> 2.34.1
>