[PATCH v2 net-next 22/26] igb: add XDP generic per-channel statistics

From: Alexander Lobakin
Date: Tue Nov 23 2021 - 11:44:37 EST


Make igb driver collect and provide all generic XDP counters.
Unfortunately, igb has an unified ice_ring structure for both Rx
and Tx, so embedding xdp_drv_rx_stats would bloat it for no good.
Store XDP stats in a separate array with a lifetime of a netdev.
Unlike other Intel drivers, igb has no support for XSK, so we can't
use full xdp_drv_stats here. IGB_MAX_ALLOC_QUEUES is introduced
purely for convenience to not hardcode 16 twice more.
Reuse previously introduced helpers where possible. Performance
wavering from incrementing a bunch of counters on hotpath is around
stddev at [64 ... 1532] frame sizes.

Signed-off-by: Alexander Lobakin <alexandr.lobakin@xxxxxxxxx>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@xxxxxxxxx>
Reviewed-by: Michal Swiatkowski <michal.swiatkowski@xxxxxxxxxxxxxxx>
---
drivers/net/ethernet/intel/igb/igb.h | 14 ++-
drivers/net/ethernet/intel/igb/igb_main.c | 102 ++++++++++++++++++++--
2 files changed, 105 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 2d3daf022651..a6c5355b82fc 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -303,6 +303,11 @@ struct igb_rx_queue_stats {
u64 alloc_failed;
};

+struct igb_xdp_stats {
+ struct xdp_rx_drv_stats rx;
+ struct xdp_tx_drv_stats tx;
+} ____cacheline_aligned;
+
struct igb_ring_container {
struct igb_ring *ring; /* pointer to linked list of rings */
unsigned int total_bytes; /* total bytes processed this int */
@@ -356,6 +361,7 @@ struct igb_ring {
struct u64_stats_sync rx_syncp;
};
};
+ struct igb_xdp_stats *xdp_stats;
struct xdp_rxq_info xdp_rxq;
} ____cacheline_internodealigned_in_smp;

@@ -531,6 +537,8 @@ struct igb_mac_addr {
#define IGB_MAC_STATE_SRC_ADDR 0x4
#define IGB_MAC_STATE_QUEUE_STEERING 0x8

+#define IGB_MAX_ALLOC_QUEUES 16
+
/* board specific private data structure */
struct igb_adapter {
unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
@@ -554,11 +562,11 @@ struct igb_adapter {
u16 tx_work_limit;
u32 tx_timeout_count;
int num_tx_queues;
- struct igb_ring *tx_ring[16];
+ struct igb_ring *tx_ring[IGB_MAX_ALLOC_QUEUES];

/* RX */
int num_rx_queues;
- struct igb_ring *rx_ring[16];
+ struct igb_ring *rx_ring[IGB_MAX_ALLOC_QUEUES];

u32 max_frame_size;
u32 min_frame_size;
@@ -664,6 +672,8 @@ struct igb_adapter {
struct igb_mac_addr *mac_table;
struct vf_mac_filter vf_macs;
struct vf_mac_filter *vf_mac_list;
+
+ struct igb_xdp_stats *xdp_stats;
};

/* flags controlling PTP/1588 function */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 18a019a47182..c4e1ea9bc4a8 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -1266,6 +1266,7 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter,

u64_stats_init(&ring->tx_syncp);
u64_stats_init(&ring->tx_syncp2);
+ ring->xdp_stats = adapter->xdp_stats + txr_idx;

/* assign ring to adapter */
adapter->tx_ring[txr_idx] = ring;
@@ -1300,6 +1301,7 @@ static int igb_alloc_q_vector(struct igb_adapter *adapter,
ring->queue_index = rxr_idx;

u64_stats_init(&ring->rx_syncp);
+ ring->xdp_stats = adapter->xdp_stats + rxr_idx;

/* assign ring to adapter */
adapter->rx_ring[rxr_idx] = ring;
@@ -2973,6 +2975,9 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
nxmit++;
}

+ if (unlikely(nxmit < n))
+ xdp_update_tx_drv_err(&tx_ring->xdp_stats->tx, n - nxmit);
+
__netif_tx_unlock(nq);

if (unlikely(flags & XDP_XMIT_FLUSH))
@@ -2981,6 +2986,42 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
return nxmit;
}

+static int igb_get_xdp_stats_nch(const struct net_device *dev, u32 attr_id)
+{
+ switch (attr_id) {
+ case IFLA_XDP_XSTATS_TYPE_XDP:
+ return IGB_MAX_ALLOC_QUEUES;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int igb_get_xdp_stats(const struct net_device *dev, u32 attr_id,
+ void *attr_data)
+{
+ const struct igb_adapter *adapter = netdev_priv(dev);
+ const struct igb_xdp_stats *drv_iter = adapter->xdp_stats;
+ struct ifla_xdp_stats *iter = attr_data;
+ u32 i;
+
+ switch (attr_id) {
+ case IFLA_XDP_XSTATS_TYPE_XDP:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ for (i = 0; i < IGB_MAX_ALLOC_QUEUES; i++) {
+ xdp_fetch_rx_drv_stats(iter, &drv_iter->rx);
+ xdp_fetch_tx_drv_stats(iter, &drv_iter->tx);
+
+ drv_iter++;
+ iter++;
+ }
+
+ return 0;
+}
+
static const struct net_device_ops igb_netdev_ops = {
.ndo_open = igb_open,
.ndo_stop = igb_close,
@@ -3007,6 +3048,8 @@ static const struct net_device_ops igb_netdev_ops = {
.ndo_setup_tc = igb_setup_tc,
.ndo_bpf = igb_xdp,
.ndo_xdp_xmit = igb_xdp_xmit,
+ .ndo_get_xdp_stats_nch = igb_get_xdp_stats_nch,
+ .ndo_get_xdp_stats = igb_get_xdp_stats,
};

/**
@@ -3620,6 +3663,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (hw->flash_address)
iounmap(hw->flash_address);
err_sw_init:
+ kfree(adapter->xdp_stats);
kfree(adapter->mac_table);
kfree(adapter->shadow_vfta);
igb_clear_interrupt_scheme(adapter);
@@ -3833,6 +3877,7 @@ static void igb_remove(struct pci_dev *pdev)
iounmap(hw->flash_address);
pci_release_mem_regions(pdev);

+ kfree(adapter->xdp_stats);
kfree(adapter->mac_table);
kfree(adapter->shadow_vfta);
free_netdev(netdev);
@@ -3962,6 +4007,7 @@ static int igb_sw_init(struct igb_adapter *adapter)
struct e1000_hw *hw = &adapter->hw;
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
+ u32 i;

pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);

@@ -4019,6 +4065,19 @@ static int igb_sw_init(struct igb_adapter *adapter)
if (!adapter->shadow_vfta)
return -ENOMEM;

+ adapter->xdp_stats = kcalloc(IGB_MAX_ALLOC_QUEUES,
+ sizeof(*adapter->xdp_stats),
+ GFP_KERNEL);
+ if (!adapter->xdp_stats)
+ return -ENOMEM;
+
+ for (i = 0; i < IGB_MAX_ALLOC_QUEUES; i++) {
+ struct igb_xdp_stats *xdp_stats = adapter->xdp_stats + i;
+
+ xdp_init_rx_drv_stats(&xdp_stats->rx);
+ xdp_init_tx_drv_stats(&xdp_stats->tx);
+ }
+
/* This call may decrease the number of queues */
if (igb_init_interrupt_scheme(adapter, true)) {
dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
@@ -6264,8 +6323,10 @@ int igb_xmit_xdp_ring(struct igb_adapter *adapter,

len = xdpf->len;

- if (unlikely(!igb_desc_unused(tx_ring)))
+ if (unlikely(!igb_desc_unused(tx_ring))) {
+ xdp_update_tx_drv_full(&tx_ring->xdp_stats->tx);
return IGB_XDP_CONSUMED;
+ }

dma = dma_map_single(tx_ring->dev, xdpf->data, len, DMA_TO_DEVICE);
if (dma_mapping_error(tx_ring->dev, dma))
@@ -8045,6 +8106,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
unsigned int total_bytes = 0, total_packets = 0;
unsigned int budget = q_vector->tx.work_limit;
unsigned int i = tx_ring->next_to_clean;
+ u32 xdp_packets = 0, xdp_bytes = 0;

if (test_bit(__IGB_DOWN, &adapter->state))
return true;
@@ -8075,10 +8137,13 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
total_packets += tx_buffer->gso_segs;

/* free the skb */
- if (tx_buffer->type == IGB_TYPE_SKB)
+ if (tx_buffer->type == IGB_TYPE_SKB) {
napi_consume_skb(tx_buffer->skb, napi_budget);
- else
+ } else {
xdp_return_frame(tx_buffer->xdpf);
+ xdp_bytes += tx_buffer->bytecount;
+ xdp_packets++;
+ }

/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -8135,6 +8200,8 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
tx_ring->tx_stats.bytes += total_bytes;
tx_ring->tx_stats.packets += total_packets;
u64_stats_update_end(&tx_ring->tx_syncp);
+ xdp_update_tx_drv_stats(&tx_ring->xdp_stats->tx, xdp_packets,
+ xdp_bytes);
q_vector->tx.total_bytes += total_bytes;
q_vector->tx.total_packets += total_packets;

@@ -8393,7 +8460,8 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,

static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
struct igb_ring *rx_ring,
- struct xdp_buff *xdp)
+ struct xdp_buff *xdp,
+ struct xdp_rx_drv_stats_local *lrstats)
{
int err, result = IGB_XDP_PASS;
struct bpf_prog *xdp_prog;
@@ -8404,32 +8472,46 @@ static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
if (!xdp_prog)
goto xdp_out;

+ lrstats->bytes += xdp->data_end - xdp->data;
+ lrstats->packets++;
+
prefetchw(xdp->data_hard_start); /* xdp_frame write */

act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
+ lrstats->pass++;
break;
case XDP_TX:
result = igb_xdp_xmit_back(adapter, xdp);
- if (result == IGB_XDP_CONSUMED)
+ if (result == IGB_XDP_CONSUMED) {
+ lrstats->tx_errors++;
goto out_failure;
+ }
+ lrstats->tx++;
break;
case XDP_REDIRECT:
err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
- if (err)
+ if (err) {
+ lrstats->redirect_errors++;
goto out_failure;
+ }
result = IGB_XDP_REDIR;
+ lrstats->redirect++;
break;
default:
bpf_warn_invalid_xdp_action(act);
- fallthrough;
+ lrstats->invalid++;
+ goto out_failure;
case XDP_ABORTED:
+ lrstats->aborted++;
out_failure:
trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
- fallthrough;
+ result = IGB_XDP_CONSUMED;
+ break;
case XDP_DROP:
result = IGB_XDP_CONSUMED;
+ lrstats->drop++;
break;
}
xdp_out:
@@ -8677,6 +8759,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
{
struct igb_adapter *adapter = q_vector->adapter;
struct igb_ring *rx_ring = q_vector->rx.ring;
+ struct xdp_rx_drv_stats_local lrstats = { };
struct sk_buff *skb = rx_ring->skb;
unsigned int total_bytes = 0, total_packets = 0;
u16 cleaned_count = igb_desc_unused(rx_ring);
@@ -8740,7 +8823,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = igb_rx_frame_truesize(rx_ring, size);
#endif
- skb = igb_run_xdp(adapter, rx_ring, &xdp);
+ skb = igb_run_xdp(adapter, rx_ring, &xdp, &lrstats);
}

if (IS_ERR(skb)) {
@@ -8814,6 +8897,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
rx_ring->rx_stats.packets += total_packets;
rx_ring->rx_stats.bytes += total_bytes;
u64_stats_update_end(&rx_ring->rx_syncp);
+ xdp_update_rx_drv_stats(&rx_ring->xdp_stats->rx, &lrstats);
q_vector->rx.total_packets += total_packets;
q_vector->rx.total_bytes += total_bytes;

--
2.33.1