summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c10
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c1
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c11
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.c4
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c18
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c1
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net.h5
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c10
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede.h2
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_fp.c1
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c10
-rw-r--r--drivers/net/tun.c24
-rw-r--r--drivers/net/virtio_net.c14
-rw-r--r--include/linux/filter.h2
-rw-r--r--include/linux/netdevice.h2
-rw-r--r--include/net/xdp.h48
-rw-r--r--include/uapi/linux/bpf.h3
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/dev.c69
-rw-r--r--net/core/filter.c19
-rw-r--r--net/core/xdp.c73
-rw-r--r--samples/bpf/Makefile4
-rw-r--r--samples/bpf/xdp_rxq_info_kern.c96
-rw-r--r--samples/bpf/xdp_rxq_info_user.c531
36 files changed, 990 insertions, 28 deletions
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 9efbdc6f1fcb..89c3c8760a78 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2247,6 +2247,9 @@ static void bnxt_free_rx_rings(struct bnxt *bp)
if (rxr->xdp_prog)
bpf_prog_put(rxr->xdp_prog);
+ if (xdp_rxq_info_is_reg(&rxr->xdp_rxq))
+ xdp_rxq_info_unreg(&rxr->xdp_rxq);
+
kfree(rxr->rx_tpa);
rxr->rx_tpa = NULL;
@@ -2280,6 +2283,10 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp)
ring = &rxr->rx_ring_struct;
+ rc = xdp_rxq_info_reg(&rxr->xdp_rxq, bp->dev, i);
+ if (rc < 0)
+ return rc;
+
rc = bnxt_alloc_ring(bp, ring);
if (rc)
return rc;
@@ -2834,6 +2841,9 @@ void bnxt_set_ring_params(struct bnxt *bp)
bp->cp_ring_mask = bp->cp_bit - 1;
}
+/* Changing allocation mode of RX rings.
+ * TODO: Update when extending xdp_rxq_info to support allocation modes.
+ */
int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
{
if (page_mode) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 5359a1f0045f..2d268fc26f5e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -23,6 +23,7 @@
#include <net/devlink.h>
#include <net/dst_metadata.h>
#include <net/switchdev.h>
+#include <net/xdp.h>
struct tx_bd {
__le32 tx_bd_len_flags_type;
@@ -664,6 +665,7 @@ struct bnxt_rx_ring_info {
struct bnxt_ring_struct rx_ring_struct;
struct bnxt_ring_struct rx_agg_ring_struct;
+ struct xdp_rxq_info xdp_rxq;
};
struct bnxt_cp_ring_info {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 261e5847557a..1389ab5e05df 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -96,6 +96,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
xdp.data = *data_ptr;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = *data_ptr + *len;
+ xdp.rxq = &rxr->xdp_rxq;
orig_data = xdp.data;
mapping = rx_buf->mapping - bp->rx_dma_offset;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 52b3a6044f85..21618d0d694f 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -521,7 +521,7 @@ static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr)
static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
- struct sk_buff **skb)
+ struct rcv_queue *rq, struct sk_buff **skb)
{
struct xdp_buff xdp;
struct page *page;
@@ -545,6 +545,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
xdp.data = (void *)cpu_addr;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
+ xdp.rxq = &rq->xdp_rxq;
orig_data = xdp.data;
rcu_read_lock();
@@ -698,7 +699,8 @@ static inline void nicvf_set_rxhash(struct net_device *netdev,
static void nicvf_rcv_pkt_handler(struct net_device *netdev,
struct napi_struct *napi,
- struct cqe_rx_t *cqe_rx, struct snd_queue *sq)
+ struct cqe_rx_t *cqe_rx,
+ struct snd_queue *sq, struct rcv_queue *rq)
{
struct sk_buff *skb = NULL;
struct nicvf *nic = netdev_priv(netdev);
@@ -724,7 +726,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
/* For XDP, ignore pkts spanning multiple pages */
if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) {
/* Packet consumed by XDP */
- if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, &skb))
+ if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb))
return;
} else {
skb = nicvf_get_rcv_skb(snic, cqe_rx,
@@ -781,6 +783,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
struct cqe_rx_t *cq_desc;
struct netdev_queue *txq;
struct snd_queue *sq = &qs->sq[cq_idx];
+ struct rcv_queue *rq = &qs->rq[cq_idx];
unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx;
spin_lock_bh(&cq->lock);
@@ -811,7 +814,7 @@ loop:
switch (cq_desc->cqe_type) {
case CQE_TYPE_RX:
- nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq);
+ nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq);
work_done++;
break;
case CQE_TYPE_SEND:
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index f38ea349aa00..14e62c6ac342 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -760,6 +760,7 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
if (!rq->enable) {
nicvf_reclaim_rcv_queue(nic, qs, qidx);
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
return;
}
@@ -772,6 +773,9 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
/* all writes of RBDR data to be loaded into L2 Cache as well*/
rq->caching = 1;
+ /* Driver have no proper error path for failed XDP RX-queue info reg */
+ WARN_ON(xdp_rxq_info_reg(&rq->xdp_rxq, nic->netdev, qidx) < 0);
+
/* Send a mailbox msg to PF to config RQ */
mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
mbx.rq.qs_num = qs->vnic_id;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 178ab6e8e3c5..7d1e4e2aaad0 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -12,6 +12,7 @@
#include <linux/netdevice.h>
#include <linux/iommu.h>
#include <linux/bpf.h>
+#include <net/xdp.h>
#include "q_struct.h"
#define MAX_QUEUE_SET 128
@@ -255,6 +256,7 @@ struct rcv_queue {
u8 start_qs_rbdr_idx; /* RBDR idx in the above QS */
u8 caching;
struct rx_tx_queue_stats stats;
+ struct xdp_rxq_info xdp_rxq;
} ____cacheline_aligned_in_smp;
struct cmp_queue {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 5f6cf7212d4f..cfd788b4fd7a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1585,6 +1585,8 @@ static int i40e_set_ringparam(struct net_device *netdev,
*/
rx_rings[i].desc = NULL;
rx_rings[i].rx_bi = NULL;
+ /* Clear cloned XDP RX-queue info before setup call */
+ memset(&rx_rings[i].xdp_rxq, 0, sizeof(rx_rings[i].xdp_rxq));
/* this is to allow wr32 to have something to write to
* during early allocation of Rx buffers
*/
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 4566d66ffc7c..2a8a85e3ae8f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -27,6 +27,7 @@
#include <linux/prefetch.h>
#include <net/busy_poll.h>
#include <linux/bpf_trace.h>
+#include <net/xdp.h>
#include "i40e.h"
#include "i40e_trace.h"
#include "i40e_prototype.h"
@@ -1236,6 +1237,8 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
void i40e_free_rx_resources(struct i40e_ring *rx_ring)
{
i40e_clean_rx_ring(rx_ring);
+ if (rx_ring->vsi->type == I40E_VSI_MAIN)
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
rx_ring->xdp_prog = NULL;
kfree(rx_ring->rx_bi);
rx_ring->rx_bi = NULL;
@@ -1256,6 +1259,7 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
{
struct device *dev = rx_ring->dev;
+ int err = -ENOMEM;
int bi_size;
/* warn if we are about to overwrite the pointer */
@@ -1283,13 +1287,21 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
+ /* XDP RX-queue info only needed for RX rings exposed to XDP */
+ if (rx_ring->vsi->type == I40E_VSI_MAIN) {
+ err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+ rx_ring->queue_index);
+ if (err < 0)
+ goto err;
+ }
+
rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
return 0;
err:
kfree(rx_ring->rx_bi);
rx_ring->rx_bi = NULL;
- return -ENOMEM;
+ return err;
}
/**
@@ -2068,11 +2080,13 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
struct sk_buff *skb = rx_ring->skb;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
bool failure = false, xdp_xmit = false;
+ struct xdp_buff xdp;
+
+ xdp.rxq = &rx_ring->xdp_rxq;
while (likely(total_rx_packets < (unsigned int)budget)) {
struct i40e_rx_buffer *rx_buffer;
union i40e_rx_desc *rx_desc;
- struct xdp_buff xdp;
unsigned int size;
u16 vlan_tag;
u8 rx_ptype;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index fbae1182e2ea..2d08760fc4ce 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -27,6 +27,8 @@
#ifndef _I40E_TXRX_H_
#define _I40E_TXRX_H_
+#include <net/xdp.h>
+
/* Interrupt Throttling and Rate Limiting Goodies */
#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */
@@ -428,6 +430,7 @@ struct i40e_ring {
*/
struct i40e_channel *ch;
+ struct xdp_rxq_info xdp_rxq;
} ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 468c3555a629..8611763d6129 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -53,6 +53,7 @@
#include <linux/dca.h>
#endif
+#include <net/xdp.h>
#include <net/busy_poll.h>
/* common prefix used by pr_<> macros */
@@ -371,6 +372,7 @@ struct ixgbe_ring {
struct ixgbe_tx_queue_stats tx_stats;
struct ixgbe_rx_queue_stats rx_stats;
};
+ struct xdp_rxq_info xdp_rxq;
} ____cacheline_internodealigned_in_smp;
enum ixgbe_ring_f_enum {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 0aad1c2a3667..0aaf70b3cfcd 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -1156,6 +1156,10 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
memcpy(&temp_ring[i], adapter->rx_ring[i],
sizeof(struct ixgbe_ring));
+ /* Clear copied XDP RX-queue info */
+ memset(&temp_ring[i].xdp_rxq, 0,
+ sizeof(temp_ring[i].xdp_rxq));
+
temp_ring[i].count = new_rx_count;
err = ixgbe_setup_rx_resources(adapter, &temp_ring[i]);
if (err) {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 7737a05c717c..95aba975b391 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2318,12 +2318,14 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
#endif /* IXGBE_FCOE */
u16 cleaned_count = ixgbe_desc_unused(rx_ring);
bool xdp_xmit = false;
+ struct xdp_buff xdp;
+
+ xdp.rxq = &rx_ring->xdp_rxq;
while (likely(total_rx_packets < budget)) {
union ixgbe_adv_rx_desc *rx_desc;
struct ixgbe_rx_buffer *rx_buffer;
struct sk_buff *skb;
- struct xdp_buff xdp;
unsigned int size;
/* return some buffers to hardware, one at a time is too slow */
@@ -6444,6 +6446,11 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
+ /* XDP RX-queue info */
+ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
+ rx_ring->queue_index) < 0)
+ goto err;
+
rx_ring->xdp_prog = adapter->xdp_prog;
return 0;
@@ -6541,6 +6548,7 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
ixgbe_clean_rx_ring(rx_ring);
rx_ring->xdp_prog = NULL;
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 21bc17fa3854..8fc51bc29003 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2172,8 +2172,9 @@ static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i],
prof->rx_ring_size, priv->stride,
- node))
+ node, i))
goto err;
+
}
#ifdef CONFIG_RFS_ACCEL
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 5f9dbc9a7f5b..b4d144e67514 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -262,7 +262,7 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev)
int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring **pring,
- u32 size, u16 stride, int node)
+ u32 size, u16 stride, int node, int queue_index)
{
struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_rx_ring *ring;
@@ -286,6 +286,9 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
ring->log_stride = ffs(ring->stride) - 1;
ring->buf_size = ring->size * ring->stride + TXBB_SIZE;
+ if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index) < 0)
+ goto err_ring;
+
tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
sizeof(struct mlx4_en_rx_alloc));
ring->rx_info = vzalloc_node(tmp, node);
@@ -293,7 +296,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
ring->rx_info = vzalloc(tmp);
if (!ring->rx_info) {
err = -ENOMEM;
- goto err_ring;
+ goto err_xdp_info;
}
}
@@ -317,6 +320,8 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
err_info:
vfree(ring->rx_info);
ring->rx_info = NULL;
+err_xdp_info:
+ xdp_rxq_info_unreg(&ring->xdp_rxq);
err_ring:
kfree(ring);
*pring = NULL;
@@ -440,6 +445,7 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
lockdep_is_held(&mdev->state_lock));
if (old_prog)
bpf_prog_put(old_prog);
+ xdp_rxq_info_unreg(&ring->xdp_rxq);
mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
vfree(ring->rx_info);
ring->rx_info = NULL;
@@ -652,6 +658,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
int cq_ring = cq->ring;
bool doorbell_pending;
struct mlx4_cqe *cqe;
+ struct xdp_buff xdp;
int polled = 0;
int index;
@@ -666,6 +673,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
rcu_read_lock();
xdp_prog = rcu_dereference(ring->xdp_prog);
+ xdp.rxq = &ring->xdp_rxq;
doorbell_pending = 0;
/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
@@ -750,7 +758,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
* read bytes but not past the end of the frag.
*/
if (xdp_prog) {
- struct xdp_buff xdp;
dma_addr_t dma;
void *orig_data;
u32 act;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 7db3d0d9bfce..f470ae37d937 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -46,6 +46,7 @@
#endif
#include <linux/cpu_rmap.h>
#include <linux/ptp_clock_kernel.h>
+#include <net/xdp.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/qp.h>
@@ -356,6 +357,7 @@ struct mlx4_en_rx_ring {
unsigned long dropped;
int hwtstamp_rx_filter;
cpumask_var_t affinity_mask;
+ struct xdp_rxq_info xdp_rxq;
};
struct mlx4_en_cq {
@@ -720,7 +722,7 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev);
void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv);
int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring **pring,
- u32 size, u16 stride, int node);
+ u32 size, u16 stride, int node, int queue_index);
void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring **pring,
u32 size, u16 stride);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 543060c305a0..5299310f2481 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -46,6 +46,7 @@
#include <linux/mlx5/transobj.h>
#include <linux/rhashtable.h>
#include <net/switchdev.h>
+#include <net/xdp.h>
#include "wq.h"
#include "mlx5_core.h"
#include "en_stats.h"
@@ -571,6 +572,9 @@ struct mlx5e_rq {
u32 rqn;
struct mlx5_core_dev *mdev;
struct mlx5_core_mkey umr_mkey;
+
+ /* XDP read-mostly */
+ struct xdp_rxq_info xdp_rxq;
} ____cacheline_aligned_in_smp;
struct mlx5e_channel {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 3aa1c90e7c86..539bd1d24396 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -582,6 +582,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
goto err_rq_wq_destroy;
}
+ if (xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix) < 0)
+ goto err_rq_wq_destroy;
+
rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
rq->buff.headroom = params->rq_headroom;
@@ -687,6 +690,7 @@ err_destroy_umr_mkey:
err_rq_wq_destroy:
if (rq->xdp_prog)
bpf_prog_put(rq->xdp_prog);
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
mlx5_wq_destroy(&rq->wq_ctrl);
return err;
@@ -699,6 +703,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
if (rq->xdp_prog)
bpf_prog_put(rq->xdp_prog);
+ xdp_rxq_info_unreg(&rq->xdp_rxq);
+
switch (rq->wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
mlx5e_rq_free_mpwqe_info(rq);
@@ -2766,6 +2772,9 @@ static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
if (err)
return err;
+ /* Mark as unused given "Drop-RQ" packets never reach XDP */
+ xdp_rxq_info_unused(&rq->xdp_rxq);
+
rq->mdev = mdev;
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 5b499c7a698f..7b38480811d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -812,6 +812,7 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + *len;
xdp.data_hard_start = va;
+ xdp.rxq = &rq->xdp_rxq;
act = bpf_prog_run_xdp(prog, &xdp);
switch (act) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 3801c52098d5..0e564cfabe7e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -47,6 +47,7 @@
#include <linux/netdevice.h>
#include <linux/pci.h>
#include <linux/io-64-nonatomic-hi-lo.h>
+#include <net/xdp.h>
#include "nfp_net_ctrl.h"
@@ -350,6 +351,7 @@ struct nfp_net_rx_buf {
* @rxds: Virtual address of FL/RX ring in host memory
* @dma: DMA address of the FL/RX ring
* @size: Size, in bytes, of the FL/RX ring (needed to free)
+ * @xdp_rxq: RX-ring info avail for XDP
*/
struct nfp_net_rx_ring {
struct nfp_net_r_vector *r_vec;
@@ -361,13 +363,14 @@ struct nfp_net_rx_ring {
u32 idx;
int fl_qcidx;
+ unsigned int size;
u8 __iomem *qcp_fl;
struct nfp_net_rx_buf *rxbufs;
struct nfp_net_rx_desc *rxds;
dma_addr_t dma;
- unsigned int size;
+ struct xdp_rxq_info xdp_rxq;
} ____cacheline_aligned;
/**
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 0add4870ce2e..45b8cae937be 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1608,11 +1608,13 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
unsigned int true_bufsz;
struct sk_buff *skb;
int pkts_polled = 0;
+ struct xdp_buff xdp;
int idx;
rcu_read_lock();
xdp_prog = READ_ONCE(dp->xdp_prog);
true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
+ xdp.rxq = &rx_ring->xdp_rxq;
tx_ring = r_vec->xdp_ring;
while (pkts_polled < budget) {
@@ -1703,7 +1705,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
dp->bpf_offload_xdp) && !meta.portid) {
void *orig_data = rxbuf->frag + pkt_off;
unsigned int dma_off;
- struct xdp_buff xdp;
int act;
xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
@@ -2252,6 +2253,7 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
kfree(rx_ring->rxbufs);
if (rx_ring->rxds)
@@ -2275,7 +2277,11 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
static int
nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
{
- int sz;
+ int sz, err;
+
+ err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, rx_ring->idx);
+ if (err < 0)
+ return err;
rx_ring->cnt = dp->rxd_cnt;
rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 8a336517baac..8116cfd30fad 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -40,6 +40,7 @@
#include <linux/kernel.h>
#include <linux/mutex.h>
#include <linux/bpf.h>
+#include <net/xdp.h>
#include <linux/qed/qede_rdma.h>
#include <linux/io.h>
#ifdef CONFIG_RFS_ACCEL
@@ -345,6 +346,7 @@ struct qede_rx_queue {
u64 xdp_no_pass;
void *handle;
+ struct xdp_rxq_info xdp_rxq;
};
union db_prod {
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 48ec4c56cddf..dafc079ab6b9 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1006,6 +1006,7 @@ static bool qede_rx_xdp(struct qede_dev *edev,
xdp.data = xdp.data_hard_start + *data_offset;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + *len;
+ xdp.rxq = &rxq->xdp_rxq;
/* Queues always have a full reset currently, so for the time
* being until there's atomic program replace just mark read
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 90d79ae2a48f..9929b4370ce6 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -765,6 +765,12 @@ static void qede_free_fp_array(struct qede_dev *edev)
fp = &edev->fp_array[i];
kfree(fp->sb_info);
+ /* Handle mem alloc failure case where qede_init_fp
+ * didn't register xdp_rxq_info yet.
+ * Implicit only (fp->type & QEDE_FASTPATH_RX)
+ */
+ if (fp->rxq && xdp_rxq_info_is_reg(&fp->rxq->xdp_rxq))
+ xdp_rxq_info_unreg(&fp->rxq->xdp_rxq);
kfree(fp->rxq);
kfree(fp->xdp_tx);
kfree(fp->txq);
@@ -1493,6 +1499,10 @@ static void qede_init_fp(struct qede_dev *edev)
else
fp->rxq->data_direction = DMA_FROM_DEVICE;
fp->rxq->dev = &edev->pdev->dev;
+
+ /* Driver have no error path from here */
+ WARN_ON(xdp_rxq_info_reg(&fp->rxq->xdp_rxq, edev->ndev,
+ fp->rxq->rxq_id) < 0);
}
if (fp->type & QEDE_FASTPATH_TX) {
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e367d6310353..e7c5f4b2a9a6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -180,6 +180,7 @@ struct tun_file {
struct list_head next;
struct tun_struct *detached;
struct skb_array tx_array;
+ struct xdp_rxq_info xdp_rxq;
};
struct tun_flow_entry {
@@ -687,8 +688,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
tun->dev->reg_state == NETREG_REGISTERED)
unregister_netdevice(tun->dev);
}
- if (tun)
+ if (tun) {
skb_array_cleanup(&tfile->tx_array);
+ xdp_rxq_info_unreg(&tfile->xdp_rxq);
+ }
sock_put(&tfile->sk);
}
}
@@ -728,11 +731,13 @@ static void tun_detach_all(struct net_device *dev)
tun_napi_del(tun, tfile);
/* Drop read queue */
tun_queue_purge(tfile);
+ xdp_rxq_info_unreg(&tfile->xdp_rxq);
sock_put(&tfile->sk);
}
list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
tun_enable_queue(tfile);
tun_queue_purge(tfile);
+ xdp_rxq_info_unreg(&tfile->xdp_rxq);
sock_put(&tfile->sk);
}
BUG_ON(tun->numdisabled != 0);
@@ -784,6 +789,22 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
tfile->queue_index = tun->numqueues;
tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN;
+
+ if (tfile->detached) {
+ /* Re-attach detached tfile, updating XDP queue_index */
+ WARN_ON(!xdp_rxq_info_is_reg(&tfile->xdp_rxq));
+
+ if (tfile->xdp_rxq.queue_index != tfile->queue_index)
+ tfile->xdp_rxq.queue_index = tfile->queue_index;
+ } else {
+ /* Setup XDP RX-queue info, for new tfile getting attached */
+ err = xdp_rxq_info_reg(&tfile->xdp_rxq,
+ tun->dev, tfile->queue_index);
+ if (err < 0)
+ goto out;
+ err = 0;
+ }
+
rcu_assign_pointer(tfile->tun, tun);
rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
tun->numqueues++;
@@ -1508,6 +1529,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
xdp.data = buf + pad;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
+ xdp.rxq = &tfile->xdp_rxq;
orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6fb7b658a6cc..ed8299343728 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -31,6 +31,7 @@
#include <linux/average.h>
#include <linux/filter.h>
#include <net/route.h>
+#include <net/xdp.h>
static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);
@@ -115,6 +116,8 @@ struct receive_queue {
/* Name of this receive queue: input.$index */
char name[40];
+
+ struct xdp_rxq_info xdp_rxq;
};
struct virtnet_info {
@@ -559,6 +562,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
xdp.data = xdp.data_hard_start + xdp_headroom;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
+ xdp.rxq = &rq->xdp_rxq;
orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -692,6 +696,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
xdp.data = data + vi->hdr_len;
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + (len - vi->hdr_len);
+ xdp.rxq = &rq->xdp_rxq;
+
act = bpf_prog_run_xdp(xdp_prog, &xdp);
if (act != XDP_PASS)
@@ -1225,13 +1231,18 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
static int virtnet_open(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
- int i;
+ int i, err;
for (i = 0; i < vi->max_queue_pairs; i++) {
if (i < vi->curr_queue_pairs)
/* Make sure we have some buffers: if oom use wq. */
if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
schedule_delayed_work(&vi->refill, 0);
+
+ err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i);
+ if (err < 0)
+ return err;
+
virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
}
@@ -1560,6 +1571,7 @@ static int virtnet_close(struct net_device *dev)
cancel_delayed_work_sync(&vi->refill);
for (i = 0; i < vi->max_queue_pairs; i++) {
+ xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
napi_disable(&vi->rq[i].napi);
virtnet_napi_tx_disable(&vi->sq[i].napi);
}
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 2b0df2703671..425056c7f96c 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -20,6 +20,7 @@
#include <linux/set_memory.h>
#include <linux/kallsyms.h>
+#include <net/xdp.h>
#include <net/sch_generic.h>
#include <uapi/linux/filter.h>
@@ -503,6 +504,7 @@ struct xdp_buff {
void *data_end;
void *data_meta;
void *data_hard_start;
+ struct xdp_rxq_info *rxq;
};
/* Compute the linear packet data range [data, data_end) which
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 49bfc6eec74c..440b000f07f4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -44,6 +44,7 @@
#include <net/dcbnl.h>
#endif
#include <net/netprio_cgroup.h>
+#include <net/xdp.h>
#include <linux/netdev_features.h>
#include <linux/neighbour.h>
@@ -686,6 +687,7 @@ struct netdev_rx_queue {
#endif
struct kobject kobj;
struct net_device *dev;
+ struct xdp_rxq_info xdp_rxq;
} ____cacheline_aligned_in_smp;
/*
diff --git a/include/net/xdp.h b/include/net/xdp.h
new file mode 100644
index 000000000000..b2362ddfa694
--- /dev/null
+++ b/include/net/xdp.h
@@ -0,0 +1,48 @@
+/* include/net/xdp.h
+ *
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ * Released under terms in GPL version 2. See COPYING.
+ */
+#ifndef __LINUX_NET_XDP_H__
+#define __LINUX_NET_XDP_H__
+
+/**
+ * DOC: XDP RX-queue information
+ *
+ * The XDP RX-queue info (xdp_rxq_info) is associated with the driver
+ * level RX-ring queues. It is information that is specific to how
+ * the driver have configured a given RX-ring queue.
+ *
+ * Each xdp_buff frame received in the driver carry a (pointer)
+ * reference to this xdp_rxq_info structure. This provides the XDP
+ * data-path read-access to RX-info for both kernel and bpf-side
+ * (limited subset).
+ *
+ * For now, direct access is only safe while running in NAPI/softirq
+ * context. Contents is read-mostly and must not be updated during
+ * driver NAPI/softirq poll.
+ *
+ * The driver usage API is a register and unregister API.
+ *
+ * The struct is not directly tied to the XDP prog. A new XDP prog
+ * can be attached as long as it doesn't change the underlying
+ * RX-ring. If the RX-ring does change significantly, the NIC driver
+ * naturally need to stop the RX-ring before purging and reallocating
+ * memory. In that process the driver MUST call unregistor (which
+ * also apply for driver shutdown and unload). The register API is
+ * also mandatory during RX-ring setup.
+ */
+
+struct xdp_rxq_info {
+ struct net_device *dev;
+ u32 queue_index;
+ u32 reg_state;
+} ____cacheline_aligned; /* perf critical, avoid false-sharing */
+
+int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index);
+void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
+void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
+bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
+
+#endif /* __LINUX_NET_XDP_H__ */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f2f8b36e2ad4..405317f9c064 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -899,6 +899,9 @@ struct xdp_md {
__u32 data;
__u32 data_end;
__u32 data_meta;
+ /* Below access go though struct xdp_rxq_info */
+ __u32 ingress_ifindex; /* rxq->dev->ifindex */
+ __u32 rx_queue_index; /* rxq->queue_index */
};
enum sk_action {
diff --git a/net/core/Makefile b/net/core/Makefile
index 1fd0a9c88b1b..6dbbba8c57ae 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
- fib_notifier.o
+ fib_notifier.o xdp.o
obj-y += net-sysfs.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 2eb66c0d9cdb..d7925ef8743d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3906,9 +3906,33 @@ drop:
return NET_RX_DROP;
}
+static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct netdev_rx_queue *rxqueue;
+
+ rxqueue = dev->_rx;
+
+ if (skb_rx_queue_recorded(skb)) {
+ u16 index = skb_get_rx_queue(skb);
+
+ if (unlikely(index >= dev->real_num_rx_queues)) {
+ WARN_ONCE(dev->real_num_rx_queues > 1,
+ "%s received packet on queue %u, but number "
+ "of RX queues is %u\n",
+ dev->name, index, dev->real_num_rx_queues);
+
+ return rxqueue; /* Return first rxqueue */
+ }
+ rxqueue += index;
+ }
+ return rxqueue;
+}
+
static u32 netif_receive_generic_xdp(struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
+ struct netdev_rx_queue *rxqueue;
u32 metalen, act = XDP_DROP;
struct xdp_buff xdp;
void *orig_data;
@@ -3952,6 +3976,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
xdp.data_hard_start = skb->data - skb_headroom(skb);
orig_data = xdp.data;
+ rxqueue = netif_get_rxqueue(skb);
+ xdp.rxq = &rxqueue->xdp_rxq;
+
act = bpf_prog_run_xdp(xdp_prog, &xdp);
off = xdp.data - orig_data;
@@ -7589,12 +7616,12 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
}
EXPORT_SYMBOL(netif_stacked_transfer_operstate);
-#ifdef CONFIG_SYSFS
static int netif_alloc_rx_queues(struct net_device *dev)
{
unsigned int i, count = dev->num_rx_queues;
struct netdev_rx_queue *rx;
size_t sz = count * sizeof(*rx);
+ int err = 0;
BUG_ON(count < 1);
@@ -7604,11 +7631,39 @@ static int netif_alloc_rx_queues(struct net_device *dev)
dev->_rx = rx;
- for (i = 0; i < count; i++)
+ for (i = 0; i < count; i++) {
rx[i].dev = dev;
+
+ /* XDP RX-queue setup */
+ err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i);
+ if (err < 0)
+ goto err_rxq_info;
+ }
return 0;
+
+err_rxq_info:
+ /* Rollback successful reg's and free other resources */
+ while (i--)
+ xdp_rxq_info_unreg(&rx[i].xdp_rxq);
+ kfree(dev->_rx);
+ dev->_rx = NULL;
+ return err;
+}
+
+static void netif_free_rx_queues(struct net_device *dev)
+{
+ unsigned int i, count = dev->num_rx_queues;
+ struct netdev_rx_queue *rx;
+
+ /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */
+ if (!dev->_rx)
+ return;
+
+ rx = dev->_rx;
+
+ for (i = 0; i < count; i++)
+ xdp_rxq_info_unreg(&rx[i].xdp_rxq);
}
-#endif
static void netdev_init_one_queue(struct net_device *dev,
struct netdev_queue *queue, void *_unused)
@@ -8169,12 +8224,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
return NULL;
}
-#ifdef CONFIG_SYSFS
if (rxqs < 1) {
pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
return NULL;
}
-#endif
alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
@@ -8231,12 +8284,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
if (netif_alloc_netdev_queues(dev))
goto free_all;
-#ifdef CONFIG_SYSFS
dev->num_rx_queues = rxqs;
dev->real_num_rx_queues = rxqs;
if (netif_alloc_rx_queues(dev))
goto free_all;
-#endif
strcpy(dev->name, name);
dev->name_assign_type = name_assign_type;
@@ -8275,9 +8326,7 @@ void free_netdev(struct net_device *dev)
might_sleep();
netif_free_tx_queues(dev);
-#ifdef CONFIG_SYSFS
- kvfree(dev->_rx);
-#endif
+ netif_free_rx_queues(dev);
kfree(rcu_dereference_protected(dev->ingress_queue, 1));
diff --git a/net/core/filter.c b/net/core/filter.c
index 130b842c3a15..acdb94c0e97f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4304,6 +4304,25 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, data_end));
break;
+ case offsetof(struct xdp_md, ingress_ifindex):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
+ si->dst_reg, si->src_reg,
+ offsetof(struct xdp_buff, rxq));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct xdp_rxq_info, dev));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct net_device,
+ ifindex, 4, target_size));
+ break;
+ case offsetof(struct xdp_md, rx_queue_index):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
+ si->dst_reg, si->src_reg,
+ offsetof(struct xdp_buff, rxq));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ bpf_target_off(struct xdp_rxq_info,
+ queue_index, 4, target_size));
+ break;
}
return insn - insn_buf;
diff --git a/net/core/xdp.c b/net/core/xdp.c
new file mode 100644
index 000000000000..097a0f74e004
--- /dev/null
+++ b/net/core/xdp.c
@@ -0,0 +1,73 @@
+/* net/core/xdp.c
+ *
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ * Released under terms in GPL version 2. See COPYING.
+ */
+#include <linux/types.h>
+#include <linux/mm.h>
+
+#include <net/xdp.h>
+
+#define REG_STATE_NEW 0x0
+#define REG_STATE_REGISTERED 0x1
+#define REG_STATE_UNREGISTERED 0x2
+#define REG_STATE_UNUSED 0x3
+
+void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
+{
+ /* Simplify driver cleanup code paths, allow unreg "unused" */
+ if (xdp_rxq->reg_state == REG_STATE_UNUSED)
+ return;
+
+ WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
+
+ xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
+ xdp_rxq->dev = NULL;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
+
+static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
+{
+ memset(xdp_rxq, 0, sizeof(*xdp_rxq));
+}
+
+/* Returns 0 on success, negative on failure */
+int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
+ struct net_device *dev, u32 queue_index)
+{
+ if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
+ WARN(1, "Driver promised not to register this");
+ return -EINVAL;
+ }
+
+ if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
+ WARN(1, "Missing unregister, handled but fix driver");
+ xdp_rxq_info_unreg(xdp_rxq);
+ }
+
+ if (!dev) {
+ WARN(1, "Missing net_device from driver");
+ return -ENODEV;
+ }
+
+ /* State either UNREGISTERED or NEW */
+ xdp_rxq_info_init(xdp_rxq);
+ xdp_rxq->dev = dev;
+ xdp_rxq->queue_index = queue_index;
+
+ xdp_rxq->reg_state = REG_STATE_REGISTERED;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
+
+void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
+{
+ xdp_rxq->reg_state = REG_STATE_UNUSED;
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);
+
+bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
+{
+ return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4fb944a7ecf8..3ff7a05bea9a 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -41,6 +41,7 @@ hostprogs-y += xdp_redirect
hostprogs-y += xdp_redirect_map
hostprogs-y += xdp_redirect_cpu
hostprogs-y += xdp_monitor
+hostprogs-y += xdp_rxq_info
hostprogs-y += syscall_tp
# Libbpf dependencies
@@ -90,6 +91,7 @@ xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
+xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
# Tell kbuild to always build the programs
@@ -139,6 +141,7 @@ always += xdp_redirect_kern.o
always += xdp_redirect_map_kern.o
always += xdp_redirect_cpu_kern.o
always += xdp_monitor_kern.o
+always += xdp_rxq_info_kern.o
always += syscall_tp_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
@@ -182,6 +185,7 @@ HOSTLOADLIBES_xdp_redirect += -lelf
HOSTLOADLIBES_xdp_redirect_map += -lelf
HOSTLOADLIBES_xdp_redirect_cpu += -lelf
HOSTLOADLIBES_xdp_monitor += -lelf
+HOSTLOADLIBES_xdp_rxq_info += -lelf
HOSTLOADLIBES_syscall_tp += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c
new file mode 100644
index 000000000000..3fd209291653
--- /dev/null
+++ b/samples/bpf/xdp_rxq_info_kern.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ *
+ * Example howto extract XDP RX-queue info
+ */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* Config setup from with userspace
+ *
+ * User-side setup ifindex in config_map, to verify that
+ * ctx->ingress_ifindex is correct (against configured ifindex)
+ */
+struct config {
+ __u32 action;
+ int ifindex;
+};
+struct bpf_map_def SEC("maps") config_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct config),
+ .max_entries = 1,
+};
+
+/* Common stats data record (shared with userspace) */
+struct datarec {
+ __u64 processed;
+ __u64 issue;
+};
+
+struct bpf_map_def SEC("maps") stats_global_map = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = 1,
+};
+
+#define MAX_RXQs 64
+
+/* Stats per rx_queue_index (per CPU) */
+struct bpf_map_def SEC("maps") rx_queue_index_map = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = MAX_RXQs + 1,
+};
+
+SEC("xdp_prog0")
+int xdp_prognum0(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct datarec *rec, *rxq_rec;
+ int ingress_ifindex;
+ struct config *config;
+ u32 key = 0;
+
+ /* Global stats record */
+ rec = bpf_map_lookup_elem(&stats_global_map, &key);
+ if (!rec)
+ return XDP_ABORTED;
+ rec->processed++;
+
+ /* Accessing ctx->ingress_ifindex, cause BPF to rewrite BPF
+ * instructions inside kernel to access xdp_rxq->dev->ifindex
+ */
+ ingress_ifindex = ctx->ingress_ifindex;
+
+ config = bpf_map_lookup_elem(&config_map, &key);
+ if (!config)
+ return XDP_ABORTED;
+
+ /* Simple test: check ctx provided ifindex is as expected */
+ if (ingress_ifindex != config->ifindex) {
+ /* count this error case */
+ rec->issue++;
+ return XDP_ABORTED;
+ }
+
+ /* Update stats per rx_queue_index. Handle if rx_queue_index
+ * is larger than stats map can contain info for.
+ */
+ key = ctx->rx_queue_index;
+ if (key >= MAX_RXQs)
+ key = MAX_RXQs;
+ rxq_rec = bpf_map_lookup_elem(&rx_queue_index_map, &key);
+ if (!rxq_rec)
+ return XDP_ABORTED;
+ rxq_rec->processed++;
+ if (key == MAX_RXQs)
+ rxq_rec->issue++;
+
+ return config->action;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
new file mode 100644
index 000000000000..32430e8b3a6a
--- /dev/null
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -0,0 +1,531 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ */
+static const char *__doc__ = " XDP RX-queue info extract example\n\n"
+ "Monitor how many packets per sec (pps) are received\n"
+ "per NIC RX queue index and which CPU processed the packet\n"
+ ;
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <locale.h>
+#include <sys/resource.h>
+#include <getopt.h>
+#include <net/if.h>
+#include <time.h>
+
+#include <arpa/inet.h>
+#include <linux/if_link.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "bpf_util.h"
+
+static int ifindex = -1;
+static char ifname_buf[IF_NAMESIZE];
+static char *ifname;
+
+static __u32 xdp_flags;
+
+/* Exit return codes */
+#define EXIT_OK 0
+#define EXIT_FAIL 1
+#define EXIT_FAIL_OPTION 2
+#define EXIT_FAIL_XDP 3
+#define EXIT_FAIL_BPF 4
+#define EXIT_FAIL_MEM 5
+
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"dev", required_argument, NULL, 'd' },
+ {"skb-mode", no_argument, NULL, 'S' },
+ {"sec", required_argument, NULL, 's' },
+ {"no-separators", no_argument, NULL, 'z' },
+ {"action", required_argument, NULL, 'a' },
+ {0, 0, NULL, 0 }
+};
+
+static void int_exit(int sig)
+{
+ fprintf(stderr,
+ "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
+ ifindex, ifname);
+ if (ifindex > -1)
+ set_link_xdp_fd(ifindex, -1, xdp_flags);
+ exit(EXIT_OK);
+}
+
+struct config {
+ __u32 action;
+ int ifindex;
+};
+#define XDP_ACTION_MAX (XDP_TX + 1)
+#define XDP_ACTION_MAX_STRLEN 11
+static const char *xdp_action_names[XDP_ACTION_MAX] = {
+ [XDP_ABORTED] = "XDP_ABORTED",
+ [XDP_DROP] = "XDP_DROP",
+ [XDP_PASS] = "XDP_PASS",
+ [XDP_TX] = "XDP_TX",
+};
+
+static const char *action2str(int action)
+{
+ if (action < XDP_ACTION_MAX)
+ return xdp_action_names[action];
+ return NULL;
+}
+
+static int parse_xdp_action(char *action_str)
+{
+ size_t maxlen;
+ __u64 action = -1;
+ int i;
+
+ for (i = 0; i < XDP_ACTION_MAX; i++) {
+ maxlen = XDP_ACTION_MAX_STRLEN;
+ if (strncmp(xdp_action_names[i], action_str, maxlen) == 0) {
+ action = i;
+ break;
+ }
+ }
+ return action;
+}
+
+static void list_xdp_actions(void)
+{
+ int i;
+
+ printf("Available XDP --action <options>\n");
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ printf("\t%s\n", xdp_action_names[i]);
+ printf("\n");
+}
+
+static void usage(char *argv[])
+{
+ int i;
+
+ printf("\nDOCUMENTATION:\n%s\n", __doc__);
+ printf(" Usage: %s (options-see-below)\n", argv[0]);
+ printf(" Listing options:\n");
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-12s", long_options[i].name);
+ if (long_options[i].flag != NULL)
+ printf(" flag (internal value:%d)",
+ *long_options[i].flag);
+ else
+ printf(" short-option: -%c",
+ long_options[i].val);
+ printf("\n");
+ }
+ printf("\n");
+ list_xdp_actions();
+}
+
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+static __u64 gettime(void)
+{
+ struct timespec t;
+ int res;
+
+ res = clock_gettime(CLOCK_MONOTONIC, &t);
+ if (res < 0) {
+ fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
+ exit(EXIT_FAIL);
+ }
+ return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+/* Common stats data record shared with _kern.c */
+struct datarec {
+ __u64 processed;
+ __u64 issue;
+};
+struct record {
+ __u64 timestamp;
+ struct datarec total;
+ struct datarec *cpu;
+};
+struct stats_record {
+ struct record stats;
+ struct record *rxq;
+};
+
+static struct datarec *alloc_record_per_cpu(void)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct datarec *array;
+ size_t size;
+
+ size = sizeof(struct datarec) * nr_cpus;
+ array = malloc(size);
+ memset(array, 0, size);
+ if (!array) {
+ fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
+ exit(EXIT_FAIL_MEM);
+ }
+ return array;
+}
+
+static struct record *alloc_record_per_rxq(void)
+{
+ unsigned int nr_rxqs = map_data[2].def.max_entries;
+ struct record *array;
+ size_t size;
+
+ size = sizeof(struct record) * nr_rxqs;
+ array = malloc(size);
+ memset(array, 0, size);
+ if (!array) {
+ fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs);
+ exit(EXIT_FAIL_MEM);
+ }
+ return array;
+}
+
+static struct stats_record *alloc_stats_record(void)
+{
+ unsigned int nr_rxqs = map_data[2].def.max_entries;
+ struct stats_record *rec;
+ int i;
+
+ rec = malloc(sizeof(*rec));
+ memset(rec, 0, sizeof(*rec));
+ if (!rec) {
+ fprintf(stderr, "Mem alloc error\n");
+ exit(EXIT_FAIL_MEM);
+ }
+ rec->rxq = alloc_record_per_rxq();
+ for (i = 0; i < nr_rxqs; i++)
+ rec->rxq[i].cpu = alloc_record_per_cpu();
+
+ rec->stats.cpu = alloc_record_per_cpu();
+ return rec;
+}
+
+static void free_stats_record(struct stats_record *r)
+{
+ unsigned int nr_rxqs = map_data[2].def.max_entries;
+ int i;
+
+ for (i = 0; i < nr_rxqs; i++)
+ free(r->rxq[i].cpu);
+
+ free(r->rxq);
+ free(r->stats.cpu);
+ free(r);
+}
+
+static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
+{
+ /* For percpu maps, userspace gets a value per possible CPU */
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct datarec values[nr_cpus];
+ __u64 sum_processed = 0;
+ __u64 sum_issue = 0;
+ int i;
+
+ if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+ fprintf(stderr,
+ "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+ return false;
+ }
+ /* Get time as close as possible to reading map contents */
+ rec->timestamp = gettime();
+
+ /* Record and sum values from each CPU */
+ for (i = 0; i < nr_cpus; i++) {
+ rec->cpu[i].processed = values[i].processed;
+ sum_processed += values[i].processed;
+ rec->cpu[i].issue = values[i].issue;
+ sum_issue += values[i].issue;
+ }
+ rec->total.processed = sum_processed;
+ rec->total.issue = sum_issue;
+ return true;
+}
+
+static void stats_collect(struct stats_record *rec)
+{
+ int fd, i, max_rxqs;
+
+ fd = map_data[1].fd; /* map: stats_global_map */
+ map_collect_percpu(fd, 0, &rec->stats);
+
+ fd = map_data[2].fd; /* map: rx_queue_index_map */
+ max_rxqs = map_data[2].def.max_entries;
+ for (i = 0; i < max_rxqs; i++)
+ map_collect_percpu(fd, i, &rec->rxq[i]);
+}
+
+static double calc_period(struct record *r, struct record *p)
+{
+ double period_ = 0;
+ __u64 period = 0;
+
+ period = r->timestamp - p->timestamp;
+ if (period > 0)
+ period_ = ((double) period / NANOSEC_PER_SEC);
+
+ return period_;
+}
+
+static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->processed - p->processed;
+ pps = packets / period_;
+ }
+ return pps;
+}
+
+static __u64 calc_errs_pps(struct datarec *r,
+ struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->issue - p->issue;
+ pps = packets / period_;
+ }
+ return pps;
+}
+
+static void stats_print(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ int action)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ unsigned int nr_rxqs = map_data[2].def.max_entries;
+ double pps = 0, err = 0;
+ struct record *rec, *prev;
+ double t;
+ int rxq;
+ int i;
+
+ /* Header */
+ printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s\n",
+ ifname, ifindex, action2str(action));
+
+ /* stats_global_map */
+ {
+ char *fmt_rx = "%-15s %-7d %'-11.0f %'-10.0f %s\n";
+ char *fm2_rx = "%-15s %-7s %'-11.0f\n";
+ char *errstr = "";
+
+ printf("%-15s %-7s %-11s %-11s\n",
+ "XDP stats", "CPU", "pps", "issue-pps");
+
+ rec = &stats_rec->stats;
+ prev = &stats_prev->stats;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps (r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (err > 0)
+ errstr = "invalid-ifindex";
+ if (pps > 0)
+ printf(fmt_rx, "XDP-RX CPU",
+ i, pps, err, errstr);
+ }
+ pps = calc_pps (&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+ printf(fm2_rx, "XDP-RX CPU", "total", pps, err);
+ }
+
+ /* rx_queue_index_map */
+ printf("\n%-15s %-7s %-11s %-11s\n",
+ "RXQ stats", "RXQ:CPU", "pps", "issue-pps");
+
+ for (rxq = 0; rxq < nr_rxqs; rxq++) {
+ char *fmt_rx = "%-15s %3d:%-3d %'-11.0f %'-10.0f %s\n";
+ char *fm2_rx = "%-15s %3d:%-3s %'-11.0f\n";
+ char *errstr = "";
+ int rxq_ = rxq;
+
+ /* Last RXQ in map catch overflows */
+ if (rxq_ == nr_rxqs - 1)
+ rxq_ = -1;
+
+ rec = &stats_rec->rxq[rxq];
+ prev = &stats_prev->rxq[rxq];
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps (r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (err > 0) {
+ if (rxq_ == -1)
+ errstr = "map-overflow-RXQ";
+ else
+ errstr = "err";
+ }
+ if (pps > 0)
+ printf(fmt_rx, "rx_queue_index",
+ rxq_, i, pps, err, errstr);
+ }
+ pps = calc_pps (&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+ if (pps || err)
+ printf(fm2_rx, "rx_queue_index", rxq_, "sum", pps, err);
+ }
+}
+
+
+/* Pointer swap trick */
+static inline void swap(struct stats_record **a, struct stats_record **b)
+{
+ struct stats_record *tmp;
+
+ tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
+static void stats_poll(int interval, int action)
+{
+ struct stats_record *record, *prev;
+
+ record = alloc_stats_record();
+ prev = alloc_stats_record();
+ stats_collect(record);
+
+ while (1) {
+ swap(&prev, &record);
+ stats_collect(record);
+ stats_print(record, prev, action);
+ sleep(interval);
+ }
+
+ free_stats_record(record);
+ free_stats_record(prev);
+}
+
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+ bool use_separators = true;
+ struct config cfg = { 0 };
+ char filename[256];
+ int longindex = 0;
+ int interval = 2;
+ __u32 key = 0;
+ int opt, err;
+
+ char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 };
+ int action = XDP_PASS; /* Default action */
+ char *action_str = NULL;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
+ if (load_bpf_file(filename)) {
+ fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf);
+ return EXIT_FAIL;
+ }
+
+ if (!prog_fd[0]) {
+ fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno));
+ return EXIT_FAIL;
+ }
+
+ /* Parse commands line args */
+ while ((opt = getopt_long(argc, argv, "hSd:",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ case 'd':
+ if (strlen(optarg) >= IF_NAMESIZE) {
+ fprintf(stderr, "ERR: --dev name too long\n");
+ goto error;
+ }
+ ifname = (char *)&ifname_buf;
+ strncpy(ifname, optarg, IF_NAMESIZE);
+ ifindex = if_nametoindex(ifname);
+ if (ifindex == 0) {
+ fprintf(stderr,
+ "ERR: --dev name unknown err(%d):%s\n",
+ errno, strerror(errno));
+ goto error;
+ }
+ break;
+ case 's':
+ interval = atoi(optarg);
+ break;
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'z':
+ use_separators = false;
+ break;
+ case 'a':
+ action_str = (char *)&action_str_buf;
+ strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN);
+ break;
+ case 'h':
+ error:
+ default:
+ usage(argv);
+ return EXIT_FAIL_OPTION;
+ }
+ }
+ /* Required option */
+ if (ifindex == -1) {
+ fprintf(stderr, "ERR: required option --dev missing\n");
+ usage(argv);
+ return EXIT_FAIL_OPTION;
+ }
+ cfg.ifindex = ifindex;
+
+ /* Parse action string */
+ if (action_str) {
+ action = parse_xdp_action(action_str);
+ if (action < 0) {
+ fprintf(stderr, "ERR: Invalid XDP --action: %s\n",
+ action_str);
+ list_xdp_actions();
+ return EXIT_FAIL_OPTION;
+ }
+ }
+ cfg.action = action;
+
+ /* Trick to pretty printf with thousands separators use %' */
+ if (use_separators)
+ setlocale(LC_NUMERIC, "en_US");
+
+ /* User-side setup ifindex in config_map */
+ err = bpf_map_update_elem(map_fd[0], &key, &cfg, 0);
+ if (err) {
+ fprintf(stderr, "Store config failed (err:%d)\n", err);
+ exit(EXIT_FAIL_BPF);
+ }
+
+ /* Remove XDP program when program is interrupted */
+ signal(SIGINT, int_exit);
+
+ if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+ fprintf(stderr, "link set xdp fd failed\n");
+ return EXIT_FAIL_XDP;
+ }
+
+ stats_poll(interval, action);
+ return EXIT_OK;
+}