Netdev List

Netdev List
 help / color / mirror / Atom feed

* [bpf-next V4 PATCH 02/14] xdp/mlx5: setup xdp_rxq_info
From: Jesper Dangaard Brouer @ 2018-01-03 10:25 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov
  Cc: netdev, dsahern, Matan Barak, Saeed Mahameed,
	Jesper Dangaard Brouer, gospo, bjorn.topel, michael.chan,
	Tariq Toukan
In-Reply-To: <151497504273.18176.10177133999720101758.stgit@firesoul>

The mlx5 driver have a special drop-RQ queue (one per interface) that
simply drops all incoming traffic. It helps driver keep other HW
objects (flow steering) alive upon down/up operations.  It is
temporarily pointed by flow steering objects during the interface
setup, and when interface is down. It lacks many fields that are set
in a regular RQ (for example its state is never switched to
MLX5_RQC_STATE_RDY). (Thanks to Tariq Toukan for explanation).

The XDP RX-queue info for this drop-RQ marked as unused, which
allow us to use the same takedown/free code path as other RX-queues.

Driver hook points for xdp_rxq_info:
 * reg   : mlx5e_alloc_rq()
 * unused: mlx5e_alloc_drop_rq()
 * unreg : mlx5e_free_rq()

Tested on actual hardware with samples/bpf program

Cc: Saeed Mahameed <saeedm@mellanox.com>
Cc: Matan Barak <matanb@mellanox.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      |    4 ++++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |    9 +++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   |    1 +
 3 files changed, 14 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 543060c305a0..5299310f2481 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -46,6 +46,7 @@
 #include <linux/mlx5/transobj.h>
 #include <linux/rhashtable.h>
 #include <net/switchdev.h>
+#include <net/xdp.h>
 #include "wq.h"
 #include "mlx5_core.h"
 #include "en_stats.h"
@@ -571,6 +572,9 @@ struct mlx5e_rq {
 	u32                    rqn;
 	struct mlx5_core_dev  *mdev;
 	struct mlx5_core_mkey  umr_mkey;
+
+	/* XDP read-mostly */
+	struct xdp_rxq_info    xdp_rxq;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_channel {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 3aa1c90e7c86..539bd1d24396 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -582,6 +582,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		goto err_rq_wq_destroy;
 	}
 
+	if (xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix) < 0)
+		goto err_rq_wq_destroy;
+
 	rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
 	rq->buff.headroom = params->rq_headroom;
 
@@ -687,6 +690,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 err_rq_wq_destroy:
 	if (rq->xdp_prog)
 		bpf_prog_put(rq->xdp_prog);
+	xdp_rxq_info_unreg(&rq->xdp_rxq);
 	mlx5_wq_destroy(&rq->wq_ctrl);
 
 	return err;
@@ -699,6 +703,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
 	if (rq->xdp_prog)
 		bpf_prog_put(rq->xdp_prog);
 
+	xdp_rxq_info_unreg(&rq->xdp_rxq);
+
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		mlx5e_rq_free_mpwqe_info(rq);
@@ -2766,6 +2772,9 @@ static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
 	if (err)
 		return err;
 
+	/* Mark as unused given "Drop-RQ" packets never reach XDP */
+	xdp_rxq_info_unused(&rq->xdp_rxq);
+
 	rq->mdev = mdev;
 
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 5b499c7a698f..7b38480811d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -812,6 +812,7 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = xdp.data + *len;
 	xdp.data_hard_start = va;
+	xdp.rxq = &rq->xdp_rxq;
 
 	act = bpf_prog_run_xdp(prog, &xdp);
 	switch (act) {

^ permalink raw reply related

* [bpf-next V4 PATCH 03/14] i40e: setup xdp_rxq_info
From: Jesper Dangaard Brouer @ 2018-01-03 10:25 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov
  Cc: Paul Menzel, netdev, dsahern, intel-wired-lan, Jeff Kirsher,
	Jesper Dangaard Brouer, gospo, bjorn.topel, michael.chan
In-Reply-To: <151497504273.18176.10177133999720101758.stgit@firesoul>

The i40e driver has a special "FDIR" RX-ring (I40E_VSI_FDIR) which is
a sideband channel for configuring/updating the flow director tables.
This (i40e_vsi_)type does not invoke XDP-ebpf code.

As suggested by Björn (V2): Instead of marking this I40E_VSI_FDIR RX-ring
a special case, reverse the logic and only select RX-rings of type
I40E_VSI_MAIN to register xdp_rxq_info's for.

Driver hook points for xdp_rxq_info:
 * reg  : i40e_setup_rx_descriptors (via i40e_vsi_setup_rx_resources)
 * unreg: i40e_free_rx_resources    (via i40e_vsi_free_rx_resources)

Tested on actual hardware with samples/bpf program.

V2: Fixed bug in i40e_set_ringparam (memset zero) + match on I40E_VSI_MAIN.
V4: Update patch desc that got out-of-sync with code.

Cc: intel-wired-lan@lists.osuosl.org
Cc: Björn Töpel <bjorn.topel@intel.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Reviewed-by: Paul Menzel <pmenzel@molgen.mpg.de>
---
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c |    2 ++
 drivers/net/ethernet/intel/i40e/i40e_txrx.c    |   18 ++++++++++++++++--
 drivers/net/ethernet/intel/i40e/i40e_txrx.h    |    3 +++
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 5f6cf7212d4f..cfd788b4fd7a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1585,6 +1585,8 @@ static int i40e_set_ringparam(struct net_device *netdev,
 			 */
 			rx_rings[i].desc = NULL;
 			rx_rings[i].rx_bi = NULL;
+			/* Clear cloned XDP RX-queue info before setup call */
+			memset(&rx_rings[i].xdp_rxq, 0, sizeof(rx_rings[i].xdp_rxq));
 			/* this is to allow wr32 to have something to write to
 			 * during early allocation of Rx buffers
 			 */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 4566d66ffc7c..2a8a85e3ae8f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -27,6 +27,7 @@
 #include <linux/prefetch.h>
 #include <net/busy_poll.h>
 #include <linux/bpf_trace.h>
+#include <net/xdp.h>
 #include "i40e.h"
 #include "i40e_trace.h"
 #include "i40e_prototype.h"
@@ -1236,6 +1237,8 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
 {
 	i40e_clean_rx_ring(rx_ring);
+	if (rx_ring->vsi->type == I40E_VSI_MAIN)
+		xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 	rx_ring->xdp_prog = NULL;
 	kfree(rx_ring->rx_bi);
 	rx_ring->rx_bi = NULL;
@@ -1256,6 +1259,7 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
 {
 	struct device *dev = rx_ring->dev;
+	int err = -ENOMEM;
 	int bi_size;
 
 	/* warn if we are about to overwrite the pointer */
@@ -1283,13 +1287,21 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
 	rx_ring->next_to_clean = 0;
 	rx_ring->next_to_use = 0;
 
+	/* XDP RX-queue info only needed for RX rings exposed to XDP */
+	if (rx_ring->vsi->type == I40E_VSI_MAIN) {
+		err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
+				       rx_ring->queue_index);
+		if (err < 0)
+			goto err;
+	}
+
 	rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
 
 	return 0;
 err:
 	kfree(rx_ring->rx_bi);
 	rx_ring->rx_bi = NULL;
-	return -ENOMEM;
+	return err;
 }
 
 /**
@@ -2068,11 +2080,13 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	struct sk_buff *skb = rx_ring->skb;
 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
 	bool failure = false, xdp_xmit = false;
+	struct xdp_buff xdp;
+
+	xdp.rxq = &rx_ring->xdp_rxq;
 
 	while (likely(total_rx_packets < (unsigned int)budget)) {
 		struct i40e_rx_buffer *rx_buffer;
 		union i40e_rx_desc *rx_desc;
-		struct xdp_buff xdp;
 		unsigned int size;
 		u16 vlan_tag;
 		u8 rx_ptype;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index fbae1182e2ea..2d08760fc4ce 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -27,6 +27,8 @@
 #ifndef _I40E_TXRX_H_
 #define _I40E_TXRX_H_
 
+#include <net/xdp.h>
+
 /* Interrupt Throttling and Rate Limiting Goodies */
 
 #define I40E_MAX_ITR               0x0FF0  /* reg uses 2 usec resolution */
@@ -428,6 +430,7 @@ struct i40e_ring {
 					 */
 
 	struct i40e_channel *ch;
+	struct xdp_rxq_info xdp_rxq;
 } ____cacheline_internodealigned_in_smp;
 
 static inline bool ring_uses_build_skb(struct i40e_ring *ring)

^ permalink raw reply related

* [bpf-next V4 PATCH 04/14] ixgbe: setup xdp_rxq_info
From: Jesper Dangaard Brouer @ 2018-01-03 10:25 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov
  Cc: netdev, dsahern, Alexander Duyck, intel-wired-lan, Jeff Kirsher,
	Jesper Dangaard Brouer, gospo, bjorn.topel, michael.chan
In-Reply-To: <151497504273.18176.10177133999720101758.stgit@firesoul>

Driver hook points for xdp_rxq_info:
 * reg  : ixgbe_setup_rx_resources()
 * unreg: ixgbe_free_rx_resources()

Tested on actual hardware.

V2: Fix ixgbe_set_ringparam, clear xdp_rxq_info in temp_ring

Cc: intel-wired-lan@lists.osuosl.org
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h         |    2 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |    4 ++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |   10 +++++++++-
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 468c3555a629..8611763d6129 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -53,6 +53,7 @@
 #include <linux/dca.h>
 #endif
 
+#include <net/xdp.h>
 #include <net/busy_poll.h>
 
 /* common prefix used by pr_<> macros */
@@ -371,6 +372,7 @@ struct ixgbe_ring {
 		struct ixgbe_tx_queue_stats tx_stats;
 		struct ixgbe_rx_queue_stats rx_stats;
 	};
+	struct xdp_rxq_info xdp_rxq;
 } ____cacheline_internodealigned_in_smp;
 
 enum ixgbe_ring_f_enum {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 0aad1c2a3667..0aaf70b3cfcd 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -1156,6 +1156,10 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
 			memcpy(&temp_ring[i], adapter->rx_ring[i],
 			       sizeof(struct ixgbe_ring));
 
+			/* Clear copied XDP RX-queue info */
+			memset(&temp_ring[i].xdp_rxq, 0,
+			       sizeof(temp_ring[i].xdp_rxq));
+
 			temp_ring[i].count = new_rx_count;
 			err = ixgbe_setup_rx_resources(adapter, &temp_ring[i]);
 			if (err) {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 7737a05c717c..95aba975b391 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2318,12 +2318,14 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 #endif /* IXGBE_FCOE */
 	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
 	bool xdp_xmit = false;
+	struct xdp_buff xdp;
+
+	xdp.rxq = &rx_ring->xdp_rxq;
 
 	while (likely(total_rx_packets < budget)) {
 		union ixgbe_adv_rx_desc *rx_desc;
 		struct ixgbe_rx_buffer *rx_buffer;
 		struct sk_buff *skb;
-		struct xdp_buff xdp;
 		unsigned int size;
 
 		/* return some buffers to hardware, one at a time is too slow */
@@ -6444,6 +6446,11 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
 	rx_ring->next_to_clean = 0;
 	rx_ring->next_to_use = 0;
 
+	/* XDP RX-queue info */
+	if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
+			     rx_ring->queue_index) < 0)
+		goto err;
+
 	rx_ring->xdp_prog = adapter->xdp_prog;
 
 	return 0;
@@ -6541,6 +6548,7 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
 	ixgbe_clean_rx_ring(rx_ring);
 
 	rx_ring->xdp_prog = NULL;
+	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 	vfree(rx_ring->rx_buffer_info);
 	rx_ring->rx_buffer_info = NULL;
 

^ permalink raw reply related

* [bpf-next V4 PATCH 05/14] xdp/qede: setup xdp_rxq_info and intro xdp_rxq_info_is_reg
From: Jesper Dangaard Brouer @ 2018-01-03 10:25 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov
  Cc: bjorn.topel, netdev, Jesper Dangaard Brouer, Ariel Elior, dsahern,
	gospo, everest-linux-l2, michael.chan
In-Reply-To: <151497504273.18176.10177133999720101758.stgit@firesoul>

The driver code qede_free_fp_array() depend on kfree() can be called
with a NULL pointer. This stems from the qede_alloc_fp_array()
function which either (kz)alloc memory for fp->txq or fp->rxq.
This also simplifies error handling code in case of memory allocation
failures, but xdp_rxq_info_unreg need to know the difference.

Introduce xdp_rxq_info_is_reg() to handle if a memory allocation fails
and detect this is the failure path by seeing that xdp_rxq_info was
not registred yet, which first happens after successful alloaction in
qede_init_fp().

Driver hook points for xdp_rxq_info:
 * reg  : qede_init_fp
 * unreg: qede_free_fp_array

Tested on actual hardware with samples/bpf program.

V2: Driver have no proper error path for failed XDP RX-queue info reg, as
qede_init_fp() is a void function.

Cc: everest-linux-l2@cavium.com
Cc: Ariel Elior <Ariel.Elior@cavium.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 drivers/net/ethernet/qlogic/qede/qede.h      |    2 ++
 drivers/net/ethernet/qlogic/qede/qede_fp.c   |    1 +
 drivers/net/ethernet/qlogic/qede/qede_main.c |   10 ++++++++++
 include/net/xdp.h                            |    1 +
 net/core/xdp.c                               |    6 ++++++
 5 files changed, 20 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 8a336517baac..8116cfd30fad 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -40,6 +40,7 @@
 #include <linux/kernel.h>
 #include <linux/mutex.h>
 #include <linux/bpf.h>
+#include <net/xdp.h>
 #include <linux/qed/qede_rdma.h>
 #include <linux/io.h>
 #ifdef CONFIG_RFS_ACCEL
@@ -345,6 +346,7 @@ struct qede_rx_queue {
 	u64 xdp_no_pass;
 
 	void *handle;
+	struct xdp_rxq_info xdp_rxq;
 };
 
 union db_prod {
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 48ec4c56cddf..dafc079ab6b9 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1006,6 +1006,7 @@ static bool qede_rx_xdp(struct qede_dev *edev,
 	xdp.data = xdp.data_hard_start + *data_offset;
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = xdp.data + *len;
+	xdp.rxq = &rxq->xdp_rxq;
 
 	/* Queues always have a full reset currently, so for the time
 	 * being until there's atomic program replace just mark read
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 90d79ae2a48f..9929b4370ce6 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -765,6 +765,12 @@ static void qede_free_fp_array(struct qede_dev *edev)
 			fp = &edev->fp_array[i];
 
 			kfree(fp->sb_info);
+			/* Handle mem alloc failure case where qede_init_fp
+			 * didn't register xdp_rxq_info yet.
+			 * Implicit only (fp->type & QEDE_FASTPATH_RX)
+			 */
+			if (fp->rxq && xdp_rxq_info_is_reg(&fp->rxq->xdp_rxq))
+				xdp_rxq_info_unreg(&fp->rxq->xdp_rxq);
 			kfree(fp->rxq);
 			kfree(fp->xdp_tx);
 			kfree(fp->txq);
@@ -1493,6 +1499,10 @@ static void qede_init_fp(struct qede_dev *edev)
 			else
 				fp->rxq->data_direction = DMA_FROM_DEVICE;
 			fp->rxq->dev = &edev->pdev->dev;
+
+			/* Driver have no error path from here */
+			WARN_ON(xdp_rxq_info_reg(&fp->rxq->xdp_rxq, edev->ndev,
+						 fp->rxq->rxq_id) < 0);
 		}
 
 		if (fp->type & QEDE_FASTPATH_TX) {
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 86c41631a908..b2362ddfa694 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -43,5 +43,6 @@ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
 		     struct net_device *dev, u32 queue_index);
 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
+bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
 
 #endif /* __LINUX_NET_XDP_H__ */
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 229bc5a0ee04..097a0f74e004 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -65,3 +65,9 @@ void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
 	xdp_rxq->reg_state = REG_STATE_UNUSED;
 }
 EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);
+
+bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
+{
+	return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
+}
+EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);

^ permalink raw reply related

* [bpf-next V4 PATCH 06/14] mlx4: setup xdp_rxq_info
From: Jesper Dangaard Brouer @ 2018-01-03 10:25 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov
  Cc: netdev, dsahern, Jesper Dangaard Brouer, gospo, bjorn.topel,
	michael.chan, Tariq Toukan
In-Reply-To: <151497504273.18176.10177133999720101758.stgit@firesoul>

Driver hook points for xdp_rxq_info:
 * reg  : mlx4_en_create_rx_ring
 * unreg: mlx4_en_destroy_rx_ring

Tested on actual hardware.

Cc: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c |    3 ++-
 drivers/net/ethernet/mellanox/mlx4/en_rx.c     |   13 ++++++++++---
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |    4 +++-
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 21bc17fa3854..8fc51bc29003 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2172,8 +2172,9 @@ static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 
 		if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i],
 					   prof->rx_ring_size, priv->stride,
-					   node))
+					   node, i))
 			goto err;
+
 	}
 
 #ifdef CONFIG_RFS_ACCEL
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 5f9dbc9a7f5b..b4d144e67514 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -262,7 +262,7 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev)
 
 int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 			   struct mlx4_en_rx_ring **pring,
-			   u32 size, u16 stride, int node)
+			   u32 size, u16 stride, int node, int queue_index)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_rx_ring *ring;
@@ -286,6 +286,9 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 	ring->log_stride = ffs(ring->stride) - 1;
 	ring->buf_size = ring->size * ring->stride + TXBB_SIZE;
 
+	if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index) < 0)
+		goto err_ring;
+
 	tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
 					sizeof(struct mlx4_en_rx_alloc));
 	ring->rx_info = vzalloc_node(tmp, node);
@@ -293,7 +296,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 		ring->rx_info = vzalloc(tmp);
 		if (!ring->rx_info) {
 			err = -ENOMEM;
-			goto err_ring;
+			goto err_xdp_info;
 		}
 	}
 
@@ -317,6 +320,8 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 err_info:
 	vfree(ring->rx_info);
 	ring->rx_info = NULL;
+err_xdp_info:
+	xdp_rxq_info_unreg(&ring->xdp_rxq);
 err_ring:
 	kfree(ring);
 	*pring = NULL;
@@ -440,6 +445,7 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
 					lockdep_is_held(&mdev->state_lock));
 	if (old_prog)
 		bpf_prog_put(old_prog);
+	xdp_rxq_info_unreg(&ring->xdp_rxq);
 	mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
 	vfree(ring->rx_info);
 	ring->rx_info = NULL;
@@ -652,6 +658,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	int cq_ring = cq->ring;
 	bool doorbell_pending;
 	struct mlx4_cqe *cqe;
+	struct xdp_buff xdp;
 	int polled = 0;
 	int index;
 
@@ -666,6 +673,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(ring->xdp_prog);
+	xdp.rxq = &ring->xdp_rxq;
 	doorbell_pending = 0;
 
 	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
@@ -750,7 +758,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 		 * read bytes but not past the end of the frag.
 		 */
 		if (xdp_prog) {
-			struct xdp_buff xdp;
 			dma_addr_t dma;
 			void *orig_data;
 			u32 act;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 7db3d0d9bfce..f470ae37d937 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -46,6 +46,7 @@
 #endif
 #include <linux/cpu_rmap.h>
 #include <linux/ptp_clock_kernel.h>
+#include <net/xdp.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/qp.h>
@@ -356,6 +357,7 @@ struct mlx4_en_rx_ring {
 	unsigned long dropped;
 	int hwtstamp_rx_filter;
 	cpumask_var_t affinity_mask;
+	struct xdp_rxq_info xdp_rxq;
 };
 
 struct mlx4_en_cq {
@@ -720,7 +722,7 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev);
 void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv);
 int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 			   struct mlx4_en_rx_ring **pring,
-			   u32 size, u16 stride, int node);
+			   u32 size, u16 stride, int node, int queue_index);
 void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
 			     struct mlx4_en_rx_ring **pring,
 			     u32 size, u16 stride);

^ permalink raw reply related

* [bpf-next V4 PATCH 07/14] bnxt_en: setup xdp_rxq_info
From: Jesper Dangaard Brouer @ 2018-01-03 10:25 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov
  Cc: netdev, Jesper Dangaard Brouer, dsahern, michael.chan,
	bjorn.topel, gospo, Andy Gospodarek
In-Reply-To: <151497504273.18176.10177133999720101758.stgit@firesoul>

Driver hook points for xdp_rxq_info:
 * reg  : bnxt_alloc_rx_rings
 * unreg: bnxt_free_rx_rings

This driver should be updated to re-register when changing
allocation mode of RX rings.

Tested on actual hardware.

Cc: Andy Gospodarek <andy@greyhouse.net>
Cc: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |   10 ++++++++++
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |    2 ++
 drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c |    1 +
 3 files changed, 13 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 9efbdc6f1fcb..89c3c8760a78 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2247,6 +2247,9 @@ static void bnxt_free_rx_rings(struct bnxt *bp)
 		if (rxr->xdp_prog)
 			bpf_prog_put(rxr->xdp_prog);
 
+		if (xdp_rxq_info_is_reg(&rxr->xdp_rxq))
+			xdp_rxq_info_unreg(&rxr->xdp_rxq);
+
 		kfree(rxr->rx_tpa);
 		rxr->rx_tpa = NULL;
 
@@ -2280,6 +2283,10 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp)
 
 		ring = &rxr->rx_ring_struct;
 
+		rc = xdp_rxq_info_reg(&rxr->xdp_rxq, bp->dev, i);
+		if (rc < 0)
+			return rc;
+
 		rc = bnxt_alloc_ring(bp, ring);
 		if (rc)
 			return rc;
@@ -2834,6 +2841,9 @@ void bnxt_set_ring_params(struct bnxt *bp)
 	bp->cp_ring_mask = bp->cp_bit - 1;
 }
 
+/* Changing allocation mode of RX rings.
+ * TODO: Update when extending xdp_rxq_info to support allocation modes.
+ */
 int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
 {
 	if (page_mode) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 5359a1f0045f..2d268fc26f5e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -23,6 +23,7 @@
 #include <net/devlink.h>
 #include <net/dst_metadata.h>
 #include <net/switchdev.h>
+#include <net/xdp.h>
 
 struct tx_bd {
 	__le32 tx_bd_len_flags_type;
@@ -664,6 +665,7 @@ struct bnxt_rx_ring_info {
 
 	struct bnxt_ring_struct	rx_ring_struct;
 	struct bnxt_ring_struct	rx_agg_ring_struct;
+	struct xdp_rxq_info	xdp_rxq;
 };
 
 struct bnxt_cp_ring_info {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 261e5847557a..1389ab5e05df 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -96,6 +96,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
 	xdp.data = *data_ptr;
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = *data_ptr + *len;
+	xdp.rxq = &rxr->xdp_rxq;
 	orig_data = xdp.data;
 	mapping = rx_buf->mapping - bp->rx_dma_offset;
 

^ permalink raw reply related

* [bpf-next V4 PATCH 08/14] nfp: setup xdp_rxq_info
From: Jesper Dangaard Brouer @ 2018-01-03 10:25 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov
  Cc: dsahern, Jakub Kicinski, oss-drivers, Simon Horman,
	Jesper Dangaard Brouer, netdev, gospo, bjorn.topel, michael.chan
In-Reply-To: <151497504273.18176.10177133999720101758.stgit@firesoul>

Driver hook points for xdp_rxq_info:
 * reg  : nfp_net_rx_ring_alloc
 * unreg: nfp_net_rx_ring_free

In struct nfp_net_rx_ring moved member @size into a hole on 64-bit.
Thus, the size remaines the same after adding member @xdp_rxq.

Cc: oss-drivers@netronome.com
Cc: Jakub Kicinski <jakub.kicinski@netronome.com>
Cc: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |    5 ++++-
 .../net/ethernet/netronome/nfp/nfp_net_common.c    |   10 ++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 3801c52098d5..0e564cfabe7e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -47,6 +47,7 @@
 #include <linux/netdevice.h>
 #include <linux/pci.h>
 #include <linux/io-64-nonatomic-hi-lo.h>
+#include <net/xdp.h>
 
 #include "nfp_net_ctrl.h"
 
@@ -350,6 +351,7 @@ struct nfp_net_rx_buf {
  * @rxds:       Virtual address of FL/RX ring in host memory
  * @dma:        DMA address of the FL/RX ring
  * @size:       Size, in bytes, of the FL/RX ring (needed to free)
+ * @xdp_rxq:    RX-ring info avail for XDP
  */
 struct nfp_net_rx_ring {
 	struct nfp_net_r_vector *r_vec;
@@ -361,13 +363,14 @@ struct nfp_net_rx_ring {
 	u32 idx;
 
 	int fl_qcidx;
+	unsigned int size;
 	u8 __iomem *qcp_fl;
 
 	struct nfp_net_rx_buf *rxbufs;
 	struct nfp_net_rx_desc *rxds;
 
 	dma_addr_t dma;
-	unsigned int size;
+	struct xdp_rxq_info xdp_rxq;
 } ____cacheline_aligned;
 
 /**
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 0add4870ce2e..45b8cae937be 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1608,11 +1608,13 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 	unsigned int true_bufsz;
 	struct sk_buff *skb;
 	int pkts_polled = 0;
+	struct xdp_buff xdp;
 	int idx;
 
 	rcu_read_lock();
 	xdp_prog = READ_ONCE(dp->xdp_prog);
 	true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
+	xdp.rxq = &rx_ring->xdp_rxq;
 	tx_ring = r_vec->xdp_ring;
 
 	while (pkts_polled < budget) {
@@ -1703,7 +1705,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 				  dp->bpf_offload_xdp) && !meta.portid) {
 			void *orig_data = rxbuf->frag + pkt_off;
 			unsigned int dma_off;
-			struct xdp_buff xdp;
 			int act;
 
 			xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
@@ -2252,6 +2253,7 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
 	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
 	struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
 
+	xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
 	kfree(rx_ring->rxbufs);
 
 	if (rx_ring->rxds)
@@ -2275,7 +2277,11 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
 static int
 nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
 {
-	int sz;
+	int sz, err;
+
+	err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, rx_ring->idx);
+	if (err < 0)
+		return err;
 
 	rx_ring->cnt = dp->rxd_cnt;
 	rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;

^ permalink raw reply related

* [bpf-next V4 PATCH 09/14] thunderx: setup xdp_rxq_info
From: Jesper Dangaard Brouer @ 2018-01-03 10:25 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov
  Cc: bjorn.topel, Robert Richter, netdev, dsahern,
	Jesper Dangaard Brouer, gospo, Sunil Goutham, michael.chan,
	linux-arm-kernel
In-Reply-To: <151497504273.18176.10177133999720101758.stgit@firesoul>

This driver uses a bool scheme for "enable"/"disable" when setting up
different resources.  Thus, the hook points for xdp_rxq_info is done
in the same function call nicvf_rcv_queue_config().  This is activated
through enable/disable via nicvf_config_data_transfer(), which is tied
into nicvf_stop()/nicvf_open().

Extending driver packet handler call-path nicvf_rcv_pkt_handler() with
a pointer to the given struct rcv_queue, in-order to access the
xdp_rxq_info data area (in nicvf_xdp_rx()).

V2: Driver have no proper error path for failed XDP RX-queue info reg,
as nicvf_rcv_queue_config is a void function.

Cc: linux-arm-kernel@lists.infradead.org
Cc: Sunil Goutham <sgoutham@cavium.com>
Cc: Robert Richter <rric@kernel.org>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 drivers/net/ethernet/cavium/thunder/nicvf_main.c   |   11 +++++++----
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c |    4 ++++
 drivers/net/ethernet/cavium/thunder/nicvf_queues.h |    2 ++
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 52b3a6044f85..21618d0d694f 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -521,7 +521,7 @@ static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr)
 
 static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
 				struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
-				struct sk_buff **skb)
+				struct rcv_queue *rq, struct sk_buff **skb)
 {
 	struct xdp_buff xdp;
 	struct page *page;
@@ -545,6 +545,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
 	xdp.data = (void *)cpu_addr;
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = xdp.data + len;
+	xdp.rxq = &rq->xdp_rxq;
 	orig_data = xdp.data;
 
 	rcu_read_lock();
@@ -698,7 +699,8 @@ static inline void nicvf_set_rxhash(struct net_device *netdev,
 
 static void nicvf_rcv_pkt_handler(struct net_device *netdev,
 				  struct napi_struct *napi,
-				  struct cqe_rx_t *cqe_rx, struct snd_queue *sq)
+				  struct cqe_rx_t *cqe_rx,
+				  struct snd_queue *sq, struct rcv_queue *rq)
 {
 	struct sk_buff *skb = NULL;
 	struct nicvf *nic = netdev_priv(netdev);
@@ -724,7 +726,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
 	/* For XDP, ignore pkts spanning multiple pages */
 	if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) {
 		/* Packet consumed by XDP */
-		if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, &skb))
+		if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb))
 			return;
 	} else {
 		skb = nicvf_get_rcv_skb(snic, cqe_rx,
@@ -781,6 +783,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
 	struct cqe_rx_t *cq_desc;
 	struct netdev_queue *txq;
 	struct snd_queue *sq = &qs->sq[cq_idx];
+	struct rcv_queue *rq = &qs->rq[cq_idx];
 	unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx;
 
 	spin_lock_bh(&cq->lock);
@@ -811,7 +814,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
 
 		switch (cq_desc->cqe_type) {
 		case CQE_TYPE_RX:
-			nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq);
+			nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq);
 			work_done++;
 		break;
 		case CQE_TYPE_SEND:
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index f38ea349aa00..14e62c6ac342 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -760,6 +760,7 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
 
 	if (!rq->enable) {
 		nicvf_reclaim_rcv_queue(nic, qs, qidx);
+		xdp_rxq_info_unreg(&rq->xdp_rxq);
 		return;
 	}
 
@@ -772,6 +773,9 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
 	/* all writes of RBDR data to be loaded into L2 Cache as well*/
 	rq->caching = 1;
 
+	/* Driver have no proper error path for failed XDP RX-queue info reg */
+	WARN_ON(xdp_rxq_info_reg(&rq->xdp_rxq, nic->netdev, qidx) < 0);
+
 	/* Send a mailbox msg to PF to config RQ */
 	mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
 	mbx.rq.qs_num = qs->vnic_id;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 178ab6e8e3c5..7d1e4e2aaad0 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -12,6 +12,7 @@
 #include <linux/netdevice.h>
 #include <linux/iommu.h>
 #include <linux/bpf.h>
+#include <net/xdp.h>
 #include "q_struct.h"
 
 #define MAX_QUEUE_SET			128
@@ -255,6 +256,7 @@ struct rcv_queue {
 	u8		start_qs_rbdr_idx; /* RBDR idx in the above QS */
 	u8		caching;
 	struct		rx_tx_queue_stats stats;
+	struct xdp_rxq_info xdp_rxq;
 } ____cacheline_aligned_in_smp;
 
 struct cmp_queue {

^ permalink raw reply related

* [bpf-next V4 PATCH 10/14] tun: setup xdp_rxq_info
From: Jesper Dangaard Brouer @ 2018-01-03 10:25 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov
  Cc: Willem de Bruijn, Michael S. Tsirkin, netdev, Jason Wang, dsahern,
	Jesper Dangaard Brouer, gospo, bjorn.topel, michael.chan
In-Reply-To: <151497504273.18176.10177133999720101758.stgit@firesoul>

Driver hook points for xdp_rxq_info:
 * reg  : tun_attach
 * unreg: __tun_detach

I've done some manual testing of this tun driver, but I would
appriciate good review and someone else running their use-case tests,
as I'm not 100% sure I understand the tfile->detached semantics.

V2: Removed the skb_array_cleanup() call from V1 by request from Jason Wang.

Cc: Jason Wang <jasowang@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c |   24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e367d6310353..e7c5f4b2a9a6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -180,6 +180,7 @@ struct tun_file {
 	struct list_head next;
 	struct tun_struct *detached;
 	struct skb_array tx_array;
+	struct xdp_rxq_info xdp_rxq;
 };
 
 struct tun_flow_entry {
@@ -687,8 +688,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 			    tun->dev->reg_state == NETREG_REGISTERED)
 				unregister_netdevice(tun->dev);
 		}
-		if (tun)
+		if (tun) {
 			skb_array_cleanup(&tfile->tx_array);
+			xdp_rxq_info_unreg(&tfile->xdp_rxq);
+		}
 		sock_put(&tfile->sk);
 	}
 }
@@ -728,11 +731,13 @@ static void tun_detach_all(struct net_device *dev)
 		tun_napi_del(tun, tfile);
 		/* Drop read queue */
 		tun_queue_purge(tfile);
+		xdp_rxq_info_unreg(&tfile->xdp_rxq);
 		sock_put(&tfile->sk);
 	}
 	list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
 		tun_enable_queue(tfile);
 		tun_queue_purge(tfile);
+		xdp_rxq_info_unreg(&tfile->xdp_rxq);
 		sock_put(&tfile->sk);
 	}
 	BUG_ON(tun->numdisabled != 0);
@@ -784,6 +789,22 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
 
 	tfile->queue_index = tun->numqueues;
 	tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN;
+
+	if (tfile->detached) {
+		/* Re-attach detached tfile, updating XDP queue_index */
+		WARN_ON(!xdp_rxq_info_is_reg(&tfile->xdp_rxq));
+
+		if (tfile->xdp_rxq.queue_index    != tfile->queue_index)
+			tfile->xdp_rxq.queue_index = tfile->queue_index;
+	} else {
+		/* Setup XDP RX-queue info, for new tfile getting attached */
+		err = xdp_rxq_info_reg(&tfile->xdp_rxq,
+				       tun->dev, tfile->queue_index);
+		if (err < 0)
+			goto out;
+		err = 0;
+	}
+
 	rcu_assign_pointer(tfile->tun, tun);
 	rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
 	tun->numqueues++;
@@ -1508,6 +1529,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 		xdp.data = buf + pad;
 		xdp_set_data_meta_invalid(&xdp);
 		xdp.data_end = xdp.data + len;
+		xdp.rxq = &tfile->xdp_rxq;
 		orig_data = xdp.data;
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 

^ permalink raw reply related

* [bpf-next V4 PATCH 11/14] virtio_net: setup xdp_rxq_info
From: Jesper Dangaard Brouer @ 2018-01-03 10:26 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov
  Cc: Michael S. Tsirkin, netdev, Jason Wang, dsahern, virtualization,
	Jesper Dangaard Brouer, gospo, bjorn.topel, michael.chan
In-Reply-To: <151497504273.18176.10177133999720101758.stgit@firesoul>

The virtio_net driver doesn't dynamically change the RX-ring queue
layout and backing pages, but instead reject XDP setup if all the
conditions for XDP is not meet.  Thus, the xdp_rxq_info also remains
fairly static.  This allow us to simply add the reg/unreg to
net_device open/close functions.

Driver hook points for xdp_rxq_info:
 * reg  : virtnet_open
 * unreg: virtnet_close

V3:
 - bugfix, also setup xdp.rxq in receive_mergeable()
 - Tested bpf-sample prog inside guest on a virtio_net device

Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/virtio_net.c |   14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6fb7b658a6cc..ed8299343728 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -31,6 +31,7 @@
 #include <linux/average.h>
 #include <linux/filter.h>
 #include <net/route.h>
+#include <net/xdp.h>
 
 static int napi_weight = NAPI_POLL_WEIGHT;
 module_param(napi_weight, int, 0444);
@@ -115,6 +116,8 @@ struct receive_queue {
 
 	/* Name of this receive queue: input.$index */
 	char name[40];
+
+	struct xdp_rxq_info xdp_rxq;
 };
 
 struct virtnet_info {
@@ -559,6 +562,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 		xdp.data = xdp.data_hard_start + xdp_headroom;
 		xdp_set_data_meta_invalid(&xdp);
 		xdp.data_end = xdp.data + len;
+		xdp.rxq = &rq->xdp_rxq;
 		orig_data = xdp.data;
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
@@ -692,6 +696,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		xdp.data = data + vi->hdr_len;
 		xdp_set_data_meta_invalid(&xdp);
 		xdp.data_end = xdp.data + (len - vi->hdr_len);
+		xdp.rxq = &rq->xdp_rxq;
+
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
 		if (act != XDP_PASS)
@@ -1225,13 +1231,18 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
 static int virtnet_open(struct net_device *dev)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
-	int i;
+	int i, err;
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		if (i < vi->curr_queue_pairs)
 			/* Make sure we have some buffers: if oom use wq. */
 			if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
 				schedule_delayed_work(&vi->refill, 0);
+
+		err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i);
+		if (err < 0)
+			return err;
+
 		virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
 		virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
 	}
@@ -1560,6 +1571,7 @@ static int virtnet_close(struct net_device *dev)
 	cancel_delayed_work_sync(&vi->refill);
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
+		xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
 		napi_disable(&vi->rq[i].napi);
 		virtnet_napi_tx_disable(&vi->sq[i].napi);
 	}

^ permalink raw reply related

* [bpf-next V4 PATCH 12/14] xdp: generic XDP handling of xdp_rxq_info
From: Jesper Dangaard Brouer @ 2018-01-03 10:26 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov
  Cc: netdev, Jesper Dangaard Brouer, dsahern, gospo, bjorn.topel,
	michael.chan
In-Reply-To: <151497504273.18176.10177133999720101758.stgit@firesoul>

Hook points for xdp_rxq_info:
 * reg  : netif_alloc_rx_queues
 * unreg: netif_free_rx_queues

The net_device have some members (num_rx_queues + real_num_rx_queues)
and data-area (dev->_rx with struct netdev_rx_queue's) that were
primarily used for exporting information about RPS (CONFIG_RPS) queues
to sysfs (CONFIG_SYSFS).

For generic XDP extend struct netdev_rx_queue with the xdp_rxq_info,
and remove some of the CONFIG_SYSFS ifdefs.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 include/linux/netdevice.h |    2 +
 net/core/dev.c            |   69 ++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 352066e4eeef..143aa9afc30f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -44,6 +44,7 @@
 #include <net/dcbnl.h>
 #endif
 #include <net/netprio_cgroup.h>
+#include <net/xdp.h>
 
 #include <linux/netdev_features.h>
 #include <linux/neighbour.h>
@@ -686,6 +687,7 @@ struct netdev_rx_queue {
 #endif
 	struct kobject			kobj;
 	struct net_device		*dev;
+	struct xdp_rxq_info		xdp_rxq;
 } ____cacheline_aligned_in_smp;
 
 /*
diff --git a/net/core/dev.c b/net/core/dev.c
index 2eb66c0d9cdb..d7925ef8743d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3906,9 +3906,33 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 	return NET_RX_DROP;
 }
 
+static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct netdev_rx_queue *rxqueue;
+
+	rxqueue = dev->_rx;
+
+	if (skb_rx_queue_recorded(skb)) {
+		u16 index = skb_get_rx_queue(skb);
+
+		if (unlikely(index >= dev->real_num_rx_queues)) {
+			WARN_ONCE(dev->real_num_rx_queues > 1,
+				  "%s received packet on queue %u, but number "
+				  "of RX queues is %u\n",
+				  dev->name, index, dev->real_num_rx_queues);
+
+			return rxqueue; /* Return first rxqueue */
+		}
+		rxqueue += index;
+	}
+	return rxqueue;
+}
+
 static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 				     struct bpf_prog *xdp_prog)
 {
+	struct netdev_rx_queue *rxqueue;
 	u32 metalen, act = XDP_DROP;
 	struct xdp_buff xdp;
 	void *orig_data;
@@ -3952,6 +3976,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	xdp.data_hard_start = skb->data - skb_headroom(skb);
 	orig_data = xdp.data;
 
+	rxqueue = netif_get_rxqueue(skb);
+	xdp.rxq = &rxqueue->xdp_rxq;
+
 	act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
 	off = xdp.data - orig_data;
@@ -7589,12 +7616,12 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 }
 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 
-#ifdef CONFIG_SYSFS
 static int netif_alloc_rx_queues(struct net_device *dev)
 {
 	unsigned int i, count = dev->num_rx_queues;
 	struct netdev_rx_queue *rx;
 	size_t sz = count * sizeof(*rx);
+	int err = 0;
 
 	BUG_ON(count < 1);
 
@@ -7604,11 +7631,39 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 
 	dev->_rx = rx;
 
-	for (i = 0; i < count; i++)
+	for (i = 0; i < count; i++) {
 		rx[i].dev = dev;
+
+		/* XDP RX-queue setup */
+		err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i);
+		if (err < 0)
+			goto err_rxq_info;
+	}
 	return 0;
+
+err_rxq_info:
+	/* Rollback successful reg's and free other resources */
+	while (i--)
+		xdp_rxq_info_unreg(&rx[i].xdp_rxq);
+	kfree(dev->_rx);
+	dev->_rx = NULL;
+	return err;
+}
+
+static void netif_free_rx_queues(struct net_device *dev)
+{
+	unsigned int i, count = dev->num_rx_queues;
+	struct netdev_rx_queue *rx;
+
+	/* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */
+	if (!dev->_rx)
+		return;
+
+	rx = dev->_rx;
+
+	for (i = 0; i < count; i++)
+		xdp_rxq_info_unreg(&rx[i].xdp_rxq);
 }
-#endif
 
 static void netdev_init_one_queue(struct net_device *dev,
 				  struct netdev_queue *queue, void *_unused)
@@ -8169,12 +8224,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 		return NULL;
 	}
 
-#ifdef CONFIG_SYSFS
 	if (rxqs < 1) {
 		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
 		return NULL;
 	}
-#endif
 
 	alloc_size = sizeof(struct net_device);
 	if (sizeof_priv) {
@@ -8231,12 +8284,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	if (netif_alloc_netdev_queues(dev))
 		goto free_all;
 
-#ifdef CONFIG_SYSFS
 	dev->num_rx_queues = rxqs;
 	dev->real_num_rx_queues = rxqs;
 	if (netif_alloc_rx_queues(dev))
 		goto free_all;
-#endif
 
 	strcpy(dev->name, name);
 	dev->name_assign_type = name_assign_type;
@@ -8275,9 +8326,7 @@ void free_netdev(struct net_device *dev)
 
 	might_sleep();
 	netif_free_tx_queues(dev);
-#ifdef CONFIG_SYSFS
-	kvfree(dev->_rx);
-#endif
+	netif_free_rx_queues(dev);
 
 	kfree(rcu_dereference_protected(dev->ingress_queue, 1));
 

^ permalink raw reply related

* [bpf-next V4 PATCH 13/14] bpf: finally expose xdp_rxq_info to XDP bpf-programs
From: Jesper Dangaard Brouer @ 2018-01-03 10:26 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov
  Cc: netdev, Jesper Dangaard Brouer, Alexei Starovoitov, dsahern,
	gospo, bjorn.topel, michael.chan
In-Reply-To: <151497504273.18176.10177133999720101758.stgit@firesoul>

Now all XDP driver have been updated to setup xdp_rxq_info and assign
this to xdp_buff->rxq.  Thus, it is now safe to enable access to some
of the xdp_rxq_info struct members.

This patch extend xdp_md and expose UAPI to userspace for
ingress_ifindex and rx_queue_index.  Access happens via bpf
instruction rewrite, that load data directly from struct xdp_rxq_info.

* ingress_ifindex map to xdp_rxq_info->dev->ifindex
* rx_queue_index  map to xdp_rxq_info->queue_index

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h |    3 +++
 net/core/filter.c        |   19 +++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 69eabfcb9bdb..a6000a95d40e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -899,6 +899,9 @@ struct xdp_md {
 	__u32 data;
 	__u32 data_end;
 	__u32 data_meta;
+	/* Below access go though struct xdp_rxq_info */
+	__u32 ingress_ifindex; /* rxq->dev->ifindex */
+	__u32 rx_queue_index;  /* rxq->queue_index  */
 };
 
 enum sk_action {
diff --git a/net/core/filter.c b/net/core/filter.c
index 130b842c3a15..acdb94c0e97f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4304,6 +4304,25 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
 				      si->dst_reg, si->src_reg,
 				      offsetof(struct xdp_buff, data_end));
 		break;
+	case offsetof(struct xdp_md, ingress_ifindex):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct xdp_buff, rxq));
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
+				      si->dst_reg, si->dst_reg,
+				      offsetof(struct xdp_rxq_info, dev));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      bpf_target_off(struct net_device,
+						     ifindex, 4, target_size));
+		break;
+	case offsetof(struct xdp_md, rx_queue_index):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct xdp_buff, rxq));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      bpf_target_off(struct xdp_rxq_info,
+						queue_index, 4, target_size));
+		break;
 	}
 
 	return insn - insn_buf;

^ permalink raw reply related

* [bpf-next V4 PATCH 14/14] samples/bpf: program demonstrating access to xdp_rxq_info
From: Jesper Dangaard Brouer @ 2018-01-03 10:26 UTC (permalink / raw)
  To: Daniel Borkmann, Alexei Starovoitov
  Cc: netdev, Jesper Dangaard Brouer, dsahern, gospo, bjorn.topel,
	michael.chan
In-Reply-To: <151497504273.18176.10177133999720101758.stgit@firesoul>

This sample program can be used for monitoring and reporting how many
packets per sec (pps) are received per NIC RX queue index and which
CPU processed the packet. In itself it is a useful tool for quickly
identifying RSS imbalance issues, see below.

The default XDP action is XDP_PASS in-order to provide a monitor
mode. For benchmarking purposes it is possible to specify other XDP
actions on the cmdline --action.

Output below shows an imbalance RSS case where most RXQ's deliver to
CPU-0 while CPU-2 only get packets from a single RXQ.  Looking at
things from a CPU level the two CPUs are processing approx the same
amount, BUT looking at the rx_queue_index levels it is clear that
RXQ-2 receive much better service, than other RXQs which all share CPU-0.

Running XDP on dev:i40e1 (ifindex:3) action:XDP_PASS
XDP stats       CPU     pps         issue-pps
XDP-RX CPU      0       900,473     0
XDP-RX CPU      2       906,921     0
XDP-RX CPU      total   1,807,395

RXQ stats       RXQ:CPU pps         issue-pps
rx_queue_index    0:0   180,098     0
rx_queue_index    0:sum 180,098
rx_queue_index    1:0   180,098     0
rx_queue_index    1:sum 180,098
rx_queue_index    2:2   906,921     0
rx_queue_index    2:sum 906,921
rx_queue_index    3:0   180,098     0
rx_queue_index    3:sum 180,098
rx_queue_index    4:0   180,082     0
rx_queue_index    4:sum 180,082
rx_queue_index    5:0   180,093     0
rx_queue_index    5:sum 180,093

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
 samples/bpf/Makefile            |    4 
 samples/bpf/xdp_rxq_info_kern.c |   96 +++++++
 samples/bpf/xdp_rxq_info_user.c |  531 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 631 insertions(+)
 create mode 100644 samples/bpf/xdp_rxq_info_kern.c
 create mode 100644 samples/bpf/xdp_rxq_info_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4fb944a7ecf8..3ff7a05bea9a 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -41,6 +41,7 @@ hostprogs-y += xdp_redirect
 hostprogs-y += xdp_redirect_map
 hostprogs-y += xdp_redirect_cpu
 hostprogs-y += xdp_monitor
+hostprogs-y += xdp_rxq_info
 hostprogs-y += syscall_tp
 
 # Libbpf dependencies
@@ -90,6 +91,7 @@ xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
 xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
 xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
 xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
+xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
 syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
 
 # Tell kbuild to always build the programs
@@ -139,6 +141,7 @@ always += xdp_redirect_kern.o
 always += xdp_redirect_map_kern.o
 always += xdp_redirect_cpu_kern.o
 always += xdp_monitor_kern.o
+always += xdp_rxq_info_kern.o
 always += syscall_tp_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
@@ -182,6 +185,7 @@ HOSTLOADLIBES_xdp_redirect += -lelf
 HOSTLOADLIBES_xdp_redirect_map += -lelf
 HOSTLOADLIBES_xdp_redirect_cpu += -lelf
 HOSTLOADLIBES_xdp_monitor += -lelf
+HOSTLOADLIBES_xdp_rxq_info += -lelf
 HOSTLOADLIBES_syscall_tp += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c
new file mode 100644
index 000000000000..3fd209291653
--- /dev/null
+++ b/samples/bpf/xdp_rxq_info_kern.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ *
+ *  Example howto extract XDP RX-queue info
+ */
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* Config setup from with userspace
+ *
+ * User-side setup ifindex in config_map, to verify that
+ * ctx->ingress_ifindex is correct (against configured ifindex)
+ */
+struct config {
+	__u32 action;
+	int ifindex;
+};
+struct bpf_map_def SEC("maps") config_map = {
+	.type		= BPF_MAP_TYPE_ARRAY,
+	.key_size	= sizeof(int),
+	.value_size	= sizeof(struct config),
+	.max_entries	= 1,
+};
+
+/* Common stats data record (shared with userspace) */
+struct datarec {
+	__u64 processed;
+	__u64 issue;
+};
+
+struct bpf_map_def SEC("maps") stats_global_map = {
+	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size	= sizeof(u32),
+	.value_size	= sizeof(struct datarec),
+	.max_entries	= 1,
+};
+
+#define MAX_RXQs 64
+
+/* Stats per rx_queue_index (per CPU) */
+struct bpf_map_def SEC("maps") rx_queue_index_map = {
+	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size	= sizeof(u32),
+	.value_size	= sizeof(struct datarec),
+	.max_entries	= MAX_RXQs + 1,
+};
+
+SEC("xdp_prog0")
+int  xdp_prognum0(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data     = (void *)(long)ctx->data;
+	struct datarec *rec, *rxq_rec;
+	int ingress_ifindex;
+	struct config *config;
+	u32 key = 0;
+
+	/* Global stats record */
+	rec = bpf_map_lookup_elem(&stats_global_map, &key);
+	if (!rec)
+		return XDP_ABORTED;
+	rec->processed++;
+
+	/* Accessing ctx->ingress_ifindex, cause BPF to rewrite BPF
+	 * instructions inside kernel to access xdp_rxq->dev->ifindex
+	 */
+	ingress_ifindex = ctx->ingress_ifindex;
+
+	config = bpf_map_lookup_elem(&config_map, &key);
+	if (!config)
+		return XDP_ABORTED;
+
+	/* Simple test: check ctx provided ifindex is as expected */
+	if (ingress_ifindex != config->ifindex) {
+		/* count this error case */
+		rec->issue++;
+		return XDP_ABORTED;
+	}
+
+	/* Update stats per rx_queue_index. Handle if rx_queue_index
+	 * is larger than stats map can contain info for.
+	 */
+	key = ctx->rx_queue_index;
+	if (key >= MAX_RXQs)
+		key = MAX_RXQs;
+	rxq_rec = bpf_map_lookup_elem(&rx_queue_index_map, &key);
+	if (!rxq_rec)
+		return XDP_ABORTED;
+	rxq_rec->processed++;
+	if (key == MAX_RXQs)
+		rxq_rec->issue++;
+
+	return config->action;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
new file mode 100644
index 000000000000..32430e8b3a6a
--- /dev/null
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -0,0 +1,531 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ */
+static const char *__doc__ = " XDP RX-queue info extract example\n\n"
+	"Monitor how many packets per sec (pps) are received\n"
+	"per NIC RX queue index and which CPU processed the packet\n"
+	;
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <locale.h>
+#include <sys/resource.h>
+#include <getopt.h>
+#include <net/if.h>
+#include <time.h>
+
+#include <arpa/inet.h>
+#include <linux/if_link.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "bpf_util.h"
+
+static int ifindex = -1;
+static char ifname_buf[IF_NAMESIZE];
+static char *ifname;
+
+static __u32 xdp_flags;
+
+/* Exit return codes */
+#define EXIT_OK		0
+#define EXIT_FAIL		1
+#define EXIT_FAIL_OPTION	2
+#define EXIT_FAIL_XDP		3
+#define EXIT_FAIL_BPF		4
+#define EXIT_FAIL_MEM		5
+
+static const struct option long_options[] = {
+	{"help",	no_argument,		NULL, 'h' },
+	{"dev",		required_argument,	NULL, 'd' },
+	{"skb-mode",	no_argument,		NULL, 'S' },
+	{"sec",		required_argument,	NULL, 's' },
+	{"no-separators", no_argument,		NULL, 'z' },
+	{"action",	required_argument,	NULL, 'a' },
+	{0, 0, NULL,  0 }
+};
+
+static void int_exit(int sig)
+{
+	fprintf(stderr,
+		"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
+		ifindex, ifname);
+	if (ifindex > -1)
+		set_link_xdp_fd(ifindex, -1, xdp_flags);
+	exit(EXIT_OK);
+}
+
+struct config {
+	__u32 action;
+	int ifindex;
+};
+#define XDP_ACTION_MAX (XDP_TX + 1)
+#define XDP_ACTION_MAX_STRLEN 11
+static const char *xdp_action_names[XDP_ACTION_MAX] = {
+	[XDP_ABORTED]	= "XDP_ABORTED",
+	[XDP_DROP]	= "XDP_DROP",
+	[XDP_PASS]	= "XDP_PASS",
+	[XDP_TX]	= "XDP_TX",
+};
+
+static const char *action2str(int action)
+{
+	if (action < XDP_ACTION_MAX)
+		return xdp_action_names[action];
+	return NULL;
+}
+
+static int parse_xdp_action(char *action_str)
+{
+	size_t maxlen;
+	__u64 action = -1;
+	int i;
+
+	for (i = 0; i < XDP_ACTION_MAX; i++) {
+		maxlen = XDP_ACTION_MAX_STRLEN;
+		if (strncmp(xdp_action_names[i], action_str, maxlen) == 0) {
+			action = i;
+			break;
+		}
+	}
+	return action;
+}
+
+static void list_xdp_actions(void)
+{
+	int i;
+
+	printf("Available XDP --action <options>\n");
+	for (i = 0; i < XDP_ACTION_MAX; i++)
+		printf("\t%s\n", xdp_action_names[i]);
+	printf("\n");
+}
+
+static void usage(char *argv[])
+{
+	int i;
+
+	printf("\nDOCUMENTATION:\n%s\n", __doc__);
+	printf(" Usage: %s (options-see-below)\n", argv[0]);
+	printf(" Listing options:\n");
+	for (i = 0; long_options[i].name != 0; i++) {
+		printf(" --%-12s", long_options[i].name);
+		if (long_options[i].flag != NULL)
+			printf(" flag (internal value:%d)",
+				*long_options[i].flag);
+		else
+			printf(" short-option: -%c",
+				long_options[i].val);
+		printf("\n");
+	}
+	printf("\n");
+	list_xdp_actions();
+}
+
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+static __u64 gettime(void)
+{
+	struct timespec t;
+	int res;
+
+	res = clock_gettime(CLOCK_MONOTONIC, &t);
+	if (res < 0) {
+		fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
+		exit(EXIT_FAIL);
+	}
+	return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+/* Common stats data record shared with _kern.c */
+struct datarec {
+	__u64 processed;
+	__u64 issue;
+};
+struct record {
+	__u64 timestamp;
+	struct datarec total;
+	struct datarec *cpu;
+};
+struct stats_record {
+	struct record stats;
+	struct record *rxq;
+};
+
+static struct datarec *alloc_record_per_cpu(void)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	struct datarec *array;
+	size_t size;
+
+	size = sizeof(struct datarec) * nr_cpus;
+	array = malloc(size);
+	memset(array, 0, size);
+	if (!array) {
+		fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
+		exit(EXIT_FAIL_MEM);
+	}
+	return array;
+}
+
+static struct record *alloc_record_per_rxq(void)
+{
+	unsigned int nr_rxqs = map_data[2].def.max_entries;
+	struct record *array;
+	size_t size;
+
+	size = sizeof(struct record) * nr_rxqs;
+	array = malloc(size);
+	memset(array, 0, size);
+	if (!array) {
+		fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs);
+		exit(EXIT_FAIL_MEM);
+	}
+	return array;
+}
+
+static struct stats_record *alloc_stats_record(void)
+{
+	unsigned int nr_rxqs = map_data[2].def.max_entries;
+	struct stats_record *rec;
+	int i;
+
+	rec = malloc(sizeof(*rec));
+	memset(rec, 0, sizeof(*rec));
+	if (!rec) {
+		fprintf(stderr, "Mem alloc error\n");
+		exit(EXIT_FAIL_MEM);
+	}
+	rec->rxq = alloc_record_per_rxq();
+	for (i = 0; i < nr_rxqs; i++)
+		rec->rxq[i].cpu = alloc_record_per_cpu();
+
+	rec->stats.cpu = alloc_record_per_cpu();
+	return rec;
+}
+
+static void free_stats_record(struct stats_record *r)
+{
+	unsigned int nr_rxqs = map_data[2].def.max_entries;
+	int i;
+
+	for (i = 0; i < nr_rxqs; i++)
+		free(r->rxq[i].cpu);
+
+	free(r->rxq);
+	free(r->stats.cpu);
+	free(r);
+}
+
+static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
+{
+	/* For percpu maps, userspace gets a value per possible CPU */
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	struct datarec values[nr_cpus];
+	__u64 sum_processed = 0;
+	__u64 sum_issue = 0;
+	int i;
+
+	if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+		fprintf(stderr,
+			"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+		return false;
+	}
+	/* Get time as close as possible to reading map contents */
+	rec->timestamp = gettime();
+
+	/* Record and sum values from each CPU */
+	for (i = 0; i < nr_cpus; i++) {
+		rec->cpu[i].processed = values[i].processed;
+		sum_processed        += values[i].processed;
+		rec->cpu[i].issue = values[i].issue;
+		sum_issue        += values[i].issue;
+	}
+	rec->total.processed = sum_processed;
+	rec->total.issue     = sum_issue;
+	return true;
+}
+
+static void stats_collect(struct stats_record *rec)
+{
+	int fd, i, max_rxqs;
+
+	fd = map_data[1].fd; /* map: stats_global_map */
+	map_collect_percpu(fd, 0, &rec->stats);
+
+	fd = map_data[2].fd; /* map: rx_queue_index_map */
+	max_rxqs = map_data[2].def.max_entries;
+	for (i = 0; i < max_rxqs; i++)
+		map_collect_percpu(fd, i, &rec->rxq[i]);
+}
+
+static double calc_period(struct record *r, struct record *p)
+{
+	double period_ = 0;
+	__u64 period = 0;
+
+	period = r->timestamp - p->timestamp;
+	if (period > 0)
+		period_ = ((double) period / NANOSEC_PER_SEC);
+
+	return period_;
+}
+
+static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
+{
+	__u64 packets = 0;
+	__u64 pps = 0;
+
+	if (period_ > 0) {
+		packets = r->processed - p->processed;
+		pps = packets / period_;
+	}
+	return pps;
+}
+
+static __u64 calc_errs_pps(struct datarec *r,
+			    struct datarec *p, double period_)
+{
+	__u64 packets = 0;
+	__u64 pps = 0;
+
+	if (period_ > 0) {
+		packets = r->issue - p->issue;
+		pps = packets / period_;
+	}
+	return pps;
+}
+
+static void stats_print(struct stats_record *stats_rec,
+			struct stats_record *stats_prev,
+			int action)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	unsigned int nr_rxqs = map_data[2].def.max_entries;
+	double pps = 0, err = 0;
+	struct record *rec, *prev;
+	double t;
+	int rxq;
+	int i;
+
+	/* Header */
+	printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s\n",
+	       ifname, ifindex, action2str(action));
+
+	/* stats_global_map */
+	{
+		char *fmt_rx = "%-15s %-7d %'-11.0f %'-10.0f %s\n";
+		char *fm2_rx = "%-15s %-7s %'-11.0f\n";
+		char *errstr = "";
+
+		printf("%-15s %-7s %-11s %-11s\n",
+		       "XDP stats", "CPU", "pps", "issue-pps");
+
+		rec  =  &stats_rec->stats;
+		prev = &stats_prev->stats;
+		t = calc_period(rec, prev);
+		for (i = 0; i < nr_cpus; i++) {
+			struct datarec *r = &rec->cpu[i];
+			struct datarec *p = &prev->cpu[i];
+
+			pps = calc_pps     (r, p, t);
+			err = calc_errs_pps(r, p, t);
+			if (err > 0)
+				errstr = "invalid-ifindex";
+			if (pps > 0)
+				printf(fmt_rx, "XDP-RX CPU",
+					i, pps, err, errstr);
+		}
+		pps  = calc_pps     (&rec->total, &prev->total, t);
+		err  = calc_errs_pps(&rec->total, &prev->total, t);
+		printf(fm2_rx, "XDP-RX CPU", "total", pps, err);
+	}
+
+	/* rx_queue_index_map */
+	printf("\n%-15s %-7s %-11s %-11s\n",
+	       "RXQ stats", "RXQ:CPU", "pps", "issue-pps");
+
+	for (rxq = 0; rxq < nr_rxqs; rxq++) {
+		char *fmt_rx = "%-15s %3d:%-3d %'-11.0f %'-10.0f %s\n";
+		char *fm2_rx = "%-15s %3d:%-3s %'-11.0f\n";
+		char *errstr = "";
+		int rxq_ = rxq;
+
+		/* Last RXQ in map catch overflows */
+		if (rxq_ == nr_rxqs - 1)
+			rxq_ = -1;
+
+		rec  =  &stats_rec->rxq[rxq];
+		prev = &stats_prev->rxq[rxq];
+		t = calc_period(rec, prev);
+		for (i = 0; i < nr_cpus; i++) {
+			struct datarec *r = &rec->cpu[i];
+			struct datarec *p = &prev->cpu[i];
+
+			pps = calc_pps     (r, p, t);
+			err = calc_errs_pps(r, p, t);
+			if (err > 0) {
+				if (rxq_ == -1)
+					errstr = "map-overflow-RXQ";
+				else
+					errstr = "err";
+			}
+			if (pps > 0)
+				printf(fmt_rx, "rx_queue_index",
+				       rxq_, i, pps, err, errstr);
+		}
+		pps  = calc_pps     (&rec->total, &prev->total, t);
+		err  = calc_errs_pps(&rec->total, &prev->total, t);
+		if (pps || err)
+			printf(fm2_rx, "rx_queue_index", rxq_, "sum", pps, err);
+	}
+}
+
+
+/* Pointer swap trick */
+static inline void swap(struct stats_record **a, struct stats_record **b)
+{
+	struct stats_record *tmp;
+
+	tmp = *a;
+	*a = *b;
+	*b = tmp;
+}
+
+static void stats_poll(int interval, int action)
+{
+	struct stats_record *record, *prev;
+
+	record = alloc_stats_record();
+	prev   = alloc_stats_record();
+	stats_collect(record);
+
+	while (1) {
+		swap(&prev, &record);
+		stats_collect(record);
+		stats_print(record, prev, action);
+		sleep(interval);
+	}
+
+	free_stats_record(record);
+	free_stats_record(prev);
+}
+
+
+int main(int argc, char **argv)
+{
+	struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+	bool use_separators = true;
+	struct config cfg = { 0 };
+	char filename[256];
+	int longindex = 0;
+	int interval = 2;
+	__u32 key = 0;
+	int opt, err;
+
+	char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 };
+	int action = XDP_PASS; /* Default action */
+	char *action_str = NULL;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK)");
+		return 1;
+	}
+
+	if (load_bpf_file(filename)) {
+		fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf);
+		return EXIT_FAIL;
+	}
+
+	if (!prog_fd[0]) {
+		fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno));
+		return EXIT_FAIL;
+	}
+
+	/* Parse commands line args */
+	while ((opt = getopt_long(argc, argv, "hSd:",
+				  long_options, &longindex)) != -1) {
+		switch (opt) {
+		case 'd':
+			if (strlen(optarg) >= IF_NAMESIZE) {
+				fprintf(stderr, "ERR: --dev name too long\n");
+				goto error;
+			}
+			ifname = (char *)&ifname_buf;
+			strncpy(ifname, optarg, IF_NAMESIZE);
+			ifindex = if_nametoindex(ifname);
+			if (ifindex == 0) {
+				fprintf(stderr,
+					"ERR: --dev name unknown err(%d):%s\n",
+					errno, strerror(errno));
+				goto error;
+			}
+			break;
+		case 's':
+			interval = atoi(optarg);
+			break;
+		case 'S':
+			xdp_flags |= XDP_FLAGS_SKB_MODE;
+			break;
+		case 'z':
+			use_separators = false;
+			break;
+		case 'a':
+			action_str = (char *)&action_str_buf;
+			strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN);
+			break;
+		case 'h':
+		error:
+		default:
+			usage(argv);
+			return EXIT_FAIL_OPTION;
+		}
+	}
+	/* Required option */
+	if (ifindex == -1) {
+		fprintf(stderr, "ERR: required option --dev missing\n");
+		usage(argv);
+		return EXIT_FAIL_OPTION;
+	}
+	cfg.ifindex = ifindex;
+
+	/* Parse action string */
+	if (action_str) {
+		action = parse_xdp_action(action_str);
+		if (action < 0) {
+			fprintf(stderr, "ERR: Invalid XDP --action: %s\n",
+				action_str);
+			list_xdp_actions();
+			return EXIT_FAIL_OPTION;
+		}
+	}
+	cfg.action = action;
+
+	/* Trick to pretty printf with thousands separators use %' */
+	if (use_separators)
+		setlocale(LC_NUMERIC, "en_US");
+
+	/* User-side setup ifindex in config_map */
+	err = bpf_map_update_elem(map_fd[0], &key, &cfg, 0);
+	if (err) {
+		fprintf(stderr, "Store config failed (err:%d)\n", err);
+		exit(EXIT_FAIL_BPF);
+	}
+
+	/* Remove XDP program when program is interrupted */
+	signal(SIGINT, int_exit);
+
+	if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
+		fprintf(stderr, "link set xdp fd failed\n");
+		return EXIT_FAIL_XDP;
+	}
+
+	stats_poll(interval, action);
+	return EXIT_OK;
+}

^ permalink raw reply related

* Re: [PATCH net] sctp: fix error path in sctp_stream_init
From: Neil Horman @ 2018-01-03 10:32 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner; +Cc: netdev, linux-sctp, Xin Long, Vlad Yasevich
In-Reply-To: <74b65f56d6912704f3ef79a46242cc2ca567e4e3.1514928664.git.marcelo.leitner@gmail.com>

On Tue, Jan 02, 2018 at 07:44:37PM -0200, Marcelo Ricardo Leitner wrote:
> syzbot noticed a NULL pointer dereference panic in sctp_stream_free()
> which was caused by an incomplete error handling in sctp_stream_init().
> By not clearing stream->outcnt, it made a for() in sctp_stream_free()
> think that it had elements to free, but not, leading to the panic.
> 
> As suggested by Xin Long, this patch also simplifies the error path by
> moving it to the only if() that uses it.
> 
> See-also: https://www.spinics.net/lists/netdev/msg473756.html
> See-also: https://www.spinics.net/lists/netdev/msg465024.html
> Reported-by: syzbot <syzkaller@googlegroups.com>
> Fixes: f952be79cebd ("sctp: introduce struct sctp_stream_out_ext")
> Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
> ---
>  net/sctp/stream.c | 22 ++++++++++------------
>  1 file changed, 10 insertions(+), 12 deletions(-)
> 
> diff --git a/net/sctp/stream.c b/net/sctp/stream.c
> index 76ea66be0bbee7d3f018676d52c8b95ba06dbcb1..524dfeb94c41ab1ac735746a8acf93af1c96ae48 100644
> --- a/net/sctp/stream.c
> +++ b/net/sctp/stream.c
> @@ -156,9 +156,9 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
>  	sctp_stream_outq_migrate(stream, NULL, outcnt);
>  	sched->sched_all(stream);
>  
> -	i = sctp_stream_alloc_out(stream, outcnt, gfp);
> -	if (i)
> -		return i;
> +	ret = sctp_stream_alloc_out(stream, outcnt, gfp);
> +	if (ret)
> +		goto out;
>  
>  	stream->outcnt = outcnt;
>  	for (i = 0; i < stream->outcnt; i++)
> @@ -170,19 +170,17 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
>  	if (!incnt)
>  		goto out;
>  
> -	i = sctp_stream_alloc_in(stream, incnt, gfp);
> -	if (i) {
> -		ret = -ENOMEM;
> -		goto free;
> +	ret = sctp_stream_alloc_in(stream, incnt, gfp);
> +	if (ret) {
> +		sched->free(stream);
> +		kfree(stream->out);
> +		stream->out = NULL;
> +		stream->outcnt = 0;
> +		goto out;
>  	}
>  
>  	stream->incnt = incnt;
> -	goto out;
>  
> -free:
> -	sched->free(stream);
> -	kfree(stream->out);
> -	stream->out = NULL;
>  out:
>  	return ret;
>  }
> -- 
> 2.14.3
> 
> 
Acked-by: Neil Horman <nhorman@tuxdriver.com>

^ permalink raw reply

* Re: [PATCH] flex_can: Correct the checking for frame length in flexcan_start_xmit()
From: Marc Kleine-Budde @ 2018-01-03 10:35 UTC (permalink / raw)
  To: Luu An Phu, wg; +Cc: linux-can, netdev, stefan-gabriel.mirea
In-Reply-To: <1514864658-13194-1-git-send-email-phu.luuan@nxp.com>


[-- Attachment #1.1: Type: text/plain, Size: 766 bytes --]

On 01/02/2018 04:44 AM, Luu An Phu wrote:
> From: "phu.luuan" <phu.luuan@nxp.com>
> 
> The flexcan_start_xmit() function compares the frame length with data register
> length to write frame content into data[0] and data[1] register. Data register
> length is 4 bytes and frame maximum length is 8 bytes.
> 
> Fix the check that compares frame length with 3. Because the register length
> is 4.
> 
> Signed-off-by: Luu An Phu <phu.luuan@nxp.com>

Applied to can.

Tnx,
Marc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply

* Re: [PATCH] net: ipv4: emulate READ_ONCE() on ->hdrincl bit-field in raw_sendmsg()
From: Stefano Brivio @ 2018-01-03 10:37 UTC (permalink / raw)
  To: Nicolai Stange
  Cc: David S. Miller, Alexey Kuznetsov, Hideaki YOSHIFUJI,
	Mohamed Ghannam, Michal Kubecek, Miroslav Benes, netdev,
	linux-kernel
In-Reply-To: <87vagjjnpn.fsf@suse.de>

Hi Nicolai,

On Wed, 03 Jan 2018 10:28:20 +0100
Nicolai Stange <nstange@suse.de> wrote:

> Hi Stefano,
> 
> Stefano Brivio <sbrivio@redhat.com> writes:
> 
> > On Tue,  2 Jan 2018 17:30:20 +0100
> > Nicolai Stange <nstange@suse.de> wrote:
> >  
> >> [...]
> >>
> >> diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
> >> index 5b9bd5c33d9d..e84290c28c0c 100644
> >> --- a/net/ipv4/raw.c
> >> +++ b/net/ipv4/raw.c
> >> @@ -513,16 +513,18 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> >>  	int err;
> >>  	struct ip_options_data opt_copy;
> >>  	struct raw_frag_vec rfv;
> >> -	int hdrincl;
> >> +	int hdrincl, __hdrincl;
> >>  
> >>  	err = -EMSGSIZE;
> >>  	if (len > 0xFFFF)
> >>  		goto out;
> >>  
> >>  	/* hdrincl should be READ_ONCE(inet->hdrincl)
> >> -	 * but READ_ONCE() doesn't work with bit fields
> >> +	 * but READ_ONCE() doesn't work with bit fields.
> >> +	 * Emulate it by doing the READ_ONCE() from an intermediate int.
> >>  	 */
> >> -	hdrincl = inet->hdrincl;
> >> +	__hdrincl = inet->hdrincl;
> >> +	hdrincl = READ_ONCE(__hdrincl);  
> >
> > I guess you don't actually need to use a third variable. What about
> > doing READ_ONCE() on hdrincl itself after the first assignment?
> >
> > Perhaps something like the patch below -- applies to net.git, yields
> > same binary output as your version with gcc 6, looks IMHO more
> > straightforward:
> >
> > diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
> > index 125c1eab3eaa..8c2f783a95fc 100644
> > --- a/net/ipv4/raw.c
> > +++ b/net/ipv4/raw.c
> > @@ -519,10 +519,12 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
> >  	if (len > 0xFFFF)
> >  		goto out;
> >  
> > -	/* hdrincl should be READ_ONCE(inet->hdrincl)
> > -	 * but READ_ONCE() doesn't work with bit fields
> > +	/* hdrincl should be READ_ONCE(inet->hdrincl) but READ_ONCE() doesn't
> > +	 * work with bit fields. Emulate it by adding a further sequence point.
> >  	 */
> >  	hdrincl = inet->hdrincl;
> > +	hdrincl = READ_ONCE(hdrincl);
> > +  
> 
> Yes, this does also work. In fact, after having been lowered into SSA
> form, it should be equivalent to what I posted.
> 
> So, it's a matter of preference/style and I'd leave the decision on
> this to the maintainers -- for me, either way is fine.
> 
> I don't like the "sequence point" wording in the comment above though:
> AFAICS, if taken in the meaning of C99, it's not any sequence point but
> the volatile access in READ_ONCE() which ensures that there won't be any
> reloads from ->hdrincl. If you don't mind, I'll adjust that comment if
> asked to resend with your solution.

Well, by "by adding a further sequence point" I refer to what we have
to do to emulate READ_ONCE(), not to the reason why we need READ_ONCE().

However, this is a likely sign that my comment isn't that clear either.
So unless you have better ideas, I would go with:

+	/* hdrincl should be READ_ONCE(inet->hdrincl) but READ_ONCE() doesn't
+	 * work with bit fields. Doing this indirectly yields the same result.

but I really hope you have a better idea. :)

-- 
Stefano

^ permalink raw reply

* Re: ACPI issues on cold power on [bisected]
From: Jonathan McDowell @ 2018-01-03 10:38 UTC (permalink / raw)
  To: Joonsoo Kim
  Cc: Rafael J. Wysocki, ACPI Devel Maling List,
	Linux Kernel Mailing List, Linux Memory Management List, netdev
In-Reply-To: <20180103021129.GB26517@js1304-P5Q-DELUXE>

On Wed, Jan 03, 2018 at 11:11:29AM +0900, Joonsoo Kim wrote:
> On Tue, Jan 02, 2018 at 11:25:01AM +0100, Rafael J. Wysocki wrote:
> > On Tue, Jan 2, 2018 at 3:54 AM, Joonsoo Kim <iamjoonsoo.kim@lge.com> wrote:
> > > On Fri, Dec 29, 2017 at 04:36:59PM +0000, Jonathan McDowell wrote:
> > >> On Fri, Dec 22, 2017 at 09:21:09AM +0900, Joonsoo Kim wrote:
> > >> > On Fri, Dec 08, 2017 at 03:11:59PM +0000, Jonathan McDowell wrote:
> > >> > > I've been sitting on this for a while and should have spent time to
> > >> > > investigate sooner, but it's been an odd failure mode that wasn't quite
> > >> > > obvious.
> > >> > >
> > >> > > In 4.9 if I cold power on my laptop (Dell E7240) it fails to boot - I
> > >> > > don't see anything after grub says its booting. In 4.10 onwards the
> > >> > > laptop boots, but I get an Oops as part of the boot and ACPI is unhappy
> > >> > > (no suspend, no clean poweroff, no ACPI buttons). The Oops is below;
> > >> > > taken from 4.12 as that's the most recent error dmesg I have saved but
> > >> > > also seen back in 4.10. It's always address 0x30 for the dereference.
> > >> > >
> > >> > > Rebooting the laptop does not lead to these problems; it's *only* from a
> > >> > > complete cold boot that they arise (which didn't help me in terms of
> > >> > > being able to reliably bisect). Once I realised that I was able to
> > >> > > bisect, but it leads me to an odd commit:
> > >> > >
> > >> > > 86d9f48534e800e4d62cdc1b5aaf539f4c1d47d6
> > >> > > (mm/slab: fix kmemcg cache creation delayed issue)
> > >> > >
> > >> > > If I revert this then I can cold boot without problems.
> > >> > >
> > >> > > Also I don't see the problem with a stock Debian kernel, I think because
> > >> > > the ACPI support is modularised.
> > >> >
> > >> > Sorry for late response. I was on a long vacation.
> > >>
> > >> No problem. I've been trying to get around to diagnosing this for a
> > >> while now anyway and this isn't a great time of year for fast responses.
> > >>
> > >> > I have tried to solve the problem however I don't find any clue yet.
> > >> >
> > >> > >From my analysis, oops report shows that 'struct sock *ssk' passed to
> > >> > netlink_broadcast_filtered() is NULL. It means that some of
> > >> > netlink_kernel_create() returns NULL. Maybe, it is due to slab
> > >> > allocation failure. Could you check it by inserting some log on that
> > >> > part? The issue cannot be reproducible in my side so I need your help.
> > >>
> > >> I've added some debug in acpi_bus_generate_netlink_event +
> > >> genlmsg_multicast and the problem seems to be that genlmsg_multicast is
> > >> getting called when init_net.genl_sock has not yet been initialised,
> > >> leading to the NULL deference.
> > >>
> > >> Full dmesg output from a cold 4.14.8 boot at:
> > >>
> > >> https://the.earth.li/~noodles/acpi-problem/dmesg-4.14.8-broken
> > >>
> > >> And the same kernel after a reboot ("shutdown -r now"):
> > >>
> > >> https://the.earth.li/~noodles/acpi-problem/dmesg-4.14.8-working
> > >>
> > >> Patch that I've applied is at
> > >>
> > >> https://the.earth.li/~noodles/acpi-problem/debug-acpi.diff
> > >>
> > >
> > > Thanks for testing! It's very helpful.
> > >
> > >> The interesting difference seems to be:
> > >>
> > >>  PCI: Using ACPI for IRQ routing
> > >> +ACPI: Generating event type 208 (:9DBB5994-A997-11DA-B012-B622A1EF5492)
> > >> +ERROR: init_net.genl_sock is NULL
> > >> +BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
> > >> +IP: netlink_broadcast_filtered+0x20/0x3d0
> > >> +PGD 0 P4D 0
> > >> +Oops: 0000 [#1] SMP
> > >> +Modules linked in:
> > >> +CPU: 0 PID: 29 Comm: kworker/0:1 Not tainted 4.14.8+ #1
> > >> +Hardware name: Dell Inc. Latitude E7240/07RPNV, BIOS A22 10/18/2017
> > >> +Workqueue: kacpi_notify acpi_os_execute_deferred
> > >>
> > >> 9DBB5994-A997-11DA-B012-B622A1EF5492 is the Dell WMI event GUID and
> > >> there's no visible event for it on a reboot, just on a cold power on.
> > >> Some sort of ordering issues such that genl_sock is being initialised
> > >> later with the slab change?
> > >
> > > I have checked that there is an ordering issue.
> > >
> > > genl_init() which initializes init_net->genl_sock is called on
> > > subsys_initcall().
> > >
> > > acpi_wmi_init() which schedules acpi_wmi_notify_handler() to the
> > > workqueue is called on subsys_initcall(), too.
> > > (acpi_wmi_notify_handler() -> acpi_bus_generate_netlink_event() ->
> > > netlink_broadcast())
> > >
> > > In my system, acpi_wmi_init() is called before the genl_init().
> > > Therefore, if the worker is scheduled before genl_init() is done, NULL
> > > derefence would happen.
> > 
> > Does it help to change the subsys_initcall() in wmi.c to subsys_initcall_sync()?
> 
> I guess that it would work. I cannot reproduce the issue so it needs
> to be checked by Jonathan. Jonathan, could you check the problem
> is disappeared with above change?

I have confirmed that the problem also occurs when using SLUB instead of
SLAB, and that switching drivers/platform/x86/wmi.c to use
subsys_initcall_sync() instead of subsys_initcall() fixes the problem
for both. Weirdly I don't see the ACPI 208 event at boot time being
raised once that patch is in place.

J.

-- 
] https://www.earth.li/~noodles/ [] Synonym: word used when you can't  [
]  PGP/GPG Key @ the.earth.li    []       spell the one you want       [
] via keyserver, web or email.   []                                    [
] RSA: 4096/0x94FA372B2DA8B985   []                                    [

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH] 3c59x: fix missing dma_mapping_error check
From: Neil Horman @ 2018-01-03 10:42 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, nhorman, klassert
In-Reply-To: <20180102.214827.2231558813164114000.davem@davemloft.net>

On Tue, Jan 02, 2018 at 09:48:27PM -0500, David Miller wrote:
> From: Neil Horman <nhorman@tuxdriver.com>
> Date: Fri, 29 Dec 2017 11:40:10 -0500
> 
> > @@ -2067,6 +2072,9 @@ vortex_start_xmit(struct sk_buff *skb, struct net_device *dev)
> >  		int len = (skb->len + 3) & ~3;
> >  		vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len,
> >  						PCI_DMA_TODEVICE);
> > +		if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma))
> > +			return NETDEV_TX_OK;
> > +
> 
> This leaks the SKB, right?
> 
Crud, I think you're right, I'll respin this to address today.

> And for the RX cases, it allows the RX ring to deplete to empty which
> tends to hang most chips.  You need to make the DMA failure detection
> early and recycle the RX buffer back to the chip instead of passing
> it up to the stack.
> 
Strictly speaking, I think we're ok here, because the dirty_rx counter creates a
contiguous area to refill, and we will just pick up where we left off on the
next napi poll.

That said, it can still depelete if we get several consecutive
dma_mapping_errors in a sufficiently long period of time that the ring doesn't
ever refill, so yeah, recycling will be better.  That will take a bigger rewrite
of this code.  I'll post something asap.

Neil

^ permalink raw reply

* Re: [net-next: PATCH 0/8] Armada 7k/8k PP2 ACPI support
From: Graeme Gregory @ 2018-01-03 11:00 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: Marcin Wojtas, linux-acpi@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	<netdev@vger.kernel.org>, David S. Miller,
	Russell King - ARM Linux, Rafael J. Wysocki, Andrew Lunn,
	Florian Fainelli, Antoine Ténart, Thomas Petazzoni,
	Gregory CLEMENT, ezequiel.garcia, Nadav Haklai
In-Reply-To: <CAKv+Gu8_6y3dU81ZS4WWCfpRZkfjGo_+_K--e3go_3_xXwBErQ@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 3097 bytes --]

On Mon, Dec 18, 2017 at 10:40:31AM +0100, Ard Biesheuvel wrote:
> On 18 December 2017 at 10:17, Marcin Wojtas <mw@semihalf.com> wrote:
> > Hi,
> >
> > This patchset introduces ACPI support in mvpp2 and mvmdio drivers.
> > First three patches introduce fwnode helpers for obtaining PHY
> > information from nodes and also MDIO fwnode API for registering
> > the bus with its PHY/devices.
> >
> > Following patches update code of the mvmdio and mvpp2 drivers
> > to support ACPI tables handling. The latter is done in 4 steps,
> > as can be seen in the commits. For the details, please
> > refer to the commit messages.
> >
> > Drivers operation was tested on top of the net-next branch
> > with both DT and ACPI. Although for compatibility reasons
> > with older platforms, the mvmdio driver keeps using
> > of_ MDIO registering, new fwnode_ one proved to fully work
> > with DT as well (tested on MacchiatoBin board).
> >
> > mvpp2/mvmdio driver can work with the ACPI representation, as exposed
> > on a public branch:
> > https://github.com/MarvellEmbeddedProcessors/edk2-open-platform/commits/marvell-armada-wip
> > It was compiled together with the most recent Tianocore EDK2 revision.
> > Please refer to the firmware build instruction on MacchiatoBin board:
> > http://wiki.macchiatobin.net/tiki-index.php?page=Build+from+source+-+UEFI+EDK+II
> >
> > Above support configures 1G to use its PHY normally. 10G can work now
> > only with the link interrupt mode. Somehow reading of the
> > string property in fwnode_mdiobus_child_is_phy works only with
> > DT and cannot cope with 10G PHY nodes as in:
> > https://pastebin.com/3JnYpU0A
> >
> > Above root cause will be further checked. In the meantime I will
> > appreciate any comments or remarks for the kernel patches.
> >
> 
> Hi Marcin,
> 
> I have added linux-acpi and Graeme to cc. I think it makes sense to
> discuss the way you describe the device topology before looking at the
> patches in more detail.
> 
> In particular, I would like to request feedback on the use of
> [redundant] 'reg' properties and the use of _DSD + compatible to
> describe PHYs. Usually, we try to avoid this, given that it is
> essentially a ACPI encapsulated DT dialect that is difficult to
> support in drivers unless they are based on DT to begin with. Also,
> non-standard _DSD properties require a vendor prefix, it is not
> freeform.
> 
> For reference, the ACPI description is here (starting at line 175)
> https://github.com/MarvellEmbeddedProcessors/edk2-open-platform/blob/72d5ac23b20dd74d479daa5e40ba443264b31261/Platforms/Marvell/Armada/AcpiTables/Armada80x0McBin/Dsdt.asl
> 
So the representation of PHY's with _DSD was kind of formalised here

http://www.uefi.org/sites/default/files/resources/nic-request-v2.pdf

This is already in use in the kernel, and that DSDT seems to be along the same
lines. So seems ok in that manner.

The "reg", 0 property seems a little odd, it would probably make more
sense to check for the lack of ranges passed in in ACPI manner _CRS.

Graeme


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [RFT net-next v3 3/5] net: stmmac: dwmac-meson8b: fix internal RGMII clock configuration
From: Jerome Brunet @ 2018-01-03 11:06 UTC (permalink / raw)
  To: Martin Blumenstingl
  Cc: netdev, ingrassia, linus.luessing, khilman, linux-amlogic,
	Neil Armstrong, peppe.cavallaro, alexandre.torgue
In-Reply-To: <CAFBinCAXy+cgju3peA=YBHJReUu=vh8to4UVU5qebN67AZOgPg@mail.gmail.com>

On Sat, 2017-12-30 at 00:40 +0100, Martin Blumenstingl wrote:
> > Maybe this bit 10 is indeed a 5/10 divider, as amlogic claims it is. Maybe, as
> > Emiliano suggested, the output rate of div250 actually needs to be 250Mhz in
> > RGMII, before being divided by 10 to produce the 25MHz of div25
> > 
> > IOW, maybe we need this intermediate rate.
> 
> I am starting to believe that you (Emiliano and Jerome) are both right
> does anyone of you have access to a scope so we can measure the actual
> clock output?

I wanted to check this out on Gx but the 25M output is not any of the boards I
have (p200, OC2, S400). I should be able to look at the related SoC pad on the
p200 but, I don't know how to enable the GPIOCLK_1 Function 1 in the pinmux
configuration

> 
> > It would not be surprising, 1GBps usually requires a 125MHz clock somewhere.
> 
> this could mean that two clocks are derived from m250_div (names are
> not final obviously):
> - phy_ref_clk (25MHz or 50MHz)
> - rgmii_tx_clk (fixed divide by 2, 125MHz)

Probably yes.

What we consider in the code as div250 divider is actually described in snip of
doc we have as:
-----
bit 10 : Generate 25MHz clock for PHY
bit 9-7: RMII & RGMII mode:
- 001: clock source is 250MHz
- 010: clock source is 500MHz.
...
-----

1) It kind of shows that the minimum input frequency could be 250M indeed.
2) It is these unclear whether bit 10 is a gate or a divider ... ATM, I can't
check that myself
3) Looks like properly setting up div250 should also be done for RMII.

> 
> > This is still doable:
> > * Keep the divider
> > * drop CLK_SET_RATE_PARENT on div25
> > * call set_rate on div250 with 250MHz then on div25 with 25Mhz
> 
> yep, I will try this next
> this would also be work with the assumption that the rgmii_tx_clk is
> derived from m250_div

^ permalink raw reply

* Re: [net-next: PATCH 0/8] Armada 7k/8k PP2 ACPI support
From: Marcin Wojtas @ 2018-01-03 11:12 UTC (permalink / raw)
  To: Graeme Gregory
  Cc: Ard Biesheuvel, linux-acpi@vger.kernel.org,
	linux-kernel@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org,
	<netdev@vger.kernel.org>, David S. Miller,
	Russell King - ARM Linux, Rafael J. Wysocki, Andrew Lunn,
	Florian Fainelli, Antoine Ténart, Thomas Petazzoni,
	Gregory CLEMENT, Ezequiel Garcia, Nadav Haklai
In-Reply-To: <20180103110048.GA21230@xora-haswell>

Hi Graeme,

2018-01-03 12:00 GMT+01:00 Graeme Gregory <graeme.gregory@linaro.org>:
> On Mon, Dec 18, 2017 at 10:40:31AM +0100, Ard Biesheuvel wrote:
>> On 18 December 2017 at 10:17, Marcin Wojtas <mw@semihalf.com> wrote:
>> > Hi,
>> >
>> > This patchset introduces ACPI support in mvpp2 and mvmdio drivers.
>> > First three patches introduce fwnode helpers for obtaining PHY
>> > information from nodes and also MDIO fwnode API for registering
>> > the bus with its PHY/devices.
>> >
>> > Following patches update code of the mvmdio and mvpp2 drivers
>> > to support ACPI tables handling. The latter is done in 4 steps,
>> > as can be seen in the commits. For the details, please
>> > refer to the commit messages.
>> >
>> > Drivers operation was tested on top of the net-next branch
>> > with both DT and ACPI. Although for compatibility reasons
>> > with older platforms, the mvmdio driver keeps using
>> > of_ MDIO registering, new fwnode_ one proved to fully work
>> > with DT as well (tested on MacchiatoBin board).
>> >
>> > mvpp2/mvmdio driver can work with the ACPI representation, as exposed
>> > on a public branch:
>> > https://github.com/MarvellEmbeddedProcessors/edk2-open-platform/commits/marvell-armada-wip
>> > It was compiled together with the most recent Tianocore EDK2 revision.
>> > Please refer to the firmware build instruction on MacchiatoBin board:
>> > http://wiki.macchiatobin.net/tiki-index.php?page=Build+from+source+-+UEFI+EDK+II
>> >
>> > Above support configures 1G to use its PHY normally. 10G can work now
>> > only with the link interrupt mode. Somehow reading of the
>> > string property in fwnode_mdiobus_child_is_phy works only with
>> > DT and cannot cope with 10G PHY nodes as in:
>> > https://pastebin.com/3JnYpU0A
>> >
>> > Above root cause will be further checked. In the meantime I will
>> > appreciate any comments or remarks for the kernel patches.
>> >
>>
>> Hi Marcin,
>>
>> I have added linux-acpi and Graeme to cc. I think it makes sense to
>> discuss the way you describe the device topology before looking at the
>> patches in more detail.
>>
>> In particular, I would like to request feedback on the use of
>> [redundant] 'reg' properties and the use of _DSD + compatible to
>> describe PHYs. Usually, we try to avoid this, given that it is
>> essentially a ACPI encapsulated DT dialect that is difficult to
>> support in drivers unless they are based on DT to begin with. Also,
>> non-standard _DSD properties require a vendor prefix, it is not
>> freeform.
>>
>> For reference, the ACPI description is here (starting at line 175)
>> https://github.com/MarvellEmbeddedProcessors/edk2-open-platform/blob/72d5ac23b20dd74d479daa5e40ba443264b31261/Platforms/Marvell/Armada/AcpiTables/Armada80x0McBin/Dsdt.asl
>>
> So the representation of PHY's with _DSD was kind of formalised here
>
> http://www.uefi.org/sites/default/files/resources/nic-request-v2.pdf
>
> This is already in use in the kernel, and that DSDT seems to be along the same
> lines. So seems ok in that manner.
>
> The "reg", 0 property seems a little odd, it would probably make more
> sense to check for the lack of ranges passed in in ACPI manner _CRS.
>

I already agreed with 'reg' being awkward in the later emails.
Wouldn't _ADR be more appropriate to specify PHY address on MDIO bus?

Thanks,
Marcin

^ permalink raw reply

* Re: [net-next: PATCH v2 5/5] net: mvpp2: enable ACPI support in the driver
From: graeme.gregory @ 2018-01-03 11:16 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Marcin Wojtas, linux-kernel, linux-arm-kernel, netdev, linux-acpi,
	davem, linux, rafael.j.wysocki, f.fainelli, antoine.tenart,
	thomas.petazzoni, gregory.clement, ezequiel.garcia, nadavh, neta,
	ard.biesheuvel, jaz, tn
In-Reply-To: <20171231192354.GB20455@lunn.ch>

[-- Attachment #1: Type: text/plain, Size: 649 bytes --]

On Sun, Dec 31, 2017 at 08:23:54PM +0100, Andrew Lunn wrote:
> > * Modify way of obtaining interrupts - with ACPI they are resources
> >   bound to struct platform_device and it's not possible to obtain
> >   them directly from the child node. Hence a formula is used, depending
> >   on the port_id and number of possible CPUs.
> 
> Hi Marcin
> 
> I know nothing about ACPI. Is this limitation with respect to
> interrupts fundamental to ACPI, or just that nobody has implemented
> flexible interrupt support yet?
> 
The infrastructure is there to traverse trees of children, but I don't
think there any helper functions.

Graeme


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [net-next: PATCH v2 5/5] net: mvpp2: enable ACPI support in the driver
From: Marcin Wojtas @ 2018-01-03 11:19 UTC (permalink / raw)
  To: Graeme Gregory
  Cc: Andrew Lunn, linux-kernel, linux-arm-kernel, netdev, linux-acpi,
	David S. Miller, Russell King - ARM Linux, Rafael J. Wysocki,
	Florian Fainelli, Antoine Ténart, Thomas Petazzoni,
	Gregory Clément, Ezequiel Garcia, nadavh,
	Neta Zur Hershkovits, Ard Biesheuvel, Grzegorz Jaszczyk,
	Tomasz Nowicki
In-Reply-To: <20180103111611.GB21230@xora-haswell>

Graeme,

2018-01-03 12:16 GMT+01:00  <graeme.gregory@linaro.org>:
> On Sun, Dec 31, 2017 at 08:23:54PM +0100, Andrew Lunn wrote:
>> > * Modify way of obtaining interrupts - with ACPI they are resources
>> >   bound to struct platform_device and it's not possible to obtain
>> >   them directly from the child node. Hence a formula is used, depending
>> >   on the port_id and number of possible CPUs.
>>
>> Hi Marcin
>>
>> I know nothing about ACPI. Is this limitation with respect to
>> interrupts fundamental to ACPI, or just that nobody has implemented
>> flexible interrupt support yet?
>>
> The infrastructure is there to traverse trees of children, but I don't
> think there any helper functions.
>

Thanks, so if I implement such, do you expect any formal issues that
prevent its acceptance?

Best regards,
Marcin

^ permalink raw reply

* Re: [PATCH] ethernet: mlx4: Delete an error message for a failed memory allocation in five functions
From: Tariq Toukan @ 2018-01-03 11:24 UTC (permalink / raw)
  To: Julia Lawall, Tariq Toukan
  Cc: SF Markus Elfring, linux-rdma, netdev, LKML, kernel-janitors
In-Reply-To: <alpine.DEB.2.20.1801030904290.5305@hadrien>

On 03/01/2018 10:06 AM, Julia Lawall wrote:
> 
> 
> On Wed, 3 Jan 2018, Tariq Toukan wrote:
> 
>>
>>
>> On 01/01/2018 10:46 PM, SF Markus Elfring wrote:
>>> From: Markus Elfring <elfring@users.sourceforge.net>
>>> Date: Mon, 1 Jan 2018 21:42:27 +0100
>>>
>>> Omit an extra message for a memory allocation failure in these functions.
>>>
>>> This issue was detected by using the Coccinelle software.
>>>
>>> Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
>>> ---
>>
>> Is this an issue? Why? What is your motivation?
>> These are error messages, very informative, appear only upon errors, and in
>> control flow.
> 
> Strings take up space.  Since there is a backtrace on an out of memory
> problem, if the string does not provide any more information than the
> position of the call, then there is not much added value.  I don't know
> what was the string in this case.  If it provides some additional
> information, then it would be reasonable to keep it.

I don't really accept this claim...

Short informative strings worth the tiny space they consume. It helps 
the users of our driver understand what went wrong in simple words, 
without the need to understand the role of the functions/callstack or 
being familiar with different parts of the driver code.

In addition, some out-of-memory errors are recoverable, even though 
their backtrace is also printed. For example, in function 
mlx4_en_create_cq (appears in patch) we have a first allocation attempt 
(kzalloc_node) and a fallback (kzalloc). I'd prefer to state a clear 
error message only when both have failed, because otherwise the user 
might be confused whether the backtrace should indicate a malfunctioning 
interface, or not.

Tariq

> 
> julia
> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe kernel-janitors" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>

^ permalink raw reply

* Re: [net-next: PATCH v2 1/5] device property: Introduce fwnode_get_mac_address()
From: Rafael J. Wysocki @ 2018-01-03 11:27 UTC (permalink / raw)
  To: Marcin Wojtas
  Cc: linux-kernel, linux-arm-kernel, netdev, linux-acpi,
	graeme.gregory, davem, linux, rafael.j.wysocki, andrew,
	f.fainelli, antoine.tenart, thomas.petazzoni, gregory.clement,
	ezequiel.garcia, nadavh, neta, ard.biesheuvel, jaz, tn
In-Reply-To: <1514721520-18964-2-git-send-email-mw@semihalf.com>

On Sunday, December 31, 2017 12:58:36 PM CET Marcin Wojtas wrote:
> Until now there were two almost identical functions for
> obtaining MAC address - of_get_mac_address() and, more generic,
> device_get_mac_address(). However it is not uncommon,
> that the network interface is represented as a child
> of the actual controller, hence it is not associated
> directly to any struct device, required by the latter
> routine.
> 
> This commit allows for getting the MAC address for
> children nodes in the ACPI world by introducing a new function -
> fwnode_get_mac_address(). This commit also changes
> device_get_mac_address() routine to be its wrapper, in order
> to prevent unnecessary duplication.
> 
> Signed-off-by: Marcin Wojtas <mw@semihalf.com>

The changes look reasonable to me, so

Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

> ---
>  drivers/base/property.c  | 28 ++++++++++++++------
>  include/linux/property.h |  2 ++
>  2 files changed, 22 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/base/property.c b/drivers/base/property.c
> index 851b1b6..f261d1a 100644
> --- a/drivers/base/property.c
> +++ b/drivers/base/property.c
> @@ -1153,11 +1153,11 @@ int device_get_phy_mode(struct device *dev)
>  }
>  EXPORT_SYMBOL_GPL(device_get_phy_mode);
>  
> -static void *device_get_mac_addr(struct device *dev,
> +static void *fwnode_get_mac_addr(struct fwnode_handle *fwnode,
>  				 const char *name, char *addr,
>  				 int alen)
>  {
> -	int ret = device_property_read_u8_array(dev, name, addr, alen);
> +	int ret = fwnode_property_read_u8_array(fwnode, name, addr, alen);
>  
>  	if (ret == 0 && alen == ETH_ALEN && is_valid_ether_addr(addr))
>  		return addr;
> @@ -1165,8 +1165,8 @@ static void *device_get_mac_addr(struct device *dev,
>  }
>  
>  /**
> - * device_get_mac_address - Get the MAC for a given device
> - * @dev:	Pointer to the device
> + * fwnode_get_mac_address - Get the MAC from the firmware node
> + * @fwnode:	Pointer to the firmware node
>   * @addr:	Address of buffer to store the MAC in
>   * @alen:	Length of the buffer pointed to by addr, should be ETH_ALEN
>   *
> @@ -1187,19 +1187,31 @@ static void *device_get_mac_addr(struct device *dev,
>   * In this case, the real MAC is in 'local-mac-address', and 'mac-address'
>   * exists but is all zeros.
>  */
> -void *device_get_mac_address(struct device *dev, char *addr, int alen)
> +void *fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr, int alen)
>  {
>  	char *res;
>  
> -	res = device_get_mac_addr(dev, "mac-address", addr, alen);
> +	res = fwnode_get_mac_addr(fwnode, "mac-address", addr, alen);
>  	if (res)
>  		return res;
>  
> -	res = device_get_mac_addr(dev, "local-mac-address", addr, alen);
> +	res = fwnode_get_mac_addr(fwnode, "local-mac-address", addr, alen);
>  	if (res)
>  		return res;
>  
> -	return device_get_mac_addr(dev, "address", addr, alen);
> +	return fwnode_get_mac_addr(fwnode, "address", addr, alen);
> +}
> +EXPORT_SYMBOL(fwnode_get_mac_address);
> +
> +/**
> + * device_get_mac_address - Get the MAC for a given device
> + * @dev:	Pointer to the device
> + * @addr:	Address of buffer to store the MAC in
> + * @alen:	Length of the buffer pointed to by addr, should be ETH_ALEN
> + */
> +void *device_get_mac_address(struct device *dev, char *addr, int alen)
> +{
> +	return fwnode_get_mac_address(dev_fwnode(dev), addr, alen);
>  }
>  EXPORT_SYMBOL(device_get_mac_address);
>  
> diff --git a/include/linux/property.h b/include/linux/property.h
> index f6189a3..35620e0 100644
> --- a/include/linux/property.h
> +++ b/include/linux/property.h
> @@ -279,6 +279,8 @@ int device_get_phy_mode(struct device *dev);
>  
>  void *device_get_mac_address(struct device *dev, char *addr, int alen);
>  
> +void *fwnode_get_mac_address(struct fwnode_handle *fwnode,
> +			     char *addr, int alen);
>  struct fwnode_handle *fwnode_graph_get_next_endpoint(
>  	const struct fwnode_handle *fwnode, struct fwnode_handle *prev);
>  struct fwnode_handle *
> 

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox