DPDK-dev Archive on lore.kernel.org

DPDK-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v2 5/6] net/virtio: fix multiple process support
From: Yuanhan Liu @ 2016-12-28 11:02 UTC (permalink / raw)
  To: dev; +Cc: Yuanhan Liu, stable, Juho Snellman, Yaron Illouz
In-Reply-To: <1482922962-21036-1-git-send-email-yuanhan.liu@linux.intel.com>

The introduce of virtio 1.0 support brings yet another set of ops, badly,
it's not handled correctly, that it breaks the multiple process support.

The issue is the data/function pointer may vary from different processes,
and the old used to do one time set (for primary process only). That
said, the function pointer the secondary process saw is actually from the
primary process space. Accessing it could likely result to a crash.

Kudos to the last patches, we now be able to maintain those info that may
vary among different process locally, meaning every process could have its
own copy for each of them, with the correct value set. And this is what
this patch does:

- remap the PCI (IO port for legacy device and memory map for modern
  device)

- set vtpci_ops correctly

After that, multiple process would work like a charm. (At least, it
passed my fuzzy test)

Fixes: b8f04520ad71 ("virtio: use PCI ioport API")
Fixes: d5bbeefca826 ("virtio: introduce PCI implementation structure")
Fixes: 6ba1f63b5ab0 ("virtio: support specification 1.0")

Cc: stable@kernel.org
Cc: Juho Snellman <jsnell@iki.fi>
Cc: Yaron Illouz <yaroni@radcom.com>
Reported-by: Juho Snellman <jsnell@iki.fi>
Reported-by: Yaron Illouz <yaroni@radcom.com>
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
 drivers/net/virtio/virtio_ethdev.c      | 53 +++++++++++++++++++++++++++++++--
 drivers/net/virtio/virtio_pci.c         |  4 +--
 drivers/net/virtio/virtio_pci.h         |  4 +++
 drivers/net/virtio/virtio_user_ethdev.c |  2 +-
 4 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 5567aa2..19d4348 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -1289,6 +1289,49 @@ virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
 }
 
 /*
+ * Remap the PCI device again (IO port map for legacy device and
+ * memory map for modern device), so that the secondary process
+ * could have the PCI initiated correctly.
+ */
+static int
+virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
+{
+	if (hw->modern) {
+		/*
+		 * We don't have to re-parse the PCI config space, since
+		 * rte_eal_pci_map_device() makes sure the mapped address
+		 * in secondary process would equal to the one mapped in
+		 * the primary process: error will be returned if that
+		 * requirement is not met.
+		 *
+		 * That said, we could simply reuse all cap pointers
+		 * (such as dev_cfg, common_cfg, etc.) parsed from the
+		 * primary process, which is stored in shared memory.
+		 */
+		if (rte_eal_pci_map_device(pci_dev)) {
+			PMD_INIT_LOG(DEBUG, "failed to map pci device!");
+			return -1;
+		}
+	} else {
+		if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+static void
+virtio_set_vtpci_ops(struct rte_pci_device *pci_dev, struct virtio_hw *hw)
+{
+	if (pci_dev == NULL)
+		VTPCI_OPS(hw) = &virtio_user_ops;
+	else if (hw->modern)
+		VTPCI_OPS(hw) = &modern_ops;
+	else
+		VTPCI_OPS(hw) = &legacy_ops;
+}
+
+/*
  * This function is based on probe() function in virtio_pci.c
  * It returns 0 on success.
  */
@@ -1296,7 +1339,7 @@ int
 eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 {
 	struct virtio_hw *hw = eth_dev->data->dev_private;
-	struct rte_pci_device *pci_dev;
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
 	uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE;
 	int ret;
 
@@ -1306,6 +1349,13 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 	eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
 
 	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		if (pci_dev) {
+			ret = virtio_remap_pci(pci_dev, hw);
+			if (ret)
+				return ret;
+		}
+
+		virtio_set_vtpci_ops(pci_dev, hw);
 		if (hw->use_simple_rxtx) {
 			eth_dev->tx_pkt_burst = virtio_xmit_pkts_simple;
 			eth_dev->rx_pkt_burst = virtio_recv_pkts_vec;
@@ -1325,7 +1375,6 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 	}
 
 	hw->port_id = eth_dev->data->port_id;
-	pci_dev = eth_dev->pci_dev;
 
 	if (pci_dev) {
 		ret = vtpci_init(pci_dev, hw, &dev_flags);
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index d1e9c05..f5754e5 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -303,7 +303,7 @@ legacy_virtio_resource_init(struct rte_pci_device *pci_dev,
 	return 0;
 }
 
-static const struct virtio_pci_ops legacy_ops = {
+const struct virtio_pci_ops legacy_ops = {
 	.read_dev_cfg	= legacy_read_dev_config,
 	.write_dev_cfg	= legacy_write_dev_config,
 	.reset		= legacy_reset,
@@ -519,7 +519,7 @@ modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
 	io_write16(1, vq->notify_addr);
 }
 
-static const struct virtio_pci_ops modern_ops = {
+const struct virtio_pci_ops modern_ops = {
 	.read_dev_cfg	= modern_read_dev_config,
 	.write_dev_cfg	= modern_write_dev_config,
 	.reset		= modern_reset,
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 6b9aecf..511a1c8 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -333,4 +333,8 @@ uint8_t vtpci_isr(struct virtio_hw *);
 
 uint16_t vtpci_irq_config(struct virtio_hw *, uint16_t);
 
+extern const struct virtio_pci_ops legacy_ops;
+extern const struct virtio_pci_ops modern_ops;
+extern const struct virtio_pci_ops virtio_user_ops;
+
 #endif /* _VIRTIO_PCI_H_ */
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 7d2a9d9..3563952 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -212,7 +212,7 @@ virtio_user_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
 			    strerror(errno));
 }
 
-static const struct virtio_pci_ops virtio_user_ops = {
+const struct virtio_pci_ops virtio_user_ops = {
 	.read_dev_cfg	= virtio_user_read_dev_config,
 	.write_dev_cfg	= virtio_user_write_dev_config,
 	.reset		= virtio_user_reset,
-- 
2.8.1

^ permalink raw reply related

* [PATCH v2 6/6] net/virtio: remove dead structure field
From: Yuanhan Liu @ 2016-12-28 11:02 UTC (permalink / raw)
  To: dev; +Cc: Yuanhan Liu
In-Reply-To: <1482922962-21036-1-git-send-email-yuanhan.liu@linux.intel.com>

Actually, virtio_hw->dev is not used since the beginning when it's
introduced. Remove it.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
 drivers/net/virtio/virtio_pci.c | 2 --
 drivers/net/virtio/virtio_pci.h | 1 -
 2 files changed, 3 deletions(-)

diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index f5754e5..fbdb5b7 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -730,8 +730,6 @@ int
 vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw,
 	   uint32_t *dev_flags)
 {
-	hw->dev = dev;
-
 	/*
 	 * Try if we can succeed reading virtio pci caps, which exists
 	 * only on modern pci device. If failed, we fallback to legacy
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 511a1c8..4235bef 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -258,7 +258,6 @@ struct virtio_hw {
 	uint32_t    notify_off_multiplier;
 	uint8_t     *isr;
 	uint16_t    *notify_base;
-	struct rte_pci_device *dev;
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
 	void	    *virtio_user_dev;
-- 
2.8.1

^ permalink raw reply related

* [PATCH v2 4/6] net/virtio: store IO port info locally
From: Yuanhan Liu @ 2016-12-28 11:02 UTC (permalink / raw)
  To: dev; +Cc: Yuanhan Liu
In-Reply-To: <1482922962-21036-1-git-send-email-yuanhan.liu@linux.intel.com>

Like vtpci_ops, the rte_pci_ioport has to store in local memory. This
is basically for the rte_pci_device field is allocated from process
local memory, but not from shared memory.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
 drivers/net/virtio/virtio_pci.c | 49 ++++++++++++++++++++++-------------------
 drivers/net/virtio/virtio_pci.h |  3 ++-
 2 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index b1f2e18..d1e9c05 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -92,17 +92,17 @@ legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
 	while (length > 0) {
 		if (length >= 4) {
 			size = 4;
-			rte_eal_pci_ioport_read(&hw->io, dst, size,
+			rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
 				VIRTIO_PCI_CONFIG(hw) + offset);
 			*(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst);
 		} else if (length >= 2) {
 			size = 2;
-			rte_eal_pci_ioport_read(&hw->io, dst, size,
+			rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
 				VIRTIO_PCI_CONFIG(hw) + offset);
 			*(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst);
 		} else {
 			size = 1;
-			rte_eal_pci_ioport_read(&hw->io, dst, size,
+			rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size,
 				VIRTIO_PCI_CONFIG(hw) + offset);
 		}
 
@@ -111,7 +111,7 @@ legacy_read_dev_config(struct virtio_hw *hw, size_t offset,
 		length -= size;
 	}
 #else
-	rte_eal_pci_ioport_read(&hw->io, dst, length,
+	rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, length,
 				VIRTIO_PCI_CONFIG(hw) + offset);
 #endif
 }
@@ -131,16 +131,16 @@ legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
 		if (length >= 4) {
 			size = 4;
 			tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src);
-			rte_eal_pci_ioport_write(&hw->io, &tmp.u32, size,
+			rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size,
 				VIRTIO_PCI_CONFIG(hw) + offset);
 		} else if (length >= 2) {
 			size = 2;
 			tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src);
-			rte_eal_pci_ioport_write(&hw->io, &tmp.u16, size,
+			rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size,
 				VIRTIO_PCI_CONFIG(hw) + offset);
 		} else {
 			size = 1;
-			rte_eal_pci_ioport_write(&hw->io, src, size,
+			rte_eal_pci_ioport_write(VTPCI_IO(hw), src, size,
 				VIRTIO_PCI_CONFIG(hw) + offset);
 		}
 
@@ -149,7 +149,7 @@ legacy_write_dev_config(struct virtio_hw *hw, size_t offset,
 		length -= size;
 	}
 #else
-	rte_eal_pci_ioport_write(&hw->io, src, length,
+	rte_eal_pci_ioport_write(VTPCI_IO(hw), src, length,
 				 VIRTIO_PCI_CONFIG(hw) + offset);
 #endif
 }
@@ -159,7 +159,7 @@ legacy_get_features(struct virtio_hw *hw)
 {
 	uint32_t dst;
 
-	rte_eal_pci_ioport_read(&hw->io, &dst, 4, VIRTIO_PCI_HOST_FEATURES);
+	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 4, VIRTIO_PCI_HOST_FEATURES);
 	return dst;
 }
 
@@ -171,7 +171,7 @@ legacy_set_features(struct virtio_hw *hw, uint64_t features)
 			"only 32 bit features are allowed for legacy virtio!");
 		return;
 	}
-	rte_eal_pci_ioport_write(&hw->io, &features, 4,
+	rte_eal_pci_ioport_write(VTPCI_IO(hw), &features, 4,
 				 VIRTIO_PCI_GUEST_FEATURES);
 }
 
@@ -180,14 +180,14 @@ legacy_get_status(struct virtio_hw *hw)
 {
 	uint8_t dst;
 
-	rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_STATUS);
+	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS);
 	return dst;
 }
 
 static void
 legacy_set_status(struct virtio_hw *hw, uint8_t status)
 {
-	rte_eal_pci_ioport_write(&hw->io, &status, 1, VIRTIO_PCI_STATUS);
+	rte_eal_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS);
 }
 
 static void
@@ -201,7 +201,7 @@ legacy_get_isr(struct virtio_hw *hw)
 {
 	uint8_t dst;
 
-	rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_ISR);
+	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR);
 	return dst;
 }
 
@@ -211,8 +211,10 @@ legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec)
 {
 	uint16_t dst;
 
-	rte_eal_pci_ioport_write(&hw->io, &vec, 2, VIRTIO_MSI_CONFIG_VECTOR);
-	rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_MSI_CONFIG_VECTOR);
+	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vec, 2,
+				 VIRTIO_MSI_CONFIG_VECTOR);
+	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2,
+				VIRTIO_MSI_CONFIG_VECTOR);
 	return dst;
 }
 
@@ -221,8 +223,9 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
 {
 	uint16_t dst;
 
-	rte_eal_pci_ioport_write(&hw->io, &queue_id, 2, VIRTIO_PCI_QUEUE_SEL);
-	rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_PCI_QUEUE_NUM);
+	rte_eal_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2,
+				 VIRTIO_PCI_QUEUE_SEL);
+	rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM);
 	return dst;
 }
 
@@ -234,10 +237,10 @@ legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
 	if (!check_vq_phys_addr_ok(vq))
 		return -1;
 
-	rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2,
+	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
 			 VIRTIO_PCI_QUEUE_SEL);
 	src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
-	rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN);
+	rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
 
 	return 0;
 }
@@ -247,15 +250,15 @@ legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
 	uint32_t src = 0;
 
-	rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2,
+	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
 			 VIRTIO_PCI_QUEUE_SEL);
-	rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN);
+	rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN);
 }
 
 static void
 legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq)
 {
-	rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2,
+	rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2,
 			 VIRTIO_PCI_QUEUE_NOTIFY);
 }
 
@@ -289,7 +292,7 @@ static int
 legacy_virtio_resource_init(struct rte_pci_device *pci_dev,
 			    struct virtio_hw *hw, uint32_t *dev_flags)
 {
-	if (rte_eal_pci_ioport_map(pci_dev, 0, &hw->io) < 0)
+	if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0)
 		return -1;
 
 	if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UNKNOWN)
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 268bb82..6b9aecf 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -245,7 +245,6 @@ struct virtio_net_config;
 
 struct virtio_hw {
 	struct virtnet_ctl *cvq;
-	struct rte_pci_ioport io;
 	uint64_t    req_guest_features;
 	uint64_t    guest_features;
 	uint32_t    max_queue_pairs;
@@ -275,9 +274,11 @@ struct virtio_hw {
  */
 struct virtio_hw_internal {
 	const struct virtio_pci_ops *vtpci_ops;
+	struct rte_pci_ioport io;
 };
 
 #define VTPCI_OPS(hw)	(virtio_hw_internal[(hw)->port_id].vtpci_ops)
+#define VTPCI_IO(hw)	(&virtio_hw_internal[(hw)->port_id].io)
 
 extern struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
 
-- 
2.8.1

^ permalink raw reply related

* [PATCH v2 3/6] net/virtio: store PCI operators pointer locally
From: Yuanhan Liu @ 2016-12-28 11:02 UTC (permalink / raw)
  To: dev; +Cc: Yuanhan Liu
In-Reply-To: <1482922962-21036-1-git-send-email-yuanhan.liu@linux.intel.com>

We used to store the vtpci_ops at virtio_hw structure. The struct,
however, is stored in shared memory. That means only one value is
allowed. For the multiple process model, however, the address of
vtpci_ops should be different among different processes.

Take virtio PMD as example, the vtpci_ops is set by the primary
process, based on its own process space. If we access that address
from the secondary process, that would be an illegal memory access,
A crash then might happen.

To make the multiple process model work, we need store the vtpci_ops
in local memory but not in a shared memory. This is what the patch
does: a local virtio_hw_internal array of size RTE_MAX_ETHPORTS is
allocated. This new structure is used to store all these kind of
info in a non-shared memory. Current, we have:

- vtpci_ops

- rte_pci_ioport

- virtio pci mapped memory, such as common_cfg.

The later two will be done in coming patches. Later patches would also
set them correctly for secondary process, so that the multiple process
model could work.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
 drivers/net/virtio/virtio_ethdev.c      |  9 ++++++---
 drivers/net/virtio/virtio_pci.c         | 26 +++++++++++++-------------
 drivers/net/virtio/virtio_pci.h         | 17 ++++++++++++++++-
 drivers/net/virtio/virtio_user_ethdev.c |  3 ++-
 drivers/net/virtio/virtqueue.h          |  2 +-
 5 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index ef37ad1..5567aa2 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -152,6 +152,8 @@ static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = {
 #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \
 			    sizeof(rte_virtio_txq_stat_strings[0]))
 
+struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
+
 static int
 virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl,
 		int *dlen, int pkt_num)
@@ -360,7 +362,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
 	 * Read the virtqueue size from the Queue Size field
 	 * Always power of 2 and if 0 virtqueue does not exist
 	 */
-	vq_size = hw->vtpci_ops->get_queue_num(hw, vtpci_queue_idx);
+	vq_size = VTPCI_OPS(hw)->get_queue_num(hw, vtpci_queue_idx);
 	PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size);
 	if (vq_size == 0) {
 		PMD_INIT_LOG(ERR, "virtqueue does not exist");
@@ -519,7 +521,7 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
 		}
 	}
 
-	if (hw->vtpci_ops->setup_queue(hw, vq) < 0) {
+	if (VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) {
 		PMD_INIT_LOG(ERR, "setup_queue failed");
 		return -EINVAL;
 	}
@@ -1114,7 +1116,7 @@ virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features)
 		req_features);
 
 	/* Read device(host) feature bits */
-	host_features = hw->vtpci_ops->get_features(hw);
+	host_features = VTPCI_OPS(hw)->get_features(hw);
 	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
 		host_features);
 
@@ -1322,6 +1324,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 		return -ENOMEM;
 	}
 
+	hw->port_id = eth_dev->data->port_id;
 	pci_dev = eth_dev->pci_dev;
 
 	if (pci_dev) {
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 9b47165..b1f2e18 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -537,14 +537,14 @@ void
 vtpci_read_dev_config(struct virtio_hw *hw, size_t offset,
 		      void *dst, int length)
 {
-	hw->vtpci_ops->read_dev_cfg(hw, offset, dst, length);
+	VTPCI_OPS(hw)->read_dev_cfg(hw, offset, dst, length);
 }
 
 void
 vtpci_write_dev_config(struct virtio_hw *hw, size_t offset,
 		       const void *src, int length)
 {
-	hw->vtpci_ops->write_dev_cfg(hw, offset, src, length);
+	VTPCI_OPS(hw)->write_dev_cfg(hw, offset, src, length);
 }
 
 uint64_t
@@ -557,7 +557,7 @@ vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features)
 	 * host all support.
 	 */
 	features = host_features & hw->guest_features;
-	hw->vtpci_ops->set_features(hw, features);
+	VTPCI_OPS(hw)->set_features(hw, features);
 
 	return features;
 }
@@ -565,9 +565,9 @@ vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features)
 void
 vtpci_reset(struct virtio_hw *hw)
 {
-	hw->vtpci_ops->set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
+	VTPCI_OPS(hw)->set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
 	/* flush status write */
-	hw->vtpci_ops->get_status(hw);
+	VTPCI_OPS(hw)->get_status(hw);
 }
 
 void
@@ -580,21 +580,21 @@ void
 vtpci_set_status(struct virtio_hw *hw, uint8_t status)
 {
 	if (status != VIRTIO_CONFIG_STATUS_RESET)
-		status |= hw->vtpci_ops->get_status(hw);
+		status |= VTPCI_OPS(hw)->get_status(hw);
 
-	hw->vtpci_ops->set_status(hw, status);
+	VTPCI_OPS(hw)->set_status(hw, status);
 }
 
 uint8_t
 vtpci_get_status(struct virtio_hw *hw)
 {
-	return hw->vtpci_ops->get_status(hw);
+	return VTPCI_OPS(hw)->get_status(hw);
 }
 
 uint8_t
 vtpci_isr(struct virtio_hw *hw)
 {
-	return hw->vtpci_ops->get_isr(hw);
+	return VTPCI_OPS(hw)->get_isr(hw);
 }
 
 
@@ -602,7 +602,7 @@ vtpci_isr(struct virtio_hw *hw)
 uint16_t
 vtpci_irq_config(struct virtio_hw *hw, uint16_t vec)
 {
-	return hw->vtpci_ops->set_config_irq(hw, vec);
+	return VTPCI_OPS(hw)->set_config_irq(hw, vec);
 }
 
 static void *
@@ -736,8 +736,8 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw,
 	 */
 	if (virtio_read_caps(dev, hw) == 0) {
 		PMD_INIT_LOG(INFO, "modern virtio pci detected.");
-		hw->vtpci_ops = &modern_ops;
-		hw->modern    = 1;
+		virtio_hw_internal[hw->port_id].vtpci_ops = &modern_ops;
+		hw->modern = 1;
 		*dev_flags |= RTE_ETH_DEV_INTR_LSC;
 		return 0;
 	}
@@ -755,7 +755,7 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw,
 		return -1;
 	}
 
-	hw->vtpci_ops = &legacy_ops;
+	virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops;
 	hw->use_msix = legacy_virtio_has_msix(&dev->addr);
 	hw->modern   = 0;
 
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index de271bf..268bb82 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -254,6 +254,7 @@ struct virtio_hw {
 	uint8_t	    use_msix;
 	uint8_t     modern;
 	uint8_t     use_simple_rxtx;
+	uint8_t     port_id;
 	uint8_t     mac_addr[ETHER_ADDR_LEN];
 	uint32_t    notify_off_multiplier;
 	uint8_t     *isr;
@@ -261,12 +262,26 @@ struct virtio_hw {
 	struct rte_pci_device *dev;
 	struct virtio_pci_common_cfg *common_cfg;
 	struct virtio_net_config *dev_cfg;
-	const struct virtio_pci_ops *vtpci_ops;
 	void	    *virtio_user_dev;
 
 	struct virtqueue **vqs;
 };
 
+
+/*
+ * While virtio_hw is stored in shared memory, this structure stores
+ * some infos that may vary in the multiple process model locally.
+ * For example, the vtpci_ops pointer.
+ */
+struct virtio_hw_internal {
+	const struct virtio_pci_ops *vtpci_ops;
+};
+
+#define VTPCI_OPS(hw)	(virtio_hw_internal[(hw)->port_id].vtpci_ops)
+
+extern struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS];
+
+
 /*
  * This structure is just a reference to read
  * net device specific config space; it just a chodu structure
diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
index 406beea..7d2a9d9 100644
--- a/drivers/net/virtio/virtio_user_ethdev.c
+++ b/drivers/net/virtio/virtio_user_ethdev.c
@@ -301,7 +301,8 @@ virtio_user_eth_dev_alloc(const char *name)
 		return NULL;
 	}
 
-	hw->vtpci_ops = &virtio_user_ops;
+	hw->port_id = data->port_id;
+	virtio_hw_internal[hw->port_id].vtpci_ops = &virtio_user_ops;
 	hw->use_msix = 0;
 	hw->modern   = 0;
 	hw->use_simple_rxtx = 0;
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index f0bb089..b1070e0 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -330,7 +330,7 @@ virtqueue_notify(struct virtqueue *vq)
 	 * For virtio on IA, the notificaiton is through io port operation
 	 * which is a serialization instruction itself.
 	 */
-	vq->hw->vtpci_ops->notify_queue(vq->hw, vq);
+	VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq);
 }
 
 #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP
-- 
2.8.1

^ permalink raw reply related

* [PATCH v2 0/6] net/virtio: fix several multiple process issues
From: Yuanhan Liu @ 2016-12-28 11:02 UTC (permalink / raw)
  To: dev; +Cc: Yuanhan Liu
In-Reply-To: <1482391123-8149-1-git-send-email-yuanhan.liu@linux.intel.com>

This patch series fixes few crash issues regarding to multiple process
model. In my limited fuzzy test, now it works for both virtio 0.95 and
1.0, as well as for the case some virtio-net devices are managed by
kernel device while some others are managed by DPDK.

---
Maintaining the multiple process support is not an easy task -- you
have to be very mindful while coding -- what kind of stuff should
and should not be in shared memory. Otherwise, it's very likely the
multiple process model will be broken.

A typical example is the ops pointer, a pointer to a set of function
pointers.  Normally, it's a pointer stored in a read-only data section
of the application:

    static const struct virtio_pci_ops legacy_ops = {
            ...,
    }

The pointer, of course, may vary in different process space. If,
however, we store the pointer into shared memory, we could only
have one value for it.  Setting it from process A and accessing
it from process B would likely lead to an illegal memory access.
As a result, crash happens.

The fix is to keep those addresses locally, in a new struct,
virtio_hw_internal. By that, each process maintains it's own
version of the pointer (from its own process space). Thus,
everything would work as expected.

---
Yuanhan Liu (6):
  ethdev: fix port data mismatched in multiple process model
  net/virtio: fix wrong Rx/Tx method for secondary process
  net/virtio: store PCI operators pointer locally
  net/virtio: store IO port info locally
  net/virtio: fix multiple process support
  net/virtio: remove dead structure field

 drivers/net/virtio/virtio_ethdev.c      | 69 +++++++++++++++++++++++++---
 drivers/net/virtio/virtio_pci.c         | 81 +++++++++++++++++----------------
 drivers/net/virtio/virtio_pci.h         | 25 ++++++++--
 drivers/net/virtio/virtio_user_ethdev.c |  5 +-
 drivers/net/virtio/virtqueue.h          |  2 +-
 lib/librte_ether/rte_ethdev.c           | 58 ++++++++++++++++++++---
 6 files changed, 182 insertions(+), 58 deletions(-)

-- 
2.8.1

^ permalink raw reply

* [PATCH v4 6/6] net/mlx5: extend IPv4 flow item
From: Nelio Laranjeiro @ 2016-12-28 10:37 UTC (permalink / raw)
  To: dev; +Cc: Adrien Mazarguil
In-Reply-To: <cover.1482920437.git.nelio.laranjeiro@6wind.com>

This commits adds:
- Type of service
- Next protocol ID

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 01f7a77..77021b5 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -172,11 +172,13 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 			.hdr = {
 				.src_addr = -1,
 				.dst_addr = -1,
+				.type_of_service = -1,
+				.next_proto_id = -1,
 			},
 		},
 		.mask_sz = sizeof(struct rte_flow_item_ipv4),
 		.convert = mlx5_flow_create_ipv4,
-		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4),
+		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
 	},
 	[RTE_FLOW_ITEM_TYPE_IPV6] = {
 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
@@ -574,29 +576,35 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item, void *data)
 	const struct rte_flow_item_ipv4 *spec = item->spec;
 	const struct rte_flow_item_ipv4 *mask = item->mask;
 	struct mlx5_flow *flow = (struct mlx5_flow *)data;
-	struct ibv_exp_flow_spec_ipv4 *ipv4;
-	unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4);
+	struct ibv_exp_flow_spec_ipv4_ext *ipv4;
+	unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
 
 	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
-	*ipv4 = (struct ibv_exp_flow_spec_ipv4) {
-		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4,
+	*ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
+		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
 		.size = ipv4_size,
 	};
 	if (spec) {
-		ipv4->val = (struct ibv_exp_flow_ipv4_filter){
+		ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
 			.src_ip = spec->hdr.src_addr,
 			.dst_ip = spec->hdr.dst_addr,
+			.proto = spec->hdr.next_proto_id,
+			.tos = spec->hdr.type_of_service,
 		};
 	}
 	if (mask) {
-		ipv4->mask = (struct ibv_exp_flow_ipv4_filter){
+		ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
 			.src_ip = mask->hdr.src_addr,
 			.dst_ip = mask->hdr.dst_addr,
+			.proto = mask->hdr.next_proto_id,
+			.tos = mask->hdr.type_of_service,
 		};
 	}
 	/* Remove unwanted bits from values. */
 	ipv4->val.src_ip &= ipv4->mask.src_ip;
 	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
+	ipv4->val.proto &= ipv4->mask.proto;
+	ipv4->val.tos &= ipv4->mask.tos;
 	++flow->ibv_attr->num_of_specs;
 	flow->ibv_attr->priority = 1;
 	return 0;
-- 
2.1.4

^ permalink raw reply related

* [PATCH v4 5/6] net/mlx5: support mark flow action
From: Nelio Laranjeiro @ 2016-12-28 10:37 UTC (permalink / raw)
  To: dev; +Cc: Adrien Mazarguil
In-Reply-To: <cover.1482920437.git.nelio.laranjeiro@6wind.com>

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 78 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_prm.h  | 70 ++++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_rxtx.c | 12 ++++++-
 drivers/net/mlx5/mlx5_rxtx.h |  3 +-
 4 files changed, 160 insertions(+), 3 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 1ec0ef5..01f7a77 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -50,6 +50,7 @@
 #include <rte_malloc.h>
 
 #include "mlx5.h"
+#include "mlx5_prm.h"
 
 static int
 mlx5_flow_create_eth(const struct rte_flow_item *item, void *data);
@@ -81,6 +82,7 @@ struct rte_flow {
 	struct ibv_exp_wq *wq; /**< Verbs work queue. */
 	struct ibv_cq *cq; /**< Verbs completion queue. */
 	struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */
+	uint32_t mark:1; /**< Set if the flow is marked. */
 };
 
 /** Static initializer for items. */
@@ -119,6 +121,7 @@ struct mlx5_flow_items {
 static const enum rte_flow_action_type valid_actions[] = {
 	RTE_FLOW_ACTION_TYPE_DROP,
 	RTE_FLOW_ACTION_TYPE_QUEUE,
+	RTE_FLOW_ACTION_TYPE_MARK,
 	RTE_FLOW_ACTION_TYPE_END,
 };
 
@@ -246,7 +249,9 @@ struct mlx5_flow {
 struct mlx5_flow_action {
 	uint32_t queue:1; /**< Target is a receive queue. */
 	uint32_t drop:1; /**< Target is a drop queue. */
+	uint32_t mark:1; /**< Mark is present in the flow. */
 	uint32_t queue_id; /**< Identifier of the queue. */
+	uint32_t mark_id; /**< Mark identifier. */
 };
 
 /**
@@ -341,6 +346,7 @@ priv_flow_validate(struct priv *priv,
 	struct mlx5_flow_action action = {
 		.queue = 0,
 		.drop = 0,
+		.mark = 0,
 	};
 
 	(void)priv;
@@ -427,10 +433,26 @@ priv_flow_validate(struct priv *priv,
 			if (!queue || (queue->index > (priv->rxqs_n - 1)))
 				goto exit_action_not_supported;
 			action.queue = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
+			const struct rte_flow_action_mark *mark =
+				(const struct rte_flow_action_mark *)
+				actions->conf;
+
+			if (mark && (mark->id >= MLX5_FLOW_MARK_MAX)) {
+				rte_flow_error_set(error, ENOTSUP,
+						   RTE_FLOW_ERROR_TYPE_ACTION,
+						   actions,
+						   "mark must be between 0"
+						   " and 16777199");
+				return -rte_errno;
+			}
+			action.mark = 1;
 		} else {
 			goto exit_action_not_supported;
 		}
 	}
+	if (action.mark && !flow->ibv_attr)
+		flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
 	if (!action.queue && !action.drop) {
 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "no valid action");
@@ -745,6 +767,30 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item, void *data)
 }
 
 /**
+ * Convert mark/flag action to Verbs specification.
+ *
+ * @param flow
+ *   Pointer to MLX5 flow structure.
+ * @param mark_id
+ *   Mark identifier.
+ */
+static int
+mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
+{
+	struct ibv_exp_flow_spec_action_tag *tag;
+	unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);
+
+	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	*tag = (struct ibv_exp_flow_spec_action_tag){
+		.type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
+		.size = size,
+		.tag_id = mlx5_flow_mark_set(mark_id),
+	};
+	++flow->ibv_attr->num_of_specs;
+	return 0;
+}
+
+/**
  * Complete flow rule creation.
  *
  * @param priv
@@ -800,8 +846,10 @@ priv_flow_create_action_queue(struct priv *priv,
 		rxq = container_of((*priv->rxqs)[action->queue_id],
 				   struct rxq_ctrl, rxq);
 		rte_flow->rxq = &rxq->rxq;
+		rxq->rxq.mark |= action->mark;
 		rte_flow->wq = rxq->wq;
 	}
+	rte_flow->mark = action->mark;
 	rte_flow->ibv_attr = ibv_attr;
 	rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
 		priv->ctx,
@@ -917,6 +965,8 @@ priv_flow_create(struct priv *priv,
 	action = (struct mlx5_flow_action){
 		.queue = 0,
 		.drop = 0,
+		.mark = 0,
+		.mark_id = MLX5_FLOW_MARK_DEFAULT,
 	};
 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
 		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
@@ -928,6 +978,14 @@ priv_flow_create(struct priv *priv,
 				 actions->conf)->index;
 		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
 			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
+			const struct rte_flow_action_mark *mark =
+				(const struct rte_flow_action_mark *)
+				actions->conf;
+
+			if (mark)
+				action.mark_id = mark->id;
+			action.mark = 1;
 		} else {
 			rte_flow_error_set(error, ENOTSUP,
 					   RTE_FLOW_ERROR_TYPE_ACTION,
@@ -935,6 +993,10 @@ priv_flow_create(struct priv *priv,
 			goto exit;
 		}
 	}
+	if (action.mark) {
+		mlx5_flow_create_flag_mark(&flow, action.mark_id);
+		flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
+	}
 	rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr,
 						 &action, error);
 	return rte_flow;
@@ -993,6 +1055,18 @@ priv_flow_destroy(struct priv *priv,
 		claim_zero(ibv_exp_destroy_wq(flow->wq));
 	if (!flow->rxq && flow->cq)
 		claim_zero(ibv_destroy_cq(flow->cq));
+	if (flow->mark) {
+		struct rte_flow *tmp;
+		uint32_t mark_n = 0;
+
+		for (tmp = LIST_FIRST(&priv->flows);
+		     tmp;
+		     tmp = LIST_NEXT(tmp, next)) {
+			if ((flow->rxq == tmp->rxq) && tmp->mark)
+				++mark_n;
+		}
+		flow->rxq->mark = !!mark_n;
+	}
 	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
 	rte_free(flow);
@@ -1072,6 +1146,8 @@ priv_flow_stop(struct priv *priv)
 	     flow = LIST_NEXT(flow, next)) {
 		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
 		flow->ibv_flow = NULL;
+		if (flow->mark)
+			flow->rxq->mark = 0;
 		DEBUG("Flow %p removed", (void *)flow);
 	}
 }
@@ -1101,6 +1177,8 @@ priv_flow_start(struct priv *priv)
 			return rte_errno;
 		}
 		DEBUG("Flow %p applied", (void *)flow);
+		if (flow->rxq)
+			flow->rxq->mark |= flow->mark;
 	}
 	return 0;
 }
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 9cd9fdf..d9bb332 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -34,6 +34,8 @@
 #ifndef RTE_PMD_MLX5_PRM_H_
 #define RTE_PMD_MLX5_PRM_H_
 
+#include <assert.h>
+
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
@@ -106,6 +108,15 @@
 /* Outer UDP header and checksum OK. */
 #define MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK (1u << 6)
 
+/* INVALID is used by packets matching no flow rules. */
+#define MLX5_FLOW_MARK_INVALID 0
+
+/* Maximum allowed value to mark a packet. */
+#define MLX5_FLOW_MARK_MAX 0xfffff0
+
+/* Default mark value used when none is provided. */
+#define MLX5_FLOW_MARK_DEFAULT 0xffffff
+
 /* Subset of struct mlx5_wqe_eth_seg. */
 struct mlx5_wqe_eth_seg_small {
 	uint32_t rsvd0;
@@ -183,10 +194,67 @@ struct mlx5_cqe {
 	uint8_t rsvd2[12];
 	uint32_t byte_cnt;
 	uint64_t timestamp;
-	uint8_t rsvd3[4];
+	uint32_t sop_drop_qpn;
 	uint16_t wqe_counter;
 	uint8_t rsvd4;
 	uint8_t op_own;
 };
 
+/**
+ * Convert a user mark to flow mark.
+ *
+ * @param val
+ *   Mark value to convert.
+ *
+ * @return
+ *   Converted mark value.
+ */
+static inline uint32_t
+mlx5_flow_mark_set(uint32_t val)
+{
+	uint32_t ret;
+
+	/*
+	 * Add one to the user value to differentiate un-marked flows from
+	 * marked flows.
+	 */
+	++val;
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+	/*
+	 * Mark is 24 bits (minus reserved values) but is stored on a 32 bit
+	 * word, byte-swapped by the kernel on little-endian systems. In this
+	 * case, left-shifting the resulting big-endian value ensures the
+	 * least significant 24 bits are retained when converting it back.
+	 */
+	ret = rte_cpu_to_be_32(val) >> 8;
+#else
+	ret = val;
+#endif
+	assert(ret <= MLX5_FLOW_MARK_MAX);
+	return ret;
+}
+
+/**
+ * Convert a mark to user mark.
+ *
+ * @param val
+ *   Mark value to convert.
+ *
+ * @return
+ *   Converted mark value.
+ */
+static inline uint32_t
+mlx5_flow_mark_get(uint32_t val)
+{
+	/*
+	 * Subtract one from the retrieved value. It was added by
+	 * mlx5_flow_mark_set() to distinguish unmarked flows.
+	 */
+#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
+	return (val >> 8) - 1;
+#else
+	return val - 1;
+#endif
+}
+
 #endif /* RTE_PMD_MLX5_PRM_H_ */
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 6f86ded..8f0b4a6 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -113,7 +113,7 @@ static inline int
 check_cqe_seen(volatile struct mlx5_cqe *cqe)
 {
 	static const uint8_t magic[] = "seen";
-	volatile uint8_t (*buf)[sizeof(cqe->rsvd3)] = &cqe->rsvd3;
+	volatile uint8_t (*buf)[sizeof(cqe->rsvd0)] = &cqe->rsvd0;
 	int ret = 1;
 	unsigned int i;
 
@@ -1357,6 +1357,16 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 				pkt->hash.rss = rss_hash_res;
 				pkt->ol_flags = PKT_RX_RSS_HASH;
 			}
+			if (rxq->mark &&
+			    ((cqe->sop_drop_qpn !=
+			      htonl(MLX5_FLOW_MARK_INVALID)) ||
+			     (cqe->sop_drop_qpn !=
+			      htonl(MLX5_FLOW_MARK_DEFAULT)))) {
+				pkt->hash.fdir.hi =
+					mlx5_flow_mark_get(cqe->sop_drop_qpn);
+				pkt->ol_flags &= ~PKT_RX_RSS_HASH;
+				pkt->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+			}
 			if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip |
 			    rxq->crc_present) {
 				if (rxq->csum) {
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index e244c48..302ca49 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -114,7 +114,8 @@ struct rxq {
 	unsigned int elts_n:4; /* Log 2 of Mbufs. */
 	unsigned int port_id:8;
 	unsigned int rss_hash:1; /* RSS hash result is enabled. */
-	unsigned int :9; /* Remaining bits. */
+	unsigned int mark:1; /* Marked flow available on the queue. */
+	unsigned int :8; /* Remaining bits. */
 	volatile uint32_t *rq_db;
 	volatile uint32_t *cq_db;
 	uint16_t rq_ci;
-- 
2.1.4

^ permalink raw reply related

* [PATCH v4 4/6] net/mlx5: support VXLAN flow item
From: Nelio Laranjeiro @ 2016-12-28 10:37 UTC (permalink / raw)
  To: dev; +Cc: Adrien Mazarguil
In-Reply-To: <cover.1482920437.git.nelio.laranjeiro@6wind.com>

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 72 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 66 insertions(+), 6 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 549da6c..1ec0ef5 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -69,6 +69,9 @@ mlx5_flow_create_udp(const struct rte_flow_item *item, void *data);
 static int
 mlx5_flow_create_tcp(const struct rte_flow_item *item, void *data);
 
+static int
+mlx5_flow_create_vxlan(const struct rte_flow_item *item, void *data);
+
 struct rte_flow {
 	LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
 	struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
@@ -123,7 +126,8 @@ static const enum rte_flow_action_type valid_actions[] = {
 static const struct mlx5_flow_items mlx5_flow_items[] = {
 	[RTE_FLOW_ITEM_TYPE_VOID] = {
 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VOID,
-			       RTE_FLOW_ITEM_TYPE_ETH),
+			       RTE_FLOW_ITEM_TYPE_ETH,
+			       RTE_FLOW_ITEM_TYPE_VXLAN),
 		.actions = valid_actions,
 		.mask = &(const struct rte_flow_item_eth){
 			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
@@ -196,6 +200,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6),
 	},
 	[RTE_FLOW_ITEM_TYPE_UDP] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
 		.actions = valid_actions,
 		.mask = &(const struct rte_flow_item_udp){
 			.hdr = {
@@ -219,12 +224,23 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 		.convert = mlx5_flow_create_tcp,
 		.dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
 	},
+	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_vxlan){
+			.vni = "\xff\xff\xff",
+		},
+		.mask_sz = sizeof(struct rte_flow_item_vxlan),
+		.convert = mlx5_flow_create_vxlan,
+		.dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
+	},
 };
 
 /** Structure to pass to the conversion function. */
 struct mlx5_flow {
 	struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
 	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
+	uint32_t inner; /**< Set once VXLAN is encountered. */
 };
 
 struct mlx5_flow_action {
@@ -474,7 +490,7 @@ mlx5_flow_create_eth(const struct rte_flow_item *item, void *data)
 
 	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*eth = (struct ibv_exp_flow_spec_eth) {
-		.type = IBV_EXP_FLOW_SPEC_ETH,
+		.type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
 		.size = eth_size,
 	};
 	if (spec) {
@@ -541,7 +557,7 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item, void *data)
 
 	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*ipv4 = (struct ibv_exp_flow_spec_ipv4) {
-		.type = IBV_EXP_FLOW_SPEC_IPV4,
+		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4,
 		.size = ipv4_size,
 	};
 	if (spec) {
@@ -584,7 +600,7 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item, void *data)
 
 	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*ipv6 = (struct ibv_exp_flow_spec_ipv6) {
-		.type = IBV_EXP_FLOW_SPEC_IPV6,
+		.type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6,
 		.size = ipv6_size,
 	};
 	if (spec) {
@@ -628,7 +644,7 @@ mlx5_flow_create_udp(const struct rte_flow_item *item, void *data)
 
 	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*udp = (struct ibv_exp_flow_spec_tcp_udp) {
-		.type = IBV_EXP_FLOW_SPEC_UDP,
+		.type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
 		.size = udp_size,
 	};
 	if (spec) {
@@ -666,7 +682,7 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item, void *data)
 
 	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
 	*tcp = (struct ibv_exp_flow_spec_tcp_udp) {
-		.type = IBV_EXP_FLOW_SPEC_TCP,
+		.type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
 		.size = tcp_size,
 	};
 	if (spec) {
@@ -686,6 +702,49 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item, void *data)
 }
 
 /**
+ * Convert VXLAN item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx5_flow_create_vxlan(const struct rte_flow_item *item, void *data)
+{
+	const struct rte_flow_item_vxlan *spec = item->spec;
+	const struct rte_flow_item_vxlan *mask = item->mask;
+	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct ibv_exp_flow_spec_tunnel *vxlan;
+	unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
+	union vni {
+		uint32_t vlan_id;
+		uint8_t vni[4];
+	} id;
+
+	id.vni[0] = 0;
+	vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	*vxlan = (struct ibv_exp_flow_spec_tunnel) {
+		.type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
+		.size = size,
+	};
+	if (spec) {
+		memcpy(&id.vni[1], spec->vni, 3);
+		vxlan->val.tunnel_id = id.vlan_id;
+	}
+	if (mask) {
+		memcpy(&id.vni[1], mask->vni, 3);
+		vxlan->mask.tunnel_id = id.vlan_id;
+	}
+	/* Remove unwanted bits from values. */
+	vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
+	++flow->ibv_attr->num_of_specs;
+	flow->ibv_attr->priority = 0;
+	flow->inner = IBV_EXP_FLOW_SPEC_INNER;
+	return 0;
+}
+
+/**
  * Complete flow rule creation.
  *
  * @param priv
@@ -852,6 +911,7 @@ priv_flow_create(struct priv *priv,
 		.flags = 0,
 		.reserved = 0,
 	};
+	flow.inner = 0;
 	claim_zero(priv_flow_validate(priv, attr, items, actions,
 				      error, &flow));
 	action = (struct mlx5_flow_action){
-- 
2.1.4

^ permalink raw reply related

* [PATCH v4 3/6] net/mlx5: support VLAN flow item
From: Nelio Laranjeiro @ 2016-12-28 10:37 UTC (permalink / raw)
  To: dev; +Cc: Adrien Mazarguil
In-Reply-To: <cover.1482920437.git.nelio.laranjeiro@6wind.com>

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx5/mlx5_flow.c | 54 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index ebae2b5..549da6c 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -55,6 +55,9 @@ static int
 mlx5_flow_create_eth(const struct rte_flow_item *item, void *data);
 
 static int
+mlx5_flow_create_vlan(const struct rte_flow_item *item, void *data);
+
+static int
 mlx5_flow_create_ipv4(const struct rte_flow_item *item, void *data);
 
 static int
@@ -131,7 +134,8 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 		.dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
 	},
 	[RTE_FLOW_ITEM_TYPE_ETH] = {
-		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
+			       RTE_FLOW_ITEM_TYPE_IPV4,
 			       RTE_FLOW_ITEM_TYPE_IPV6),
 		.actions = valid_actions,
 		.mask = &(const struct rte_flow_item_eth){
@@ -142,6 +146,17 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
 		.convert = mlx5_flow_create_eth,
 		.dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
 	},
+	[RTE_FLOW_ITEM_TYPE_VLAN] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
+			       RTE_FLOW_ITEM_TYPE_IPV6),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_vlan){
+			.tci = -1,
+		},
+		.mask_sz = sizeof(struct rte_flow_item_vlan),
+		.convert = mlx5_flow_create_vlan,
+		.dst_sz = 0,
+	},
 	[RTE_FLOW_ITEM_TYPE_IPV4] = {
 		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
 			       RTE_FLOW_ITEM_TYPE_TCP),
@@ -348,6 +363,17 @@ priv_flow_validate(struct priv *priv,
 
 		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
 			continue;
+		/* Handle special situation for VLAN. */
+		if (items->type == RTE_FLOW_ITEM_TYPE_VLAN) {
+			if (((const struct rte_flow_item_vlan *)items)->tci >
+			    ETHER_MAX_VLAN_ID) {
+				rte_flow_error_set(error, ENOTSUP,
+						   RTE_FLOW_ERROR_TYPE_ITEM,
+						   items,
+						   "wrong VLAN id value");
+				return -rte_errno;
+			}
+		}
 		for (i = 0;
 		     cur_item->items &&
 		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
@@ -471,6 +497,32 @@ mlx5_flow_create_eth(const struct rte_flow_item *item, void *data)
 }
 
 /**
+ * Convert VLAN item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx5_flow_create_vlan(const struct rte_flow_item *item, void *data)
+{
+	const struct rte_flow_item_vlan *spec = item->spec;
+	const struct rte_flow_item_vlan *mask = item->mask;
+	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct ibv_exp_flow_spec_eth *eth;
+	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
+
+	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
+	if (spec)
+		eth->val.vlan_tag = spec->tci;
+	if (mask)
+		eth->mask.vlan_tag = mask->tci;
+	eth->val.vlan_tag &= eth->mask.vlan_tag;
+	return 0;
+}
+
+/**
  * Convert IPv4 item to Verbs specification.
  *
  * @param item[in]
-- 
2.1.4

^ permalink raw reply related

* [PATCH v4 2/6] net/mlx5: support basic flow items and actions
From: Nelio Laranjeiro @ 2016-12-28 10:37 UTC (permalink / raw)
  To: dev; +Cc: Adrien Mazarguil
In-Reply-To: <cover.1482920437.git.nelio.laranjeiro@6wind.com>

Introduce initial software for rte_flow rules.

VLAN, VXLAN are still not supported.

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx5/mlx5.h         |   3 +
 drivers/net/mlx5/mlx5_flow.c    | 928 ++++++++++++++++++++++++++++++++++++++--
 drivers/net/mlx5/mlx5_trigger.c |   2 +
 3 files changed, 904 insertions(+), 29 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 04f4eaa..c415ce3 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -136,6 +136,7 @@ struct priv {
 	unsigned int reta_idx_n; /* RETA index size. */
 	struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */
 	struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
+	LIST_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
@@ -283,5 +284,7 @@ struct rte_flow *mlx5_flow_create(struct rte_eth_dev *,
 int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
 		      struct rte_flow_error *);
 int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
+int priv_flow_start(struct priv *);
+void priv_flow_stop(struct priv *);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 4fdefa0..ebae2b5 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -31,12 +31,380 @@
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <sys/queue.h>
+#include <string.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
 #include <rte_ethdev.h>
 #include <rte_flow.h>
 #include <rte_flow_driver.h>
+#include <rte_malloc.h>
 
 #include "mlx5.h"
 
+static int
+mlx5_flow_create_eth(const struct rte_flow_item *item, void *data);
+
+static int
+mlx5_flow_create_ipv4(const struct rte_flow_item *item, void *data);
+
+static int
+mlx5_flow_create_ipv6(const struct rte_flow_item *item, void *data);
+
+static int
+mlx5_flow_create_udp(const struct rte_flow_item *item, void *data);
+
+static int
+mlx5_flow_create_tcp(const struct rte_flow_item *item, void *data);
+
+struct rte_flow {
+	LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
+	struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+	struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
+	struct ibv_qp *qp; /**< Verbs queue pair. */
+	struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
+	struct ibv_exp_wq *wq; /**< Verbs work queue. */
+	struct ibv_cq *cq; /**< Verbs completion queue. */
+	struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */
+};
+
+/** Static initializer for items. */
+#define ITEMS(...) \
+	(const enum rte_flow_item_type []){ \
+		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
+	}
+
+/** Structure to generate a simple graph of layers supported by the NIC. */
+struct mlx5_flow_items {
+	/** List of possible following items.  */
+	const enum rte_flow_item_type *const items;
+	/** List of possible actions for these items. */
+	const enum rte_flow_action_type *const actions;
+	/** Bit-masks corresponding to the possibilities for the item. */
+	const void *mask;
+	/** Bit-masks size in bytes. */
+	const unsigned int mask_sz;
+	/**
+	 * Conversion function from rte_flow to NIC specific flow.
+	 *
+	 * @param item
+	 *   rte_flow item to convert.
+	 * @param data
+	 *   Internal structure to store the conversion.
+	 *
+	 * @return
+	 *   0 on success, negative value otherwise.
+	 */
+	int (*convert)(const struct rte_flow_item *item, void *data);
+	/** Size in bytes of the destination structure. */
+	const unsigned int dst_sz;
+};
+
+/** Valid action for this PMD. */
+static const enum rte_flow_action_type valid_actions[] = {
+	RTE_FLOW_ACTION_TYPE_DROP,
+	RTE_FLOW_ACTION_TYPE_QUEUE,
+	RTE_FLOW_ACTION_TYPE_END,
+};
+
+/** Graph of supported items and associated actions. */
+static const struct mlx5_flow_items mlx5_flow_items[] = {
+	[RTE_FLOW_ITEM_TYPE_VOID] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VOID,
+			       RTE_FLOW_ITEM_TYPE_ETH),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_eth){
+			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		},
+		.mask_sz = sizeof(struct rte_flow_item_eth),
+		.convert = mlx5_flow_create_eth,
+		.dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
+	},
+	[RTE_FLOW_ITEM_TYPE_ETH] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
+			       RTE_FLOW_ITEM_TYPE_IPV6),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_eth){
+			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+		},
+		.mask_sz = sizeof(struct rte_flow_item_eth),
+		.convert = mlx5_flow_create_eth,
+		.dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
+	},
+	[RTE_FLOW_ITEM_TYPE_IPV4] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
+			       RTE_FLOW_ITEM_TYPE_TCP),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_ipv4){
+			.hdr = {
+				.src_addr = -1,
+				.dst_addr = -1,
+			},
+		},
+		.mask_sz = sizeof(struct rte_flow_item_ipv4),
+		.convert = mlx5_flow_create_ipv4,
+		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4),
+	},
+	[RTE_FLOW_ITEM_TYPE_IPV6] = {
+		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
+			       RTE_FLOW_ITEM_TYPE_TCP),
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_ipv6){
+			.hdr = {
+				.src_addr = {
+					0xff, 0xff, 0xff, 0xff,
+					0xff, 0xff, 0xff, 0xff,
+					0xff, 0xff, 0xff, 0xff,
+					0xff, 0xff, 0xff, 0xff,
+				},
+				.dst_addr = {
+					0xff, 0xff, 0xff, 0xff,
+					0xff, 0xff, 0xff, 0xff,
+					0xff, 0xff, 0xff, 0xff,
+					0xff, 0xff, 0xff, 0xff,
+				},
+			},
+		},
+		.mask_sz = sizeof(struct rte_flow_item_ipv6),
+		.convert = mlx5_flow_create_ipv6,
+		.dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6),
+	},
+	[RTE_FLOW_ITEM_TYPE_UDP] = {
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_udp){
+			.hdr = {
+				.src_port = -1,
+				.dst_port = -1,
+			},
+		},
+		.mask_sz = sizeof(struct rte_flow_item_udp),
+		.convert = mlx5_flow_create_udp,
+		.dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
+	},
+	[RTE_FLOW_ITEM_TYPE_TCP] = {
+		.actions = valid_actions,
+		.mask = &(const struct rte_flow_item_tcp){
+			.hdr = {
+				.src_port = -1,
+				.dst_port = -1,
+			},
+		},
+		.mask_sz = sizeof(struct rte_flow_item_tcp),
+		.convert = mlx5_flow_create_tcp,
+		.dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
+	},
+};
+
+/** Structure to pass to the conversion function. */
+struct mlx5_flow {
+	struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
+	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
+};
+
+struct mlx5_flow_action {
+	uint32_t queue:1; /**< Target is a receive queue. */
+	uint32_t drop:1; /**< Target is a drop queue. */
+	uint32_t queue_id; /**< Identifier of the queue. */
+};
+
+/**
+ * Check support for a given item.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param mask[in]
+ *   Bit-masks covering supported fields to compare with spec, last and mask in
+ *   \item.
+ * @param size
+ *   Bit-Mask size in bytes.
+ *
+ * @return
+ *   0 on success.
+ */
+static int
+mlx5_flow_item_validate(const struct rte_flow_item *item,
+			const uint8_t *mask, unsigned int size)
+{
+	int ret = 0;
+
+	if (item->spec && !item->mask) {
+		unsigned int i;
+		const uint8_t *spec = item->spec;
+
+		for (i = 0; i < size; ++i)
+			if ((spec[i] | mask[i]) != mask[i])
+				return -1;
+	}
+	if (item->last && !item->mask) {
+		unsigned int i;
+		const uint8_t *spec = item->last;
+
+		for (i = 0; i < size; ++i)
+			if ((spec[i] | mask[i]) != mask[i])
+				return -1;
+	}
+	if (item->mask) {
+		unsigned int i;
+		const uint8_t *spec = item->mask;
+
+		for (i = 0; i < size; ++i)
+			if ((spec[i] | mask[i]) != mask[i])
+				return -1;
+	}
+	if (item->spec && item->last) {
+		uint8_t spec[size];
+		uint8_t last[size];
+		const uint8_t *apply = mask;
+		unsigned int i;
+
+		if (item->mask)
+			apply = item->mask;
+		for (i = 0; i < size; ++i) {
+			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
+			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
+		}
+		ret = memcmp(spec, last, size);
+	}
+	return ret;
+}
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] pattern
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ * @param[in, out] flow
+ *   Flow structure to update.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_validate(struct priv *priv,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error,
+		   struct mlx5_flow *flow)
+{
+	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
+	struct mlx5_flow_action action = {
+		.queue = 0,
+		.drop = 0,
+	};
+
+	(void)priv;
+	if (attr->group) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+				   NULL,
+				   "groups are not supported");
+		return -rte_errno;
+	}
+	if (attr->priority) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+				   NULL,
+				   "priorities are not supported");
+		return -rte_errno;
+	}
+	if (attr->egress) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+				   NULL,
+				   "egress is not supported");
+		return -rte_errno;
+	}
+	if (!attr->ingress) {
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+				   NULL,
+				   "only ingress is supported");
+		return -rte_errno;
+	}
+	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+		const struct mlx5_flow_items *token = NULL;
+		unsigned int i;
+		int err;
+
+		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+			continue;
+		for (i = 0;
+		     cur_item->items &&
+		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
+		     ++i) {
+			if (cur_item->items[i] == items->type) {
+				token = &mlx5_flow_items[items->type];
+				break;
+			}
+		}
+		if (!token)
+			goto exit_item_not_supported;
+		cur_item = token;
+		err = mlx5_flow_item_validate(items,
+					      (const uint8_t *)cur_item->mask,
+					      sizeof(cur_item->mask_sz));
+		if (err)
+			goto exit_item_not_supported;
+		if (flow->ibv_attr && cur_item->convert) {
+			err = cur_item->convert(items, flow);
+			if (err)
+				goto exit_item_not_supported;
+		}
+		flow->offset += cur_item->dst_sz;
+	}
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
+			continue;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
+			action.drop = 1;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+			const struct rte_flow_action_queue *queue =
+				(const struct rte_flow_action_queue *)
+				actions->conf;
+
+			if (!queue || (queue->index > (priv->rxqs_n - 1)))
+				goto exit_action_not_supported;
+			action.queue = 1;
+		} else {
+			goto exit_action_not_supported;
+		}
+	}
+	if (!action.queue && !action.drop) {
+		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "no valid action");
+		return -rte_errno;
+	}
+	return 0;
+exit_item_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+			   items, "item not supported");
+	return -rte_errno;
+exit_action_not_supported:
+	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+			   actions, "action not supported");
+	return -rte_errno;
+}
+
 /**
  * Validate a flow supported by the NIC.
  *
@@ -50,15 +418,417 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
 		   const struct rte_flow_action actions[],
 		   struct rte_flow_error *error)
 {
-	(void)dev;
-	(void)attr;
-	(void)items;
-	(void)actions;
-	(void)error;
-	rte_flow_error_set(error, ENOTSUP,
-			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-			   NULL, "not implemented yet");
-	return -rte_errno;
+	struct priv *priv = dev->data->dev_private;
+	int ret;
+	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
+
+	priv_lock(priv);
+	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
+	priv_unlock(priv);
+	return ret;
+}
+
+/**
+ * Convert Ethernet item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx5_flow_create_eth(const struct rte_flow_item *item, void *data)
+{
+	const struct rte_flow_item_eth *spec = item->spec;
+	const struct rte_flow_item_eth *mask = item->mask;
+	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct ibv_exp_flow_spec_eth *eth;
+	const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
+	unsigned int i;
+
+	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	*eth = (struct ibv_exp_flow_spec_eth) {
+		.type = IBV_EXP_FLOW_SPEC_ETH,
+		.size = eth_size,
+	};
+	if (spec) {
+		memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
+		memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
+	}
+	if (mask) {
+		memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
+		memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
+	}
+	/* Remove unwanted bits from values. */
+	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
+		eth->val.src_mac[i] &= eth->mask.src_mac[i];
+	}
+	/* Finalise the flow. */
+	++flow->ibv_attr->num_of_specs;
+	flow->ibv_attr->priority = 2;
+	return 0;
+}
+
+/**
+ * Convert IPv4 item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx5_flow_create_ipv4(const struct rte_flow_item *item, void *data)
+{
+	const struct rte_flow_item_ipv4 *spec = item->spec;
+	const struct rte_flow_item_ipv4 *mask = item->mask;
+	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct ibv_exp_flow_spec_ipv4 *ipv4;
+	unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4);
+
+	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	*ipv4 = (struct ibv_exp_flow_spec_ipv4) {
+		.type = IBV_EXP_FLOW_SPEC_IPV4,
+		.size = ipv4_size,
+	};
+	if (spec) {
+		ipv4->val = (struct ibv_exp_flow_ipv4_filter){
+			.src_ip = spec->hdr.src_addr,
+			.dst_ip = spec->hdr.dst_addr,
+		};
+	}
+	if (mask) {
+		ipv4->mask = (struct ibv_exp_flow_ipv4_filter){
+			.src_ip = mask->hdr.src_addr,
+			.dst_ip = mask->hdr.dst_addr,
+		};
+	}
+	/* Remove unwanted bits from values. */
+	ipv4->val.src_ip &= ipv4->mask.src_ip;
+	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
+	++flow->ibv_attr->num_of_specs;
+	flow->ibv_attr->priority = 1;
+	return 0;
+}
+
+/**
+ * Convert IPv6 item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx5_flow_create_ipv6(const struct rte_flow_item *item, void *data)
+{
+	const struct rte_flow_item_ipv6 *spec = item->spec;
+	const struct rte_flow_item_ipv6 *mask = item->mask;
+	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct ibv_exp_flow_spec_ipv6 *ipv6;
+	unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6);
+	unsigned int i;
+
+	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	*ipv6 = (struct ibv_exp_flow_spec_ipv6) {
+		.type = IBV_EXP_FLOW_SPEC_IPV6,
+		.size = ipv6_size,
+	};
+	if (spec) {
+		memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
+		       RTE_DIM(ipv6->val.src_ip));
+		memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
+		       RTE_DIM(ipv6->val.dst_ip));
+	}
+	if (mask) {
+		memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
+		       RTE_DIM(ipv6->mask.src_ip));
+		memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
+		       RTE_DIM(ipv6->mask.dst_ip));
+	}
+	/* Remove unwanted bits from values. */
+	for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
+		ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
+		ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
+	}
+	++flow->ibv_attr->num_of_specs;
+	flow->ibv_attr->priority = 1;
+	return 0;
+}
+
+/**
+ * Convert UDP item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx5_flow_create_udp(const struct rte_flow_item *item, void *data)
+{
+	const struct rte_flow_item_udp *spec = item->spec;
+	const struct rte_flow_item_udp *mask = item->mask;
+	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct ibv_exp_flow_spec_tcp_udp *udp;
+	unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
+
+	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	*udp = (struct ibv_exp_flow_spec_tcp_udp) {
+		.type = IBV_EXP_FLOW_SPEC_UDP,
+		.size = udp_size,
+	};
+	if (spec) {
+		udp->val.dst_port = spec->hdr.dst_port;
+		udp->val.src_port = spec->hdr.src_port;
+	}
+	if (mask) {
+		udp->mask.dst_port = mask->hdr.dst_port;
+		udp->mask.src_port = mask->hdr.src_port;
+	}
+	/* Remove unwanted bits from values. */
+	udp->val.src_port &= udp->mask.src_port;
+	udp->val.dst_port &= udp->mask.dst_port;
+	++flow->ibv_attr->num_of_specs;
+	flow->ibv_attr->priority = 0;
+	return 0;
+}
+
+/**
+ * Convert TCP item to Verbs specification.
+ *
+ * @param item[in]
+ *   Item specification.
+ * @param data[in, out]
+ *   User structure.
+ */
+static int
+mlx5_flow_create_tcp(const struct rte_flow_item *item, void *data)
+{
+	const struct rte_flow_item_tcp *spec = item->spec;
+	const struct rte_flow_item_tcp *mask = item->mask;
+	struct mlx5_flow *flow = (struct mlx5_flow *)data;
+	struct ibv_exp_flow_spec_tcp_udp *tcp;
+	unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
+
+	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
+	*tcp = (struct ibv_exp_flow_spec_tcp_udp) {
+		.type = IBV_EXP_FLOW_SPEC_TCP,
+		.size = tcp_size,
+	};
+	if (spec) {
+		tcp->val.dst_port = spec->hdr.dst_port;
+		tcp->val.src_port = spec->hdr.src_port;
+	}
+	if (mask) {
+		tcp->mask.dst_port = mask->hdr.dst_port;
+		tcp->mask.src_port = mask->hdr.src_port;
+	}
+	/* Remove unwanted bits from values. */
+	tcp->val.src_port &= tcp->mask.src_port;
+	tcp->val.dst_port &= tcp->mask.dst_port;
+	++flow->ibv_attr->num_of_specs;
+	flow->ibv_attr->priority = 0;
+	return 0;
+}
+
+/**
+ * Complete flow rule creation.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param ibv_attr
+ *   Verbs flow attributes.
+ * @param action
+ *   Target action structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow if the rule could be created.
+ */
+static struct rte_flow *
+priv_flow_create_action_queue(struct priv *priv,
+			      struct ibv_exp_flow_attr *ibv_attr,
+			      struct mlx5_flow_action *action,
+			      struct rte_flow_error *error)
+{
+	struct rxq_ctrl *rxq;
+	struct rte_flow *rte_flow;
+
+	assert(priv->pd);
+	assert(priv->ctx);
+	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
+	if (!rte_flow) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate flow memory");
+		return NULL;
+	}
+	if (action->drop) {
+		rte_flow->cq =
+			ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
+					  &(struct ibv_exp_cq_init_attr){
+						  .comp_mask = 0,
+					  });
+		if (!rte_flow->cq) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL, "cannot allocate CQ");
+			goto error;
+		}
+		rte_flow->wq = ibv_exp_create_wq(priv->ctx,
+						 &(struct ibv_exp_wq_init_attr){
+						 .wq_type = IBV_EXP_WQT_RQ,
+						 .max_recv_wr = 1,
+						 .max_recv_sge = 1,
+						 .pd = priv->pd,
+						 .cq = rte_flow->cq,
+						 });
+	} else {
+		rxq = container_of((*priv->rxqs)[action->queue_id],
+				   struct rxq_ctrl, rxq);
+		rte_flow->rxq = &rxq->rxq;
+		rte_flow->wq = rxq->wq;
+	}
+	rte_flow->ibv_attr = ibv_attr;
+	rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
+		priv->ctx,
+		&(struct ibv_exp_rwq_ind_table_init_attr){
+			.pd = priv->pd,
+			.log_ind_tbl_size = 0,
+			.ind_tbl = &rte_flow->wq,
+			.comp_mask = 0,
+		});
+	if (!rte_flow->ind_table) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate indirection table");
+		goto error;
+	}
+	rte_flow->qp = ibv_exp_create_qp(
+		priv->ctx,
+		&(struct ibv_exp_qp_init_attr){
+			.qp_type = IBV_QPT_RAW_PACKET,
+			.comp_mask =
+				IBV_EXP_QP_INIT_ATTR_PD |
+				IBV_EXP_QP_INIT_ATTR_PORT |
+				IBV_EXP_QP_INIT_ATTR_RX_HASH,
+			.pd = priv->pd,
+			.rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
+				.rx_hash_function =
+					IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
+				.rx_hash_key_len = rss_hash_default_key_len,
+				.rx_hash_key = rss_hash_default_key,
+				.rx_hash_fields_mask = 0,
+				.rwq_ind_tbl = rte_flow->ind_table,
+			},
+			.port_num = priv->port,
+		});
+	if (!rte_flow->qp) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate QP");
+		goto error;
+	}
+	rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
+						 rte_flow->ibv_attr);
+	if (!rte_flow->ibv_flow) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "flow rule creation failure");
+		goto error;
+	}
+	return rte_flow;
+error:
+	assert(rte_flow);
+	if (rte_flow->qp)
+		ibv_destroy_qp(rte_flow->qp);
+	if (rte_flow->ind_table)
+		ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
+	if (!rte_flow->rxq && rte_flow->wq)
+		ibv_exp_destroy_wq(rte_flow->wq);
+	if (!rte_flow->rxq && rte_flow->cq)
+		ibv_destroy_cq(rte_flow->cq);
+	rte_free(rte_flow->ibv_attr);
+	rte_free(rte_flow);
+	return NULL;
+}
+
+/**
+ * Convert a flow.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] pattern
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A flow on success, NULL otherwise.
+ */
+static struct rte_flow *
+priv_flow_create(struct priv *priv,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct rte_flow *rte_flow;
+	struct mlx5_flow_action action;
+	struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
+	int err;
+
+	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
+	if (err)
+		goto exit;
+	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
+	flow.offset = sizeof(struct ibv_exp_flow_attr);
+	if (!flow.ibv_attr) {
+		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+				   NULL, "cannot allocate ibv_attr memory");
+		goto exit;
+	}
+	*flow.ibv_attr = (struct ibv_exp_flow_attr){
+		.type = IBV_EXP_FLOW_ATTR_NORMAL,
+		.size = sizeof(struct ibv_exp_flow_attr),
+		.priority = attr->priority,
+		.num_of_specs = 0,
+		.port = 0,
+		.flags = 0,
+		.reserved = 0,
+	};
+	claim_zero(priv_flow_validate(priv, attr, items, actions,
+				      error, &flow));
+	action = (struct mlx5_flow_action){
+		.queue = 0,
+		.drop = 0,
+	};
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
+		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
+			continue;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
+			action.queue = 1;
+			action.queue_id =
+				((const struct rte_flow_action_queue *)
+				 actions->conf)->index;
+		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
+			action.drop = 1;
+		} else {
+			rte_flow_error_set(error, ENOTSUP,
+					   RTE_FLOW_ERROR_TYPE_ACTION,
+					   actions, "unsupported action");
+			goto exit;
+		}
+	}
+	rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr,
+						 &action, error);
+	return rte_flow;
+exit:
+	rte_free(flow.ibv_attr);
+	return NULL;
 }
 
 /**
@@ -74,15 +844,46 @@ mlx5_flow_create(struct rte_eth_dev *dev,
 		 const struct rte_flow_action actions[],
 		 struct rte_flow_error *error)
 {
-	(void)dev;
-	(void)attr;
-	(void)items;
-	(void)actions;
-	(void)error;
-	rte_flow_error_set(error, ENOTSUP,
-			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-			   NULL, "not implemented yet");
-	return NULL;
+	struct priv *priv = dev->data->dev_private;
+	struct rte_flow *flow;
+
+	priv_lock(priv);
+	flow = priv_flow_create(priv, attr, items, actions, error);
+	if (flow) {
+		LIST_INSERT_HEAD(&priv->flows, flow, next);
+		DEBUG("Flow created %p", (void *)flow);
+	}
+	priv_unlock(priv);
+	return flow;
+}
+
+/**
+ * Destroy a flow.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] flow
+ *   Flow to destroy.
+ */
+static void
+priv_flow_destroy(struct priv *priv,
+		  struct rte_flow *flow)
+{
+	(void)priv;
+	LIST_REMOVE(flow, next);
+	if (flow->ibv_flow)
+		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
+	if (flow->qp)
+		claim_zero(ibv_destroy_qp(flow->qp));
+	if (flow->ind_table)
+		claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
+	if (!flow->rxq && flow->wq)
+		claim_zero(ibv_exp_destroy_wq(flow->wq));
+	if (!flow->rxq && flow->cq)
+		claim_zero(ibv_destroy_cq(flow->cq));
+	rte_free(flow->ibv_attr);
+	DEBUG("Flow destroyed %p", (void *)flow);
+	rte_free(flow);
 }
 
 /**
@@ -96,13 +897,30 @@ mlx5_flow_destroy(struct rte_eth_dev *dev,
 		  struct rte_flow *flow,
 		  struct rte_flow_error *error)
 {
-	(void)dev;
-	(void)flow;
+	struct priv *priv = dev->data->dev_private;
+
 	(void)error;
-	rte_flow_error_set(error, ENOTSUP,
-			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-			   NULL, "not implemented yet");
-	return -rte_errno;
+	priv_lock(priv);
+	priv_flow_destroy(priv, flow);
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+static void
+priv_flow_flush(struct priv *priv)
+{
+	while (!LIST_EMPTY(&priv->flows)) {
+		struct rte_flow *flow;
+
+		flow = LIST_FIRST(&priv->flows);
+		priv_flow_destroy(priv, flow);
+	}
 }
 
 /**
@@ -115,10 +933,62 @@ int
 mlx5_flow_flush(struct rte_eth_dev *dev,
 		struct rte_flow_error *error)
 {
-	(void)dev;
+	struct priv *priv = dev->data->dev_private;
+
 	(void)error;
-	rte_flow_error_set(error, ENOTSUP,
-			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-			   NULL, "not implemented yet");
-	return -rte_errno;
+	priv_lock(priv);
+	priv_flow_flush(priv);
+	priv_unlock(priv);
+	return 0;
+}
+
+/**
+ * Remove all flows.
+ *
+ * Called by dev_stop() to remove all flows.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+priv_flow_stop(struct priv *priv)
+{
+	struct rte_flow *flow;
+
+	for (flow = LIST_FIRST(&priv->flows);
+	     flow;
+	     flow = LIST_NEXT(flow, next)) {
+		claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
+		flow->ibv_flow = NULL;
+		DEBUG("Flow %p removed", (void *)flow);
+	}
+}
+
+/**
+ * Add all flows.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, a errno value otherwise and rte_errno is set.
+ */
+int
+priv_flow_start(struct priv *priv)
+{
+	struct rte_flow *flow;
+
+	for (flow = LIST_FIRST(&priv->flows);
+	     flow;
+	     flow = LIST_NEXT(flow, next)) {
+		flow->ibv_flow = ibv_exp_create_flow(flow->qp,
+						     flow->ibv_attr);
+		if (!flow->ibv_flow) {
+			DEBUG("Flow %p cannot be applied", (void *)flow);
+			rte_errno = EINVAL;
+			return rte_errno;
+		}
+		DEBUG("Flow %p applied", (void *)flow);
+	}
+	return 0;
 }
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index d4dccd8..2399243 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -90,6 +90,7 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE)
 		priv_fdir_enable(priv);
 	priv_dev_interrupt_handler_install(priv, dev);
+	err = priv_flow_start(priv);
 	priv_unlock(priv);
 	return -err;
 }
@@ -120,6 +121,7 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	priv_mac_addrs_disable(priv);
 	priv_destroy_hash_rxqs(priv);
 	priv_fdir_disable(priv);
+	priv_flow_stop(priv);
 	priv_dev_interrupt_handler_uninstall(priv, dev);
 	priv->started = 0;
 	priv_unlock(priv);
-- 
2.1.4

^ permalink raw reply related

* [PATCH v4 1/6] net/mlx5: add preliminary flow API support
From: Nelio Laranjeiro @ 2016-12-28 10:37 UTC (permalink / raw)
  To: dev; +Cc: Adrien Mazarguil
In-Reply-To: <cover.1482920437.git.nelio.laranjeiro@6wind.com>

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx5/Makefile    |   1 +
 drivers/net/mlx5/mlx5.h      |  16 ++++++
 drivers/net/mlx5/mlx5_fdir.c |  15 ++++++
 drivers/net/mlx5/mlx5_flow.c | 124 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 156 insertions(+)
 create mode 100644 drivers/net/mlx5/mlx5_flow.c

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index cf87f0b..6d1338a 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -48,6 +48,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_stats.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_fdir.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
 
 # Dependencies.
 DEPDIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += lib/librte_ether
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 79b7a60..04f4eaa 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -59,6 +59,7 @@
 #include <rte_spinlock.h>
 #include <rte_interrupts.h>
 #include <rte_errno.h>
+#include <rte_flow.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
@@ -268,4 +269,19 @@ void priv_fdir_enable(struct priv *);
 int mlx5_dev_filter_ctrl(struct rte_eth_dev *, enum rte_filter_type,
 			 enum rte_filter_op, void *);
 
+/* mlx5_flow.c */
+
+int mlx5_flow_validate(struct rte_eth_dev *, const struct rte_flow_attr *,
+		       const struct rte_flow_item [],
+		       const struct rte_flow_action [],
+		       struct rte_flow_error *);
+struct rte_flow *mlx5_flow_create(struct rte_eth_dev *,
+				  const struct rte_flow_attr *,
+				  const struct rte_flow_item [],
+				  const struct rte_flow_action [],
+				  struct rte_flow_error *);
+int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
+		      struct rte_flow_error *);
+int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
+
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
index 1acf682..f80c58b 100644
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ b/drivers/net/mlx5/mlx5_fdir.c
@@ -55,6 +55,8 @@
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
 #include <rte_common.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
@@ -1042,6 +1044,14 @@ priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
 	return ret;
 }
 
+static const struct rte_flow_ops mlx5_flow_ops = {
+	.validate = mlx5_flow_validate,
+	.create = mlx5_flow_create,
+	.destroy = mlx5_flow_destroy,
+	.flush = mlx5_flow_flush,
+	.query = NULL,
+};
+
 /**
  * Manage filter operations.
  *
@@ -1067,6 +1077,11 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
 	struct priv *priv = dev->data->dev_private;
 
 	switch (filter_type) {
+	case RTE_ETH_FILTER_GENERIC:
+		if (filter_op != RTE_ETH_FILTER_GET)
+			return -EINVAL;
+		*(const void **)arg = &mlx5_flow_ops;
+		return 0;
 	case RTE_ETH_FILTER_FDIR:
 		priv_lock(priv);
 		ret = priv_fdir_ctrl_func(priv, filter_op, arg);
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
new file mode 100644
index 0000000..4fdefa0
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -0,0 +1,124 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   Copyright 2016 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_ethdev.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+
+#include "mlx5.h"
+
+/**
+ * Validate a flow supported by the NIC.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_validate(struct rte_eth_dev *dev,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item items[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	(void)dev;
+	(void)attr;
+	(void)items;
+	(void)actions;
+	(void)error;
+	rte_flow_error_set(error, ENOTSUP,
+			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL, "not implemented yet");
+	return -rte_errno;
+}
+
+/**
+ * Create a flow.
+ *
+ * @see rte_flow_create()
+ * @see rte_flow_ops
+ */
+struct rte_flow *
+mlx5_flow_create(struct rte_eth_dev *dev,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item items[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	(void)dev;
+	(void)attr;
+	(void)items;
+	(void)actions;
+	(void)error;
+	rte_flow_error_set(error, ENOTSUP,
+			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL, "not implemented yet");
+	return NULL;
+}
+
+/**
+ * Destroy a flow.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_destroy(struct rte_eth_dev *dev,
+		  struct rte_flow *flow,
+		  struct rte_flow_error *error)
+{
+	(void)dev;
+	(void)flow;
+	(void)error;
+	rte_flow_error_set(error, ENOTSUP,
+			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL, "not implemented yet");
+	return -rte_errno;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_flush(struct rte_eth_dev *dev,
+		struct rte_flow_error *error)
+{
+	(void)dev;
+	(void)error;
+	rte_flow_error_set(error, ENOTSUP,
+			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL, "not implemented yet");
+	return -rte_errno;
+}
-- 
2.1.4

^ permalink raw reply related

* [PATCH v4 0/6] net/mlx5: support flow API
From: Nelio Laranjeiro @ 2016-12-28 10:37 UTC (permalink / raw)
  To: dev; +Cc: Adrien Mazarguil
In-Reply-To: <cover.1482331954.git.nelio.laranjeiro@6wind.com>

Changes in v4:

 - Simplify flow parsing by using a graph.
 - Add VXLAN flow item.
 - Add mark flow action.
 - Extend IPv4 filter item (Type of service, Next Protocol ID).

Changes in v3:

 - Fix Ethernet ether type issue.

Changes in v2:

 - Fix several issues.
 - Support VLAN filtering.


Nelio Laranjeiro (6):
  net/mlx5: add preliminary flow API support
  net/mlx5: support basic flow items and actions
  net/mlx5: support VLAN flow item
  net/mlx5: support VXLAN flow item
  net/mlx5: support mark flow action
  net/mlx5: extend IPv4 flow item

 drivers/net/mlx5/Makefile       |    1 +
 drivers/net/mlx5/mlx5.h         |   19 +
 drivers/net/mlx5/mlx5_fdir.c    |   15 +
 drivers/net/mlx5/mlx5_flow.c    | 1192 +++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_prm.h     |   70 ++-
 drivers/net/mlx5/mlx5_rxtx.c    |   12 +-
 drivers/net/mlx5/mlx5_rxtx.h    |    3 +-
 drivers/net/mlx5/mlx5_trigger.c |    2 +
 8 files changed, 1311 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/mlx5/mlx5_flow.c

-- 
2.1.4

^ permalink raw reply

* Re: [PATCH v2 07/17] net/i40e: add flow validate function
From: Xing, Beilei @ 2016-12-28 10:03 UTC (permalink / raw)
  To: Adrien Mazarguil; +Cc: Wu, Jingjing, Zhang, Helin, dev@dpdk.org, Lu, Wenzhuo
In-Reply-To: <20161228092945.GF3737@6wind.com>



> -----Original Message-----
> From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> Sent: Wednesday, December 28, 2016 5:30 PM
> To: Xing, Beilei <beilei.xing@intel.com>
> Cc: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Helin
> <helin.zhang@intel.com>; dev@dpdk.org; Lu, Wenzhuo
> <wenzhuo.lu@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v2 07/17] net/i40e: add flow validate
> function
> 
> Hi Beilei,
> 
> On Wed, Dec 28, 2016 at 09:00:03AM +0000, Xing, Beilei wrote:
> >
> >
> > > -----Original Message-----
> > > From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> > > Sent: Tuesday, December 27, 2016 8:40 PM
> > > To: Xing, Beilei <beilei.xing@intel.com>
> > > Cc: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Helin
> > > <helin.zhang@intel.com>; dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH v2 07/17] net/i40e: add flow validate
> > > function
> > >
> > > Hi Beilei,
> > >
> > > A few comments below.
> > >
> > > On Tue, Dec 27, 2016 at 02:26:14PM +0800, Beilei Xing wrote:
> > > > This patch adds i40e_flow_validation function to check if a flow
> > > > is valid according to the flow pattern.
> > > > i40e_parse_ethertype_filter is added first, it also gets the
> > > > ethertype info.
> > > > i40e_flow.c is added to handle all generic filter events.
> > > >
> > > > Signed-off-by: Beilei Xing <beilei.xing@intel.com>
> > > > ---
> > > >  drivers/net/i40e/Makefile      |   1 +
> > > >  drivers/net/i40e/i40e_ethdev.c |   5 +
> > > >  drivers/net/i40e/i40e_ethdev.h |  20 ++
> > > >  drivers/net/i40e/i40e_flow.c   | 431
> > > +++++++++++++++++++++++++++++++++++++++++
> > > >  4 files changed, 457 insertions(+)  create mode 100644
> > > > drivers/net/i40e/i40e_flow.c
> > > [...]
> > > > diff --git a/drivers/net/i40e/i40e_flow.c
> > > > b/drivers/net/i40e/i40e_flow.c new file mode 100644 index
> > > > 0000000..bf451ef
> > > > --- /dev/null
> > > > +++ b/drivers/net/i40e/i40e_flow.c
> > > [...]
> > > > +	if (ethertype_filter->queue >= pf->dev_data->nb_rx_queues) {
> > > > +		rte_flow_error_set(error, EINVAL,
> > > > +				   RTE_FLOW_ERROR_TYPE_ACTION,
> > > > +				   NULL, "Invalid queue ID for"
> > > > +				   " ethertype_filter.");
> > >
> > > When setting an error type related to an existing object provided by
> > > the application, you should set the related cause pointer to a
> > > non-NULL value. In this particular case, retrieving the action
> > > object seems difficult so it can remain that way, however there are
> > > many places in this series where it can be done.
> >
> > OK, I got the meaning  and usage of cause pointer now. Thanks for the
> explaination.
> >
> > >
> > > > +		return -EINVAL;
> > >
> > > While this is perfectly valid, you could also return -rte_errno to
> > > avoid duplicating EINVAL.
> >
> > Yes, agree.
> >
> > >
> > > [...]
> > > > +	}
> > > > +	if (ethertype_filter->ether_type == ETHER_TYPE_IPv4 ||
> > > > +	    ethertype_filter->ether_type == ETHER_TYPE_IPv6) {
> > > > +		rte_flow_error_set(error, ENOTSUP,
> > > > +				   RTE_FLOW_ERROR_TYPE_ITEM,
> > > > +				   NULL, "Unsupported ether_type in"
> > > > +				   " control packet filter.");
> > > > +		return -ENOTSUP;
> > > > +	}
> > > > +	if (ethertype_filter->ether_type == ETHER_TYPE_VLAN)
> > > > +		PMD_DRV_LOG(WARNING, "filter vlan ether_type in"
> > > > +			    " first tag is not supported.");
> > > > +
> > > > +	return ret;
> > > > +}
> > > [...]
> > > > +/* Parse attributes */
> > > > +static int
> > > > +i40e_parse_attr(const struct rte_flow_attr *attr,
> > > > +		struct rte_flow_error *error)
> > > > +{
> > > > +	/* Must be input direction */
> > > > +	if (!attr->ingress) {
> > > > +		rte_flow_error_set(error, EINVAL,
> > > > +				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
> > > > +				   NULL, "Only support ingress.");
> > >
> > > Regarding my previous comment, &attr could replace NULL here as well
> > > as in subsequent calls to rte_flow_error_set().
> >
> > Got it, thanks.
> >
> > >
> > > > +		return -EINVAL;
> > > > +	}
> > > > +
> > > > +	/* Not supported */
> > > > +	if (attr->egress) {
> > > > +		rte_flow_error_set(error, EINVAL,
> > > > +				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
> > > > +				   NULL, "Not support egress.");
> > > > +		return -EINVAL;
> > > > +	}
> > > > +
> > > > +	/* Not supported */
> > > > +	if (attr->priority) {
> > > > +		rte_flow_error_set(error, EINVAL,
> > > > +				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
> > > > +				   NULL, "Not support priority.");
> > > > +		return -EINVAL;
> > > > +	}
> > > > +
> > > > +	/* Not supported */
> > > > +	if (attr->group) {
> > > > +		rte_flow_error_set(error, EINVAL,
> > > > +				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
> > > > +				   NULL, "Not support group.");
> > > > +		return -EINVAL;
> > > > +	}
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +static int
> > > > +i40e_parse_ethertype_pattern(const struct rte_flow_item *pattern,
> > > > +			     struct rte_flow_error *error,
> > > > +			     struct rte_eth_ethertype_filter *filter) {
> > > > +	const struct rte_flow_item *item = pattern;
> > > > +	const struct rte_flow_item_eth *eth_spec;
> > > > +	const struct rte_flow_item_eth *eth_mask;
> > > > +	enum rte_flow_item_type item_type;
> > > > +
> > > > +	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
> > > > +		item_type = item->type;
> > > > +		switch (item_type) {
> > > > +		case RTE_FLOW_ITEM_TYPE_ETH:
> > > > +			eth_spec = (const struct rte_flow_item_eth *)item-
> > > >spec;
> > > > +			eth_mask = (const struct rte_flow_item_eth *)item-
> > > >mask;
> > > > +			/* Get the MAC info. */
> > > > +			if (!eth_spec || !eth_mask) {
> > > > +				rte_flow_error_set(error, EINVAL,
> > > > +
> > > RTE_FLOW_ERROR_TYPE_ITEM,
> > > > +						   NULL,
> > > > +						   "NULL ETH spec/mask");
> > > > +				return -EINVAL;
> > > > +			}
> > >
> > > While optional, I think you should allow eth_spec and eth_mask to be
> > > NULL here as described in [1]:
> > >
> > > - If eth_spec is NULL, you can match anything that looks like a valid
> > >   Ethernet header.
> > >
> > > - If eth_mask is NULL, you should assume a default mask (for Ethernet it
> > >   usually means matching source/destination MACs perfectly).
> > >
> > > - You must check the "last" field as well, if non-NULL it may probably be
> > >   supported as long as the following condition is satisfied:
> > >
> > >    (spec & mask) == (last & mask)
> > >
> > > [1] http://dpdk.org/doc/guides/prog_guide/rte_flow.html#pattern-item
> > >
> >
> > Thanks for the specification. In fact, we don't support the "last" for both
> ixgbe and i40e currently according to the original design, so we only support
> perfect match till now. We will support it in the future, as the deadline is
> coming, what do you think?
> 
> If you want to handle it later it's fine, however in that case you need to at
> least generate an error when last is non-NULL (I did not see such a check in
> this code).

OK, will update the non-NULL condition in next version.
And thanks for all your comments.

> 
> Note that supporting it properly as defined in the API could be relatively easy
> by implementing the above condition, it's just a small step above simply
> checking for a NULL value.
> 
> > > [...]
> > > > +	const struct rte_flow_action_queue *act_q;
> > > > +	uint32_t index = 0;
> > > > +
> > > > +	/* Check if the first non-void action is QUEUE or DROP. */
> > > > +	NEXT_ITEM_OF_ACTION(act, actions, index);
> > > > +	if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE &&
> > > > +	    act->type != RTE_FLOW_ACTION_TYPE_DROP) {
> > > > +		rte_flow_error_set(error, EINVAL,
> > > RTE_FLOW_ERROR_TYPE_ACTION,
> > > > +				   NULL, "Not supported action.");
> > >
> > > Again, you could report &act instead of NULL here (please check all
> > > remaining calls to rte_flow_error_set()).
> > >
> > > [...]
> > >
> > > --
> > > Adrien Mazarguil
> > > 6WIND
> 
> --
> Adrien Mazarguil
> 6WIND

^ permalink raw reply

* [PATCH] net/mlx5: fix RSS hash result for flows
From: Nelio Laranjeiro @ 2016-12-28  9:58 UTC (permalink / raw)
  To: dev; +Cc: Adrien Mazarguil, stable

Flows redirected to a specific queue do not have a valid RSS hash result
and the related mbuf flag must not be set.

Fixes: ecf60761fc2a ("net/mlx5: return RSS hash result in mbuf")

CC: stable@dpdk.org
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Acked-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index a37e433..6f86ded 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1353,7 +1353,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			/* Update packet information. */
 			pkt->packet_type = 0;
 			pkt->ol_flags = 0;
-			if (rxq->rss_hash) {
+			if (rss_hash_res && rxq->rss_hash) {
 				pkt->hash.rss = rss_hash_res;
 				pkt->ol_flags = PKT_RX_RSS_HASH;
 			}
-- 
2.1.4

^ permalink raw reply related

* Re: [PATCH v2 07/17] net/i40e: add flow validate function
From: Adrien Mazarguil @ 2016-12-28  9:29 UTC (permalink / raw)
  To: Xing, Beilei; +Cc: Wu, Jingjing, Zhang, Helin, dev@dpdk.org, Lu, Wenzhuo
In-Reply-To: <94479800C636CB44BD422CB454846E013158C17B@SHSMSX101.ccr.corp.intel.com>

Hi Beilei,

On Wed, Dec 28, 2016 at 09:00:03AM +0000, Xing, Beilei wrote:
> 
> 
> > -----Original Message-----
> > From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> > Sent: Tuesday, December 27, 2016 8:40 PM
> > To: Xing, Beilei <beilei.xing@intel.com>
> > Cc: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Helin
> > <helin.zhang@intel.com>; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v2 07/17] net/i40e: add flow validate
> > function
> > 
> > Hi Beilei,
> > 
> > A few comments below.
> > 
> > On Tue, Dec 27, 2016 at 02:26:14PM +0800, Beilei Xing wrote:
> > > This patch adds i40e_flow_validation function to check if a flow is
> > > valid according to the flow pattern.
> > > i40e_parse_ethertype_filter is added first, it also gets the ethertype
> > > info.
> > > i40e_flow.c is added to handle all generic filter events.
> > >
> > > Signed-off-by: Beilei Xing <beilei.xing@intel.com>
> > > ---
> > >  drivers/net/i40e/Makefile      |   1 +
> > >  drivers/net/i40e/i40e_ethdev.c |   5 +
> > >  drivers/net/i40e/i40e_ethdev.h |  20 ++
> > >  drivers/net/i40e/i40e_flow.c   | 431
> > +++++++++++++++++++++++++++++++++++++++++
> > >  4 files changed, 457 insertions(+)
> > >  create mode 100644 drivers/net/i40e/i40e_flow.c
> > [...]
> > > diff --git a/drivers/net/i40e/i40e_flow.c
> > > b/drivers/net/i40e/i40e_flow.c new file mode 100644 index
> > > 0000000..bf451ef
> > > --- /dev/null
> > > +++ b/drivers/net/i40e/i40e_flow.c
> > [...]
> > > +	if (ethertype_filter->queue >= pf->dev_data->nb_rx_queues) {
> > > +		rte_flow_error_set(error, EINVAL,
> > > +				   RTE_FLOW_ERROR_TYPE_ACTION,
> > > +				   NULL, "Invalid queue ID for"
> > > +				   " ethertype_filter.");
> > 
> > When setting an error type related to an existing object provided by the
> > application, you should set the related cause pointer to a non-NULL value. In
> > this particular case, retrieving the action object seems difficult so it can
> > remain that way, however there are many places in this series where it can
> > be done.
> 
> OK, I got the meaning  and usage of cause pointer now. Thanks for the explaination.
> 
> > 
> > > +		return -EINVAL;
> > 
> > While this is perfectly valid, you could also return -rte_errno to avoid
> > duplicating EINVAL.
> 
> Yes, agree.
> 
> > 
> > [...]
> > > +	}
> > > +	if (ethertype_filter->ether_type == ETHER_TYPE_IPv4 ||
> > > +	    ethertype_filter->ether_type == ETHER_TYPE_IPv6) {
> > > +		rte_flow_error_set(error, ENOTSUP,
> > > +				   RTE_FLOW_ERROR_TYPE_ITEM,
> > > +				   NULL, "Unsupported ether_type in"
> > > +				   " control packet filter.");
> > > +		return -ENOTSUP;
> > > +	}
> > > +	if (ethertype_filter->ether_type == ETHER_TYPE_VLAN)
> > > +		PMD_DRV_LOG(WARNING, "filter vlan ether_type in"
> > > +			    " first tag is not supported.");
> > > +
> > > +	return ret;
> > > +}
> > [...]
> > > +/* Parse attributes */
> > > +static int
> > > +i40e_parse_attr(const struct rte_flow_attr *attr,
> > > +		struct rte_flow_error *error)
> > > +{
> > > +	/* Must be input direction */
> > > +	if (!attr->ingress) {
> > > +		rte_flow_error_set(error, EINVAL,
> > > +				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
> > > +				   NULL, "Only support ingress.");
> > 
> > Regarding my previous comment, &attr could replace NULL here as well as in
> > subsequent calls to rte_flow_error_set().
> 
> Got it, thanks.
> 
> > 
> > > +		return -EINVAL;
> > > +	}
> > > +
> > > +	/* Not supported */
> > > +	if (attr->egress) {
> > > +		rte_flow_error_set(error, EINVAL,
> > > +				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
> > > +				   NULL, "Not support egress.");
> > > +		return -EINVAL;
> > > +	}
> > > +
> > > +	/* Not supported */
> > > +	if (attr->priority) {
> > > +		rte_flow_error_set(error, EINVAL,
> > > +				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
> > > +				   NULL, "Not support priority.");
> > > +		return -EINVAL;
> > > +	}
> > > +
> > > +	/* Not supported */
> > > +	if (attr->group) {
> > > +		rte_flow_error_set(error, EINVAL,
> > > +				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
> > > +				   NULL, "Not support group.");
> > > +		return -EINVAL;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int
> > > +i40e_parse_ethertype_pattern(const struct rte_flow_item *pattern,
> > > +			     struct rte_flow_error *error,
> > > +			     struct rte_eth_ethertype_filter *filter) {
> > > +	const struct rte_flow_item *item = pattern;
> > > +	const struct rte_flow_item_eth *eth_spec;
> > > +	const struct rte_flow_item_eth *eth_mask;
> > > +	enum rte_flow_item_type item_type;
> > > +
> > > +	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
> > > +		item_type = item->type;
> > > +		switch (item_type) {
> > > +		case RTE_FLOW_ITEM_TYPE_ETH:
> > > +			eth_spec = (const struct rte_flow_item_eth *)item-
> > >spec;
> > > +			eth_mask = (const struct rte_flow_item_eth *)item-
> > >mask;
> > > +			/* Get the MAC info. */
> > > +			if (!eth_spec || !eth_mask) {
> > > +				rte_flow_error_set(error, EINVAL,
> > > +
> > RTE_FLOW_ERROR_TYPE_ITEM,
> > > +						   NULL,
> > > +						   "NULL ETH spec/mask");
> > > +				return -EINVAL;
> > > +			}
> > 
> > While optional, I think you should allow eth_spec and eth_mask to be NULL
> > here as described in [1]:
> > 
> > - If eth_spec is NULL, you can match anything that looks like a valid
> >   Ethernet header.
> > 
> > - If eth_mask is NULL, you should assume a default mask (for Ethernet it
> >   usually means matching source/destination MACs perfectly).
> > 
> > - You must check the "last" field as well, if non-NULL it may probably be
> >   supported as long as the following condition is satisfied:
> > 
> >    (spec & mask) == (last & mask)
> > 
> > [1] http://dpdk.org/doc/guides/prog_guide/rte_flow.html#pattern-item
> > 
> 
> Thanks for the specification. In fact, we don't support the "last" for both ixgbe and i40e currently according to the original design, so we only support perfect match till now. We will support it in the future, as the deadline is coming, what do you think?

If you want to handle it later it's fine, however in that case you need to
at least generate an error when last is non-NULL (I did not see such a check
in this code).

Note that supporting it properly as defined in the API could be relatively
easy by implementing the above condition, it's just a small step above
simply checking for a NULL value.

> > [...]
> > > +	const struct rte_flow_action_queue *act_q;
> > > +	uint32_t index = 0;
> > > +
> > > +	/* Check if the first non-void action is QUEUE or DROP. */
> > > +	NEXT_ITEM_OF_ACTION(act, actions, index);
> > > +	if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE &&
> > > +	    act->type != RTE_FLOW_ACTION_TYPE_DROP) {
> > > +		rte_flow_error_set(error, EINVAL,
> > RTE_FLOW_ERROR_TYPE_ACTION,
> > > +				   NULL, "Not supported action.");
> > 
> > Again, you could report &act instead of NULL here (please check all remaining
> > calls to rte_flow_error_set()).
> > 
> > [...]
> > 
> > --
> > Adrien Mazarguil
> > 6WIND

-- 
Adrien Mazarguil
6WIND

^ permalink raw reply

* Re: [PATCH v2 07/17] net/i40e: add flow validate function
From: Xing, Beilei @ 2016-12-28  9:00 UTC (permalink / raw)
  To: Adrien Mazarguil; +Cc: Wu, Jingjing, Zhang, Helin, dev@dpdk.org, Lu, Wenzhuo
In-Reply-To: <20161227124004.GA3737@6wind.com>



> -----Original Message-----
> From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> Sent: Tuesday, December 27, 2016 8:40 PM
> To: Xing, Beilei <beilei.xing@intel.com>
> Cc: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Helin
> <helin.zhang@intel.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v2 07/17] net/i40e: add flow validate
> function
> 
> Hi Beilei,
> 
> A few comments below.
> 
> On Tue, Dec 27, 2016 at 02:26:14PM +0800, Beilei Xing wrote:
> > This patch adds i40e_flow_validation function to check if a flow is
> > valid according to the flow pattern.
> > i40e_parse_ethertype_filter is added first, it also gets the ethertype
> > info.
> > i40e_flow.c is added to handle all generic filter events.
> >
> > Signed-off-by: Beilei Xing <beilei.xing@intel.com>
> > ---
> >  drivers/net/i40e/Makefile      |   1 +
> >  drivers/net/i40e/i40e_ethdev.c |   5 +
> >  drivers/net/i40e/i40e_ethdev.h |  20 ++
> >  drivers/net/i40e/i40e_flow.c   | 431
> +++++++++++++++++++++++++++++++++++++++++
> >  4 files changed, 457 insertions(+)
> >  create mode 100644 drivers/net/i40e/i40e_flow.c
> [...]
> > diff --git a/drivers/net/i40e/i40e_flow.c
> > b/drivers/net/i40e/i40e_flow.c new file mode 100644 index
> > 0000000..bf451ef
> > --- /dev/null
> > +++ b/drivers/net/i40e/i40e_flow.c
> [...]
> > +	if (ethertype_filter->queue >= pf->dev_data->nb_rx_queues) {
> > +		rte_flow_error_set(error, EINVAL,
> > +				   RTE_FLOW_ERROR_TYPE_ACTION,
> > +				   NULL, "Invalid queue ID for"
> > +				   " ethertype_filter.");
> 
> When setting an error type related to an existing object provided by the
> application, you should set the related cause pointer to a non-NULL value. In
> this particular case, retrieving the action object seems difficult so it can
> remain that way, however there are many places in this series where it can
> be done.

OK, I got the meaning  and usage of cause pointer now. Thanks for the explaination.

> 
> > +		return -EINVAL;
> 
> While this is perfectly valid, you could also return -rte_errno to avoid
> duplicating EINVAL.

Yes, agree.

> 
> [...]
> > +	}
> > +	if (ethertype_filter->ether_type == ETHER_TYPE_IPv4 ||
> > +	    ethertype_filter->ether_type == ETHER_TYPE_IPv6) {
> > +		rte_flow_error_set(error, ENOTSUP,
> > +				   RTE_FLOW_ERROR_TYPE_ITEM,
> > +				   NULL, "Unsupported ether_type in"
> > +				   " control packet filter.");
> > +		return -ENOTSUP;
> > +	}
> > +	if (ethertype_filter->ether_type == ETHER_TYPE_VLAN)
> > +		PMD_DRV_LOG(WARNING, "filter vlan ether_type in"
> > +			    " first tag is not supported.");
> > +
> > +	return ret;
> > +}
> [...]
> > +/* Parse attributes */
> > +static int
> > +i40e_parse_attr(const struct rte_flow_attr *attr,
> > +		struct rte_flow_error *error)
> > +{
> > +	/* Must be input direction */
> > +	if (!attr->ingress) {
> > +		rte_flow_error_set(error, EINVAL,
> > +				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
> > +				   NULL, "Only support ingress.");
> 
> Regarding my previous comment, &attr could replace NULL here as well as in
> subsequent calls to rte_flow_error_set().

Got it, thanks.

> 
> > +		return -EINVAL;
> > +	}
> > +
> > +	/* Not supported */
> > +	if (attr->egress) {
> > +		rte_flow_error_set(error, EINVAL,
> > +				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
> > +				   NULL, "Not support egress.");
> > +		return -EINVAL;
> > +	}
> > +
> > +	/* Not supported */
> > +	if (attr->priority) {
> > +		rte_flow_error_set(error, EINVAL,
> > +				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
> > +				   NULL, "Not support priority.");
> > +		return -EINVAL;
> > +	}
> > +
> > +	/* Not supported */
> > +	if (attr->group) {
> > +		rte_flow_error_set(error, EINVAL,
> > +				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
> > +				   NULL, "Not support group.");
> > +		return -EINVAL;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int
> > +i40e_parse_ethertype_pattern(const struct rte_flow_item *pattern,
> > +			     struct rte_flow_error *error,
> > +			     struct rte_eth_ethertype_filter *filter) {
> > +	const struct rte_flow_item *item = pattern;
> > +	const struct rte_flow_item_eth *eth_spec;
> > +	const struct rte_flow_item_eth *eth_mask;
> > +	enum rte_flow_item_type item_type;
> > +
> > +	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
> > +		item_type = item->type;
> > +		switch (item_type) {
> > +		case RTE_FLOW_ITEM_TYPE_ETH:
> > +			eth_spec = (const struct rte_flow_item_eth *)item-
> >spec;
> > +			eth_mask = (const struct rte_flow_item_eth *)item-
> >mask;
> > +			/* Get the MAC info. */
> > +			if (!eth_spec || !eth_mask) {
> > +				rte_flow_error_set(error, EINVAL,
> > +
> RTE_FLOW_ERROR_TYPE_ITEM,
> > +						   NULL,
> > +						   "NULL ETH spec/mask");
> > +				return -EINVAL;
> > +			}
> 
> While optional, I think you should allow eth_spec and eth_mask to be NULL
> here as described in [1]:
> 
> - If eth_spec is NULL, you can match anything that looks like a valid
>   Ethernet header.
> 
> - If eth_mask is NULL, you should assume a default mask (for Ethernet it
>   usually means matching source/destination MACs perfectly).
> 
> - You must check the "last" field as well, if non-NULL it may probably be
>   supported as long as the following condition is satisfied:
> 
>    (spec & mask) == (last & mask)
> 
> [1] http://dpdk.org/doc/guides/prog_guide/rte_flow.html#pattern-item
> 

Thanks for the specification. In fact, we don't support the "last" for both ixgbe and i40e currently according to the original design, so we only support perfect match till now. We will support it in the future, as the deadline is coming, what do you think?


> [...]
> > +	const struct rte_flow_action_queue *act_q;
> > +	uint32_t index = 0;
> > +
> > +	/* Check if the first non-void action is QUEUE or DROP. */
> > +	NEXT_ITEM_OF_ACTION(act, actions, index);
> > +	if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE &&
> > +	    act->type != RTE_FLOW_ACTION_TYPE_DROP) {
> > +		rte_flow_error_set(error, EINVAL,
> RTE_FLOW_ERROR_TYPE_ACTION,
> > +				   NULL, "Not supported action.");
> 
> Again, you could report &act instead of NULL here (please check all remaining
> calls to rte_flow_error_set()).
> 
> [...]
> 
> --
> Adrien Mazarguil
> 6WIND

^ permalink raw reply

* Re: [PATCH v2 15/17] net/i40e: add flow flush function
From: Xing, Beilei @ 2016-12-28  8:02 UTC (permalink / raw)
  To: Adrien Mazarguil; +Cc: Wu, Jingjing, Zhang, Helin, dev@dpdk.org
In-Reply-To: <20161227124015.GB3737@6wind.com>



> -----Original Message-----
> From: Adrien Mazarguil [mailto:adrien.mazarguil@6wind.com]
> Sent: Tuesday, December 27, 2016 8:40 PM
> To: Xing, Beilei <beilei.xing@intel.com>
> Cc: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Helin
> <helin.zhang@intel.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v2 15/17] net/i40e: add flow flush function
> 
> Hi Beilei,
> 
> On Tue, Dec 27, 2016 at 02:26:22PM +0800, Beilei Xing wrote:
> > This patch adds i40e_flow_flush function to flush all filters for
> > users. And flow director flush function is involved first.
> >
> > Signed-off-by: Beilei Xing <beilei.xing@intel.com>
> > ---
> >  drivers/net/i40e/i40e_ethdev.h |  3 +++
> >  drivers/net/i40e/i40e_fdir.c   |  8 ++------
> >  drivers/net/i40e/i40e_flow.c   | 46
> ++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 51 insertions(+), 6 deletions(-)
> [...]
> > diff --git a/drivers/net/i40e/i40e_flow.c
> > b/drivers/net/i40e/i40e_flow.c
> [...]
> > +static int
> > +i40e_fdir_filter_flush(struct i40e_pf *pf) {
> > +	struct rte_eth_dev *dev = pf->adapter->eth_dev;
> > +	struct i40e_fdir_info *fdir_info = &pf->fdir;
> > +	struct i40e_fdir_filter *fdir_filter;
> > +	struct i40e_flow *flow;
> > +	int ret = 0;
> > +
> > +	ret = i40e_fdir_flush(dev);
> > +	if (!ret) {
> > +		/* Delete FDIR filters in FDIR list. */
> > +		while ((fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list)))
> > +			i40e_sw_fdir_filter_del(pf, fdir_filter);
> > +
> > +		/* Delete FDIR flows in flow list. */
> > +		TAILQ_FOREACH(flow, &pf->flow_list, node) {
> > +			if (flow->filter_type == RTE_ETH_FILTER_FDIR) {
> > +				TAILQ_REMOVE(&pf->flow_list, flow, node);
> > +				rte_free(flow);
> > +			}
> > +		}
> 
> Be careful, I'm not sure calling TAILQ_REMOVE() followed by rte_free()
> inside a TAILQ_FOREACH() is safe. BSD has the _SAFE() variant for this
> purpose but Linux does not.
> 
Yes, thanks for reminder, I'll check it later:)

> > +	}
> > +
> > +	return ret;
> > +}
> 
> --
> Adrien Mazarguil
> 6WIND

^ permalink raw reply

* Re: [PATCH v2 07/17] net/i40e: add flow validate function
From: Xing, Beilei @ 2016-12-28  7:44 UTC (permalink / raw)
  To: Wu, Jingjing, Zhang, Helin; +Cc: dev@dpdk.org
In-Reply-To: <9BB6961774997848B5B42BEC655768F810CC00A6@SHSMSX103.ccr.corp.intel.com>



> -----Original Message-----
> From: Wu, Jingjing
> Sent: Wednesday, December 28, 2016 10:52 AM
> To: Xing, Beilei <beilei.xing@intel.com>; Zhang, Helin
> <helin.zhang@intel.com>
> Cc: dev@dpdk.org
> Subject: RE: [PATCH v2 07/17] net/i40e: add flow validate function
> 
> 
> >
> > +union i40e_filter_t {
> > +	struct rte_eth_ethertype_filter ethertype_filter;
> > +	struct rte_eth_fdir_filter fdir_filter;
> > +	struct rte_eth_tunnel_filter_conf tunnel_filter; } cons_filter;
> > +
> > +typedef int (*parse_filter_t)(struct rte_eth_dev *dev,
> > +			      const struct rte_flow_attr *attr,
> > +			      const struct rte_flow_item pattern[],
> > +			      const struct rte_flow_action actions[],
> > +			      struct rte_flow_error *error,
> > +			      union i40e_filter_t *filter);
> You can use void* instead of define union i40e_filter_t.

I tried the void * before, but I should determine the filter type when creating a flow. If using void*, I can get the filter info but I  don't know which filer type it belongs to.

> 
> > +struct i40e_valid_pattern {
> > +	enum rte_flow_item_type *items;
> What the item points to? Add few comments

It's the pattern without VOID items. I'll add comments in next version.

> > +
> > +	ret = parse_filter(dev, attr, items, actions, error, &cons_filter);
> 
> Will you use cons_filter later? If not, it looks like we don't need the argument
> at all.

Yes, it's used to create flow. We us parse_filter to get the filter info. When creating a flow, flow_validate will be involved first to get filter info, then set filter according to the filter info.

> > +
> > +	rte_free(items);
> > +
> > +	return ret;
> > +}
> > --
> > 2.5.5

^ permalink raw reply

* Re: [PATCH v2 03/17] net/i40e: store flow director filter
From: Tiwei Bie @ 2016-12-28  7:36 UTC (permalink / raw)
  To: Xing, Beilei; +Cc: Wu, Jingjing, Zhang, Helin, dev@dpdk.org
In-Reply-To: <20161228071455.GB85898@dpdk19>

On Wed, Dec 28, 2016 at 03:14:55PM +0800, Tiwei Bie wrote:
> On Wed, Dec 28, 2016 at 03:10:39PM +0800, Xing, Beilei wrote:
> > 
> > 
> > > -----Original Message-----
> > > From: Bie, Tiwei
> > > Sent: Wednesday, December 28, 2016 11:39 AM
> > > To: Xing, Beilei <beilei.xing@intel.com>
> > > Cc: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Helin
> > > <helin.zhang@intel.com>; dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH v2 03/17] net/i40e: store flow director filter
> > > 
> > > On Tue, Dec 27, 2016 at 02:26:10PM +0800, Beilei Xing wrote:
> > > > Currently there's no flow director filter stored in SW. This patch
> > > > stores flow director filters in SW with cuckoo hash, also adds
> > > > protection if a flow director filter has been added.
> > > >
> > > > Signed-off-by: Beilei Xing <beilei.xing@intel.com>
> > > > ---
> > > >  drivers/net/i40e/i40e_ethdev.c | 48 +++++++++++++++++++++
> > > > drivers/net/i40e/i40e_ethdev.h | 12 ++++++
> > > >  drivers/net/i40e/i40e_fdir.c   | 98
> > > ++++++++++++++++++++++++++++++++++++++++++
> > > >  3 files changed, 158 insertions(+)
> > > >
> > > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > > b/drivers/net/i40e/i40e_ethdev.c index c012d5d..427ebdc 100644
> > > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > > [...]
> > > > @@ -1342,6 +1379,17 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)
> > > >  		rte_free(p_tunnel);
> > > >  	}
> > > >
> > > > +	/* Remove all flow director rules and hash */
> > > > +	if (fdir_info->hash_map)
> > > > +		rte_free(fdir_info->hash_map);
> > > > +	if (fdir_info->hash_table)
> > > > +		rte_hash_free(fdir_info->hash_table);
> > > > +
> > > > +	while ((p_fdir = TAILQ_FIRST(&fdir_info->fdir_list))) {
> > > 
> > > There is a redundant pair of parentheses, or you should compare with NULL.
> > 
> > I think the another parentheses is used to compare with NULL. In fact there's similar using in PMD.
> > 
> 
> The outer parentheses are redundant unless you compare it with NULL explicitly.
> Any way, you could just follow the existing coding style.
> 

Sorry, I was wrong here. I just did a quick check and noticed that DPDK
has enabled the below option:

-Werror=parentheses

The outer parentheses are NOT redundant even if you don't compare it with
NULL explicitly.

Best regards,
Tiwei Bie

^ permalink raw reply

* Re: [PATCH v2 12/17] net/i40e: destroy ethertype filter
From: Xing, Beilei @ 2016-12-28  7:29 UTC (permalink / raw)
  To: Wu, Jingjing, Zhang, Helin; +Cc: dev@dpdk.org
In-Reply-To: <9BB6961774997848B5B42BEC655768F810CC00FA@SHSMSX103.ccr.corp.intel.com>



> -----Original Message-----
> From: Wu, Jingjing
> Sent: Wednesday, December 28, 2016 11:30 AM
> To: Xing, Beilei <beilei.xing@intel.com>; Zhang, Helin
> <helin.zhang@intel.com>
> Cc: dev@dpdk.org
> Subject: RE: [PATCH v2 12/17] net/i40e: destroy ethertype filter
> 
> >
> >  const struct rte_flow_ops i40e_flow_ops = {
> >  	.validate = i40e_flow_validate,
> > @@ -1492,11 +1495,16 @@ i40e_flow_destroy(__rte_unused struct
> > rte_eth_dev *dev,
> >  		  struct rte_flow *flow,
> >  		  struct rte_flow_error *error)
> >  {
> > +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-
> > >dev_private);
> >  	struct i40e_flow *pmd_flow = (struct i40e_flow *)flow;
> >  	enum rte_filter_type filter_type = pmd_flow->filter_type;
> >  	int ret;
> >
> >  	switch (filter_type) {
> > +	case RTE_ETH_FILTER_ETHERTYPE:
> > +		ret = i40e_dev_destroy_ethertype_filter(pf,
> > +				(struct i40e_ethertype_filter *)pmd_flow-
> >rule);
> > +		break;
> >  	default:
> >  		PMD_DRV_LOG(WARNING, "Filter type (%d) not supported",
> >  			    filter_type);
> > @@ -1504,10 +1512,49 @@ i40e_flow_destroy(__rte_unused struct
> > rte_eth_dev *dev,
> >  		break;
> >  	}
> >
> > -	if (ret)
> > +	if (!ret) {
> > +		TAILQ_REMOVE(&pf->flow_list, pmd_flow, node);
> > +		free(pmd_flow);
> Should it be freed inside the function? Is the API definition like that?

Yes, since API or rte won't free the flow allocated by PMD. Please refer to the attached mail.

^ permalink raw reply

* Re: [PATCH v2 15/17] net/i40e: add flow flush function
From: Xing, Beilei @ 2016-12-28  7:20 UTC (permalink / raw)
  To: Bie, Tiwei; +Cc: Wu, Jingjing, Zhang, Helin, dev@dpdk.org
In-Reply-To: <20161228070035.GA85898@dpdk19>



> -----Original Message-----
> From: Bie, Tiwei
> Sent: Wednesday, December 28, 2016 3:01 PM
> To: Xing, Beilei <beilei.xing@intel.com>
> Cc: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Helin
> <helin.zhang@intel.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v2 15/17] net/i40e: add flow flush function
> 
> On Wed, Dec 28, 2016 at 02:48:02PM +0800, Xing, Beilei wrote:
> > > -----Original Message-----
> > > From: Bie, Tiwei
> > > Sent: Wednesday, December 28, 2016 1:36 PM
> > > To: Xing, Beilei <beilei.xing@intel.com>
> > > Cc: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Helin
> > > <helin.zhang@intel.com>; dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH v2 15/17] net/i40e: add flow flush
> > > function
> > >
> > > On Tue, Dec 27, 2016 at 02:26:22PM +0800, Beilei Xing wrote:
> > > > This patch adds i40e_flow_flush function to flush all filters for
> > > > users. And flow director flush function is involved first.
> > > >
> > > > Signed-off-by: Beilei Xing <beilei.xing@intel.com>
> > > > ---
> > > >  drivers/net/i40e/i40e_ethdev.h |  3 +++
> > > >  drivers/net/i40e/i40e_fdir.c   |  8 ++------
> > > >  drivers/net/i40e/i40e_flow.c   | 46
> > > ++++++++++++++++++++++++++++++++++++++++++
> > > >  3 files changed, 51 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > > > b/drivers/net/i40e/i40e_ethdev.h index b8c7d41..0b736d5 100644
> > > > --- a/drivers/net/i40e/i40e_ethdev.h
> > > > +++ b/drivers/net/i40e/i40e_ethdev.h
> > > > @@ -786,6 +786,9 @@ i40e_sw_tunnel_filter_lookup(struct
> > > i40e_tunnel_rule *tunnel_rule,
> > > >  			     const struct i40e_tunnel_filter_input *input);  int
> > > > i40e_sw_tunnel_filter_del(struct i40e_pf *pf,
> > > >  			      struct i40e_tunnel_filter *tunnel_filter);
> > > > +int i40e_sw_fdir_filter_del(struct i40e_pf *pf,
> > > > +			    struct i40e_fdir_filter *filter); int
> > > i40e_fdir_flush(struct
> > > > +rte_eth_dev *dev);
> > > >
> > >
> > > Why don't declare them as the global functions at the beginning?
> >
> > When I implement the store/restore function, I plan this function is only
> used in i40e_ethdev.c.
> > I change them to the global functions since I add i40e_flow.c to rework all
> the flow ops.
> >
> 
> These functions are also introduced in this patch set. There is no particular
> reason to mark them as static at first and then turn them into the global
> functions in the later patches. So it would be better to declare them as the
> global ones when introducing them.

Yes, make sense. Will update in next version.

> 
> Best regards,
> Tiwei Bie

^ permalink raw reply

* Re: [PATCH v2 03/17] net/i40e: store flow director filter
From: Tiwei Bie @ 2016-12-28  7:14 UTC (permalink / raw)
  To: Xing, Beilei; +Cc: Wu, Jingjing, Zhang, Helin, dev@dpdk.org
In-Reply-To: <94479800C636CB44BD422CB454846E013158BFAF@SHSMSX101.ccr.corp.intel.com>

On Wed, Dec 28, 2016 at 03:10:39PM +0800, Xing, Beilei wrote:
> 
> 
> > -----Original Message-----
> > From: Bie, Tiwei
> > Sent: Wednesday, December 28, 2016 11:39 AM
> > To: Xing, Beilei <beilei.xing@intel.com>
> > Cc: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Helin
> > <helin.zhang@intel.com>; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v2 03/17] net/i40e: store flow director filter
> > 
> > On Tue, Dec 27, 2016 at 02:26:10PM +0800, Beilei Xing wrote:
> > > Currently there's no flow director filter stored in SW. This patch
> > > stores flow director filters in SW with cuckoo hash, also adds
> > > protection if a flow director filter has been added.
> > >
> > > Signed-off-by: Beilei Xing <beilei.xing@intel.com>
> > > ---
> > >  drivers/net/i40e/i40e_ethdev.c | 48 +++++++++++++++++++++
> > > drivers/net/i40e/i40e_ethdev.h | 12 ++++++
> > >  drivers/net/i40e/i40e_fdir.c   | 98
> > ++++++++++++++++++++++++++++++++++++++++++
> > >  3 files changed, 158 insertions(+)
> > >
> > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > b/drivers/net/i40e/i40e_ethdev.c index c012d5d..427ebdc 100644
> > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > [...]
> > > @@ -1342,6 +1379,17 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)
> > >  		rte_free(p_tunnel);
> > >  	}
> > >
> > > +	/* Remove all flow director rules and hash */
> > > +	if (fdir_info->hash_map)
> > > +		rte_free(fdir_info->hash_map);
> > > +	if (fdir_info->hash_table)
> > > +		rte_hash_free(fdir_info->hash_table);
> > > +
> > > +	while ((p_fdir = TAILQ_FIRST(&fdir_info->fdir_list))) {
> > 
> > There is a redundant pair of parentheses, or you should compare with NULL.
> 
> I think the another parentheses is used to compare with NULL. In fact there's similar using in PMD.
> 

The outer parentheses are redundant unless you compare it with NULL explicitly.
Any way, you could just follow the existing coding style.

Best regards,
Tiwei Bie

^ permalink raw reply

* Re: [PATCH v2 03/17] net/i40e: store flow director filter
From: Xing, Beilei @ 2016-12-28  7:10 UTC (permalink / raw)
  To: Bie, Tiwei; +Cc: Wu, Jingjing, Zhang, Helin, dev@dpdk.org
In-Reply-To: <20161228033847.GB13841@dpdk19>



> -----Original Message-----
> From: Bie, Tiwei
> Sent: Wednesday, December 28, 2016 11:39 AM
> To: Xing, Beilei <beilei.xing@intel.com>
> Cc: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Helin
> <helin.zhang@intel.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v2 03/17] net/i40e: store flow director filter
> 
> On Tue, Dec 27, 2016 at 02:26:10PM +0800, Beilei Xing wrote:
> > Currently there's no flow director filter stored in SW. This patch
> > stores flow director filters in SW with cuckoo hash, also adds
> > protection if a flow director filter has been added.
> >
> > Signed-off-by: Beilei Xing <beilei.xing@intel.com>
> > ---
> >  drivers/net/i40e/i40e_ethdev.c | 48 +++++++++++++++++++++
> > drivers/net/i40e/i40e_ethdev.h | 12 ++++++
> >  drivers/net/i40e/i40e_fdir.c   | 98
> ++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 158 insertions(+)
> >
> > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > b/drivers/net/i40e/i40e_ethdev.c index c012d5d..427ebdc 100644
> > --- a/drivers/net/i40e/i40e_ethdev.c
> > +++ b/drivers/net/i40e/i40e_ethdev.c
> [...]
> > @@ -1342,6 +1379,17 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)
> >  		rte_free(p_tunnel);
> >  	}
> >
> > +	/* Remove all flow director rules and hash */
> > +	if (fdir_info->hash_map)
> > +		rte_free(fdir_info->hash_map);
> > +	if (fdir_info->hash_table)
> > +		rte_hash_free(fdir_info->hash_table);
> > +
> > +	while ((p_fdir = TAILQ_FIRST(&fdir_info->fdir_list))) {
> 
> There is a redundant pair of parentheses, or you should compare with NULL.

I think the another parentheses is used to compare with NULL. In fact there's similar using in PMD.

> 
> > +		TAILQ_REMOVE(&fdir_info->fdir_list, p_fdir, rules);
> > +		rte_free(p_fdir);
> > +	}
> > +
> >  	dev->dev_ops = NULL;
> >  	dev->rx_pkt_burst = NULL;
> >  	dev->tx_pkt_burst = NULL;
> [...]
> > diff --git a/drivers/net/i40e/i40e_fdir.c
> > b/drivers/net/i40e/i40e_fdir.c index 335bf15..faa2495 100644
> > --- a/drivers/net/i40e/i40e_fdir.c
> > +++ b/drivers/net/i40e/i40e_fdir.c
> [...]
> > +/* Check if there exists the flow director filter */ static struct
> > +i40e_fdir_filter * i40e_sw_fdir_filter_lookup(struct i40e_fdir_info
> > +*fdir_info,
> > +			const struct rte_eth_fdir_input *input) {
> > +	int ret = 0;
> > +
> 
> The initialization is meaningless, as it will be written by the below
> assignment unconditionally.

Yes, you're right.

> 
> > +	ret = rte_hash_lookup(fdir_info->hash_table, (const void *)input);
> > +	if (ret < 0)
> > +		return NULL;
> > +
> > +	return fdir_info->hash_map[ret];
> > +}
> > +
> > +/* Add a flow director filter into the SW list */
> > +static int
> > +i40e_sw_fdir_filter_insert(struct i40e_pf *pf, struct i40e_fdir_filter *filter)
> > +{
> > +	struct i40e_fdir_info *fdir_info = &pf->fdir;
> > +	int ret = 0;
> > +
> 
> Same here.
> 
> > +	ret = rte_hash_add_key(fdir_info->hash_table,
> > +			       &filter->fdir.input);
> > +	if (ret < 0)
> > +		PMD_DRV_LOG(ERR,
> > +			    "Failed to insert fdir filter to hash table %d!",
> > +			    ret);
> 
> Function should return when ret < 0.

Thanks for catching it.

> 
> > +	fdir_info->hash_map[ret] = filter;
> > +
> > +	TAILQ_INSERT_TAIL(&fdir_info->fdir_list, filter, rules);
> > +
> > +	return 0;
> > +}
> > +
> > +/* Delete a flow director filter from the SW list */
> > +static int
> > +i40e_sw_fdir_filter_del(struct i40e_pf *pf, struct i40e_fdir_filter *filter)
> > +{
> > +	struct i40e_fdir_info *fdir_info = &pf->fdir;
> > +	int ret = 0;
> > +
> 
> Same here.
> 
> > +	ret = rte_hash_del_key(fdir_info->hash_table,
> > +			       &filter->fdir.input);
> > +	if (ret < 0)
> > +		PMD_DRV_LOG(ERR,
> > +			    "Failed to delete fdir filter to hash table %d!",
> > +			    ret);
> 
> Function should return when ret < 0.
> 
> > +	fdir_info->hash_map[ret] = NULL;
> > +
> > +	TAILQ_REMOVE(&fdir_info->fdir_list, filter, rules);
> > +	rte_free(filter);
> > +
> > +	return 0;
> > +}
> > +
> >  /*
> >   * i40e_add_del_fdir_filter - add or remove a flow director filter.
> >   * @pf: board private structure
> > @@ -1032,6 +1105,8 @@ i40e_add_del_fdir_filter(struct rte_eth_dev *dev,
> >  	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-
> >dev_private);
> >  	unsigned char *pkt = (unsigned char *)pf->fdir.prg_pkt;
> >  	enum i40e_filter_pctype pctype;
> > +	struct i40e_fdir_info *fdir_info = &pf->fdir;
> > +	struct i40e_fdir_filter *fdir_filter, *node;
> >  	int ret = 0;
> >
> >  	if (dev->data->dev_conf.fdir_conf.mode !=
> RTE_FDIR_MODE_PERFECT) {
> > @@ -1054,6 +1129,21 @@ i40e_add_del_fdir_filter(struct rte_eth_dev
> *dev,
> >  		return -EINVAL;
> >  	}
> >
> > +	fdir_filter = rte_zmalloc("fdir_filter", sizeof(*fdir_filter), 0);
> > +	i40e_fdir_filter_convert(filter, fdir_filter);
> > +	node = i40e_sw_fdir_filter_lookup(fdir_info, &fdir_filter->fdir.input);
> > +	if (add && node) {
> > +		PMD_DRV_LOG(ERR,
> > +			    "Conflict with existing flow director rules!");
> > +		rte_free(fdir_filter);
> > +		return -EINVAL;
> > +	} else if (!add && !node) {
> 
> When `if (add && node)' is true, function will return. There is no need
> to use `else' here.
> 
> Best regards,
> Tiwei Bie

^ permalink raw reply

* Re: [PATCH v2 15/17] net/i40e: add flow flush function
From: Tiwei Bie @ 2016-12-28  7:00 UTC (permalink / raw)
  To: Xing, Beilei; +Cc: Wu, Jingjing, Zhang, Helin, dev@dpdk.org
In-Reply-To: <94479800C636CB44BD422CB454846E013158BF48@SHSMSX101.ccr.corp.intel.com>

On Wed, Dec 28, 2016 at 02:48:02PM +0800, Xing, Beilei wrote:
> > -----Original Message-----
> > From: Bie, Tiwei
> > Sent: Wednesday, December 28, 2016 1:36 PM
> > To: Xing, Beilei <beilei.xing@intel.com>
> > Cc: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Helin
> > <helin.zhang@intel.com>; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v2 15/17] net/i40e: add flow flush function
> > 
> > On Tue, Dec 27, 2016 at 02:26:22PM +0800, Beilei Xing wrote:
> > > This patch adds i40e_flow_flush function to flush all filters for
> > > users. And flow director flush function is involved first.
> > >
> > > Signed-off-by: Beilei Xing <beilei.xing@intel.com>
> > > ---
> > >  drivers/net/i40e/i40e_ethdev.h |  3 +++
> > >  drivers/net/i40e/i40e_fdir.c   |  8 ++------
> > >  drivers/net/i40e/i40e_flow.c   | 46
> > ++++++++++++++++++++++++++++++++++++++++++
> > >  3 files changed, 51 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > > b/drivers/net/i40e/i40e_ethdev.h index b8c7d41..0b736d5 100644
> > > --- a/drivers/net/i40e/i40e_ethdev.h
> > > +++ b/drivers/net/i40e/i40e_ethdev.h
> > > @@ -786,6 +786,9 @@ i40e_sw_tunnel_filter_lookup(struct
> > i40e_tunnel_rule *tunnel_rule,
> > >  			     const struct i40e_tunnel_filter_input *input);  int
> > > i40e_sw_tunnel_filter_del(struct i40e_pf *pf,
> > >  			      struct i40e_tunnel_filter *tunnel_filter);
> > > +int i40e_sw_fdir_filter_del(struct i40e_pf *pf,
> > > +			    struct i40e_fdir_filter *filter); int
> > i40e_fdir_flush(struct
> > > +rte_eth_dev *dev);
> > >
> > 
> > Why don't declare them as the global functions at the beginning?
> 
> When I implement the store/restore function, I plan this function is only used in i40e_ethdev.c.
> I change them to the global functions since I add i40e_flow.c to rework all  the flow ops.
> 

These functions are also introduced in this patch set. There is no
particular reason to mark them as static at first and then turn them
into the global functions in the later patches. So it would be better
to declare them as the global ones when introducing them.

Best regards,
Tiwei Bie

^ permalink raw reply

* Re: [PATCH v2 12/17] net/i40e: destroy ethertype filter
From: Xing, Beilei @ 2016-12-28  6:57 UTC (permalink / raw)
  To: Bie, Tiwei; +Cc: Wu, Jingjing, Zhang, Helin, dev@dpdk.org
In-Reply-To: <20161228045616.GA28245@dpdk19>



> -----Original Message-----
> From: Bie, Tiwei
> Sent: Wednesday, December 28, 2016 12:56 PM
> To: Xing, Beilei <beilei.xing@intel.com>
> Cc: Wu, Jingjing <jingjing.wu@intel.com>; Zhang, Helin
> <helin.zhang@intel.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v2 12/17] net/i40e: destroy ethertype filter
> 
> On Tue, Dec 27, 2016 at 02:26:19PM +0800, Beilei Xing wrote:
> > This patch adds i40e_dev_destroy_ethertype_filter function to destroy
> > a ethertype filter for users.
> >
> > Signed-off-by: Beilei Xing <beilei.xing@intel.com>
> > ---
> >  drivers/net/i40e/i40e_ethdev.c | 10 ++-------
> > drivers/net/i40e/i40e_ethdev.h |  5 +++++
> >  drivers/net/i40e/i40e_flow.c   | 51
> ++++++++++++++++++++++++++++++++++++++++--
> >  3 files changed, 56 insertions(+), 10 deletions(-)
> >
> [...]
> > diff --git a/drivers/net/i40e/i40e_flow.c
> > b/drivers/net/i40e/i40e_flow.c index 2a61c4f..732c411 100644
> > --- a/drivers/net/i40e/i40e_flow.c
> > +++ b/drivers/net/i40e/i40e_flow.c
> [...]
> > @@ -1492,11 +1495,16 @@ i40e_flow_destroy(__rte_unused struct
> > rte_eth_dev *dev,
> 
> The `__rte_unused' qualifier should be removed.

Yes :)

> 
> >  		  struct rte_flow *flow,
> >  		  struct rte_flow_error *error)
> >  {
> > +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-
> >dev_private);
> >  	struct i40e_flow *pmd_flow = (struct i40e_flow *)flow;
> >  	enum rte_filter_type filter_type = pmd_flow->filter_type;
> >  	int ret;
> >
> >  	switch (filter_type) {
> > +	case RTE_ETH_FILTER_ETHERTYPE:
> > +		ret = i40e_dev_destroy_ethertype_filter(pf,
> > +				(struct i40e_ethertype_filter *)pmd_flow-
> >rule);
> > +		break;
> >  	default:
> >  		PMD_DRV_LOG(WARNING, "Filter type (%d) not supported",
> >  			    filter_type);
> > @@ -1504,10 +1512,49 @@ i40e_flow_destroy(__rte_unused struct
> rte_eth_dev *dev,
> >  		break;
> >  	}
> >
> > -	if (ret)
> > +	if (!ret) {
> > +		TAILQ_REMOVE(&pf->flow_list, pmd_flow, node);
> > +		free(pmd_flow);
> > +	} else {
> >  		rte_flow_error_set(error, EINVAL,
> >  				   RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
> >  				   "Failed to destroy flow.");
> > +	}
> 
> Probably you should introduce the pf related code when introducing
> i40e_flow_destroy() in the below patch:
> 
> [PATCH v2 11/17] net/i40e: add flow destroy function

Good point, thanks.

> 
> > +
> > +	return ret;
> > +}
> > +
> > +static int
> > +i40e_dev_destroy_ethertype_filter(struct i40e_pf *pf,
> > +				  struct i40e_ethertype_filter *filter) {
> > +	struct i40e_hw *hw = I40E_PF_TO_HW(pf);
> > +	struct i40e_ethertype_rule *ethertype_rule = &pf->ethertype;
> > +	struct i40e_ethertype_filter *node;
> > +	struct i40e_control_filter_stats stats;
> > +	uint16_t flags = 0;
> > +	int ret = 0;
> > +
> > +	if (!(filter->flags & RTE_ETHTYPE_FLAGS_MAC))
> > +		flags |=
> I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC;
> > +	if (filter->flags & RTE_ETHTYPE_FLAGS_DROP)
> > +		flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP;
> > +	flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TO_QUEUE;
> > +
> > +	memset(&stats, 0, sizeof(stats));
> > +	ret = i40e_aq_add_rem_control_packet_filter(hw,
> > +					    filter->input.mac_addr.addr_bytes,
> > +					    filter->input.ether_type,
> > +					    flags, pf->main_vsi->seid,
> > +					    filter->queue, 0, &stats, NULL);
> > +	if (ret < 0)
> > +		return ret;
> > +
> > +	node = i40e_sw_ethertype_filter_lookup(ethertype_rule, &filter-
> >input);
> > +	if (node)
> > +		ret = i40e_sw_ethertype_filter_del(pf, node);
> > +	else
> > +		return -EINVAL;
> 
> It would be more readable to check whether node equals NULL and return
> when it's true, and call i40e_sw_ethertype_filter_del(pf, node) outside the
> `if' statement:
> 
> 	node = i40e_sw_ethertype_filter_lookup(ethertype_rule, &filter-
> >input);
> 	if (node == NULL)
> 		return -EINVAL;
> 
> 	ret = i40e_sw_ethertype_filter_del(pf, node);

Make sense, got it:)

> 
> Best regards,
> Tiwei Bie

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox