DPDK-dev Archive on lore.kernel.org

DPDK-dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v3 06/25] app/testpmd: implement basic support for rte_flow
From: Adrien Mazarguil @ 2016-12-19 17:48 UTC (permalink / raw)
  To: dev
In-Reply-To: <cover.1482168851.git.adrien.mazarguil@6wind.com>

Add basic management functions for the generic flow API (validate, create,
destroy, flush, query and list). Flow rule objects and properties are
arranged in lists associated with each port.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Olga Shern <olgas@mellanox.com>
---
 app/test-pmd/cmdline.c     |   1 +
 app/test-pmd/config.c      | 498 ++++++++++++++++++++++++++++++++++++++++
 app/test-pmd/csumonly.c    |   1 +
 app/test-pmd/flowgen.c     |   1 +
 app/test-pmd/icmpecho.c    |   1 +
 app/test-pmd/ieee1588fwd.c |   1 +
 app/test-pmd/iofwd.c       |   1 +
 app/test-pmd/macfwd.c      |   1 +
 app/test-pmd/macswap.c     |   1 +
 app/test-pmd/parameters.c  |   1 +
 app/test-pmd/rxonly.c      |   1 +
 app/test-pmd/testpmd.c     |   6 +
 app/test-pmd/testpmd.h     |  27 +++
 app/test-pmd/txonly.c      |   1 +
 14 files changed, 542 insertions(+)

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index d03a592..5d1c0dd 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -75,6 +75,7 @@
 #include <rte_string_fns.h>
 #include <rte_devargs.h>
 #include <rte_eth_ctrl.h>
+#include <rte_flow.h>
 
 #include <cmdline_rdline.h>
 #include <cmdline_parse.h>
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 8cf537d..9716ce7 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -92,6 +92,8 @@
 #include <rte_ethdev.h>
 #include <rte_string_fns.h>
 #include <rte_cycles.h>
+#include <rte_flow.h>
+#include <rte_errno.h>
 
 #include "testpmd.h"
 
@@ -751,6 +753,502 @@ port_mtu_set(portid_t port_id, uint16_t mtu)
 	printf("Set MTU failed. diag=%d\n", diag);
 }
 
+/* Generic flow management functions. */
+
+/** Generate flow_item[] entry. */
+#define MK_FLOW_ITEM(t, s) \
+	[RTE_FLOW_ITEM_TYPE_ ## t] = { \
+		.name = # t, \
+		.size = s, \
+	}
+
+/** Information about known flow pattern items. */
+static const struct {
+	const char *name;
+	size_t size;
+} flow_item[] = {
+	MK_FLOW_ITEM(END, 0),
+	MK_FLOW_ITEM(VOID, 0),
+	MK_FLOW_ITEM(INVERT, 0),
+	MK_FLOW_ITEM(ANY, sizeof(struct rte_flow_item_any)),
+	MK_FLOW_ITEM(PF, 0),
+	MK_FLOW_ITEM(VF, sizeof(struct rte_flow_item_vf)),
+	MK_FLOW_ITEM(PORT, sizeof(struct rte_flow_item_port)),
+	MK_FLOW_ITEM(RAW, sizeof(struct rte_flow_item_raw)), /* +pattern[] */
+	MK_FLOW_ITEM(ETH, sizeof(struct rte_flow_item_eth)),
+	MK_FLOW_ITEM(VLAN, sizeof(struct rte_flow_item_vlan)),
+	MK_FLOW_ITEM(IPV4, sizeof(struct rte_flow_item_ipv4)),
+	MK_FLOW_ITEM(IPV6, sizeof(struct rte_flow_item_ipv6)),
+	MK_FLOW_ITEM(ICMP, sizeof(struct rte_flow_item_icmp)),
+	MK_FLOW_ITEM(UDP, sizeof(struct rte_flow_item_udp)),
+	MK_FLOW_ITEM(TCP, sizeof(struct rte_flow_item_tcp)),
+	MK_FLOW_ITEM(SCTP, sizeof(struct rte_flow_item_sctp)),
+	MK_FLOW_ITEM(VXLAN, sizeof(struct rte_flow_item_vxlan)),
+};
+
+/** Compute storage space needed by item specification. */
+static void
+flow_item_spec_size(const struct rte_flow_item *item,
+		    size_t *size, size_t *pad)
+{
+	if (!item->spec)
+		goto empty;
+	switch (item->type) {
+		union {
+			const struct rte_flow_item_raw *raw;
+		} spec;
+
+	case RTE_FLOW_ITEM_TYPE_RAW:
+		spec.raw = item->spec;
+		*size = offsetof(struct rte_flow_item_raw, pattern) +
+			spec.raw->length * sizeof(*spec.raw->pattern);
+		break;
+	default:
+empty:
+		*size = 0;
+		break;
+	}
+	*pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
+}
+
+/** Generate flow_action[] entry. */
+#define MK_FLOW_ACTION(t, s) \
+	[RTE_FLOW_ACTION_TYPE_ ## t] = { \
+		.name = # t, \
+		.size = s, \
+	}
+
+/** Information about known flow actions. */
+static const struct {
+	const char *name;
+	size_t size;
+} flow_action[] = {
+	MK_FLOW_ACTION(END, 0),
+	MK_FLOW_ACTION(VOID, 0),
+	MK_FLOW_ACTION(PASSTHRU, 0),
+	MK_FLOW_ACTION(MARK, sizeof(struct rte_flow_action_mark)),
+	MK_FLOW_ACTION(FLAG, 0),
+	MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)),
+	MK_FLOW_ACTION(DROP, 0),
+	MK_FLOW_ACTION(COUNT, 0),
+	MK_FLOW_ACTION(DUP, sizeof(struct rte_flow_action_dup)),
+	MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)), /* +queue[] */
+	MK_FLOW_ACTION(PF, 0),
+	MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)),
+};
+
+/** Compute storage space needed by action configuration. */
+static void
+flow_action_conf_size(const struct rte_flow_action *action,
+		      size_t *size, size_t *pad)
+{
+	if (!action->conf)
+		goto empty;
+	switch (action->type) {
+		union {
+			const struct rte_flow_action_rss *rss;
+		} conf;
+
+	case RTE_FLOW_ACTION_TYPE_RSS:
+		conf.rss = action->conf;
+		*size = offsetof(struct rte_flow_action_rss, queue) +
+			conf.rss->num * sizeof(*conf.rss->queue);
+		break;
+	default:
+empty:
+		*size = 0;
+		break;
+	}
+	*pad = RTE_ALIGN_CEIL(*size, sizeof(double)) - *size;
+}
+
+/** Generate a port_flow entry from attributes/pattern/actions. */
+static struct port_flow *
+port_flow_new(const struct rte_flow_attr *attr,
+	      const struct rte_flow_item *pattern,
+	      const struct rte_flow_action *actions)
+{
+	const struct rte_flow_item *item;
+	const struct rte_flow_action *action;
+	struct port_flow *pf = NULL;
+	size_t tmp;
+	size_t pad;
+	size_t off1 = 0;
+	size_t off2 = 0;
+	int err = ENOTSUP;
+
+store:
+	item = pattern;
+	if (pf)
+		pf->pattern = (void *)&pf->data[off1];
+	do {
+		struct rte_flow_item *dst = NULL;
+
+		if ((unsigned int)item->type > RTE_DIM(flow_item) ||
+		    !flow_item[item->type].name)
+			goto notsup;
+		if (pf)
+			dst = memcpy(pf->data + off1, item, sizeof(*item));
+		off1 += sizeof(*item);
+		flow_item_spec_size(item, &tmp, &pad);
+		if (item->spec) {
+			if (pf)
+				dst->spec = memcpy(pf->data + off2,
+						   item->spec, tmp);
+			off2 += tmp + pad;
+		}
+		if (item->last) {
+			if (pf)
+				dst->last = memcpy(pf->data + off2,
+						   item->last, tmp);
+			off2 += tmp + pad;
+		}
+		if (item->mask) {
+			if (pf)
+				dst->mask = memcpy(pf->data + off2,
+						   item->mask, tmp);
+			off2 += tmp + pad;
+		}
+		off2 = RTE_ALIGN_CEIL(off2, sizeof(double));
+	} while ((item++)->type != RTE_FLOW_ITEM_TYPE_END);
+	off1 = RTE_ALIGN_CEIL(off1, sizeof(double));
+	action = actions;
+	if (pf)
+		pf->actions = (void *)&pf->data[off1];
+	do {
+		struct rte_flow_action *dst = NULL;
+
+		if ((unsigned int)action->type > RTE_DIM(flow_action) ||
+		    !flow_action[action->type].name)
+			goto notsup;
+		if (pf)
+			dst = memcpy(pf->data + off1, action, sizeof(*action));
+		off1 += sizeof(*action);
+		flow_action_conf_size(action, &tmp, &pad);
+		if (action->conf) {
+			if (pf)
+				dst->conf = memcpy(pf->data + off2,
+						   action->conf, tmp);
+			off2 += tmp + pad;
+		}
+		off2 = RTE_ALIGN_CEIL(off2, sizeof(double));
+	} while ((action++)->type != RTE_FLOW_ACTION_TYPE_END);
+	if (pf != NULL)
+		return pf;
+	off1 = RTE_ALIGN_CEIL(off1, sizeof(double));
+	tmp = RTE_ALIGN_CEIL(offsetof(struct port_flow, data), sizeof(double));
+	pf = calloc(1, tmp + off1 + off2);
+	if (pf == NULL)
+		err = errno;
+	else {
+		*pf = (const struct port_flow){
+			.size = tmp + off1 + off2,
+			.attr = *attr,
+		};
+		tmp -= offsetof(struct port_flow, data);
+		off2 = tmp + off1;
+		off1 = tmp;
+		goto store;
+	}
+notsup:
+	rte_errno = err;
+	return NULL;
+}
+
+/** Print a message out of a flow error. */
+static int
+port_flow_complain(struct rte_flow_error *error)
+{
+	static const char *const errstrlist[] = {
+		[RTE_FLOW_ERROR_TYPE_NONE] = "no error",
+		[RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
+		[RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
+		[RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
+		[RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
+		[RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
+		[RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
+		[RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
+		[RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
+		[RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
+		[RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
+		[RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
+	};
+	const char *errstr;
+	char buf[32];
+	int err = rte_errno;
+
+	if ((unsigned int)error->type > RTE_DIM(errstrlist) ||
+	    !errstrlist[error->type])
+		errstr = "unknown type";
+	else
+		errstr = errstrlist[error->type];
+	printf("Caught error type %d (%s): %s%s\n",
+	       error->type, errstr,
+	       error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
+					error->cause), buf) : "",
+	       error->message ? error->message : "(no stated reason)");
+	return -err;
+}
+
+/** Validate flow rule. */
+int
+port_flow_validate(portid_t port_id,
+		   const struct rte_flow_attr *attr,
+		   const struct rte_flow_item *pattern,
+		   const struct rte_flow_action *actions)
+{
+	struct rte_flow_error error;
+
+	/* Poisoning to make sure PMDs update it in case of error. */
+	memset(&error, 0x11, sizeof(error));
+	if (rte_flow_validate(port_id, attr, pattern, actions, &error))
+		return port_flow_complain(&error);
+	printf("Flow rule validated\n");
+	return 0;
+}
+
+/** Create flow rule. */
+int
+port_flow_create(portid_t port_id,
+		 const struct rte_flow_attr *attr,
+		 const struct rte_flow_item *pattern,
+		 const struct rte_flow_action *actions)
+{
+	struct rte_flow *flow;
+	struct rte_port *port;
+	struct port_flow *pf;
+	uint32_t id;
+	struct rte_flow_error error;
+
+	/* Poisoning to make sure PMDs update it in case of error. */
+	memset(&error, 0x22, sizeof(error));
+	flow = rte_flow_create(port_id, attr, pattern, actions, &error);
+	if (!flow)
+		return port_flow_complain(&error);
+	port = &ports[port_id];
+	if (port->flow_list) {
+		if (port->flow_list->id == UINT32_MAX) {
+			printf("Highest rule ID is already assigned, delete"
+			       " it first");
+			rte_flow_destroy(port_id, flow, NULL);
+			return -ENOMEM;
+		}
+		id = port->flow_list->id + 1;
+	} else
+		id = 0;
+	pf = port_flow_new(attr, pattern, actions);
+	if (!pf) {
+		int err = rte_errno;
+
+		printf("Cannot allocate flow: %s\n", rte_strerror(err));
+		rte_flow_destroy(port_id, flow, NULL);
+		return -err;
+	}
+	pf->next = port->flow_list;
+	pf->id = id;
+	pf->flow = flow;
+	port->flow_list = pf;
+	printf("Flow rule #%u created\n", pf->id);
+	return 0;
+}
+
+/** Destroy a number of flow rules. */
+int
+port_flow_destroy(portid_t port_id, uint32_t n, const uint32_t *rule)
+{
+	struct rte_port *port;
+	struct port_flow **tmp;
+	uint32_t c = 0;
+	int ret = 0;
+
+	if (port_id_is_invalid(port_id, ENABLED_WARN) ||
+	    port_id == (portid_t)RTE_PORT_ALL)
+		return -EINVAL;
+	port = &ports[port_id];
+	tmp = &port->flow_list;
+	while (*tmp) {
+		uint32_t i;
+
+		for (i = 0; i != n; ++i) {
+			struct rte_flow_error error;
+			struct port_flow *pf = *tmp;
+
+			if (rule[i] != pf->id)
+				continue;
+			/*
+			 * Poisoning to make sure PMDs update it in case
+			 * of error.
+			 */
+			memset(&error, 0x33, sizeof(error));
+			if (rte_flow_destroy(port_id, pf->flow, &error)) {
+				ret = port_flow_complain(&error);
+				continue;
+			}
+			printf("Flow rule #%u destroyed\n", pf->id);
+			*tmp = pf->next;
+			free(pf);
+			break;
+		}
+		if (i == n)
+			tmp = &(*tmp)->next;
+		++c;
+	}
+	return ret;
+}
+
+/** Remove all flow rules. */
+int
+port_flow_flush(portid_t port_id)
+{
+	struct rte_flow_error error;
+	struct rte_port *port;
+	int ret = 0;
+
+	/* Poisoning to make sure PMDs update it in case of error. */
+	memset(&error, 0x44, sizeof(error));
+	if (rte_flow_flush(port_id, &error)) {
+		ret = port_flow_complain(&error);
+		if (port_id_is_invalid(port_id, DISABLED_WARN) ||
+		    port_id == (portid_t)RTE_PORT_ALL)
+			return ret;
+	}
+	port = &ports[port_id];
+	while (port->flow_list) {
+		struct port_flow *pf = port->flow_list->next;
+
+		free(port->flow_list);
+		port->flow_list = pf;
+	}
+	return ret;
+}
+
+/** Query a flow rule. */
+int
+port_flow_query(portid_t port_id, uint32_t rule,
+		enum rte_flow_action_type action)
+{
+	struct rte_flow_error error;
+	struct rte_port *port;
+	struct port_flow *pf;
+	const char *name;
+	union {
+		struct rte_flow_query_count count;
+	} query;
+
+	if (port_id_is_invalid(port_id, ENABLED_WARN) ||
+	    port_id == (portid_t)RTE_PORT_ALL)
+		return -EINVAL;
+	port = &ports[port_id];
+	for (pf = port->flow_list; pf; pf = pf->next)
+		if (pf->id == rule)
+			break;
+	if (!pf) {
+		printf("Flow rule #%u not found\n", rule);
+		return -ENOENT;
+	}
+	if ((unsigned int)action > RTE_DIM(flow_action) ||
+	    !flow_action[action].name)
+		name = "unknown";
+	else
+		name = flow_action[action].name;
+	switch (action) {
+	case RTE_FLOW_ACTION_TYPE_COUNT:
+		break;
+	default:
+		printf("Cannot query action type %d (%s)\n", action, name);
+		return -ENOTSUP;
+	}
+	/* Poisoning to make sure PMDs update it in case of error. */
+	memset(&error, 0x55, sizeof(error));
+	memset(&query, 0, sizeof(query));
+	if (rte_flow_query(port_id, pf->flow, action, &query, &error))
+		return port_flow_complain(&error);
+	switch (action) {
+	case RTE_FLOW_ACTION_TYPE_COUNT:
+		printf("%s:\n"
+		       " hits_set: %u\n"
+		       " bytes_set: %u\n"
+		       " hits: %" PRIu64 "\n"
+		       " bytes: %" PRIu64 "\n",
+		       name,
+		       query.count.hits_set,
+		       query.count.bytes_set,
+		       query.count.hits,
+		       query.count.bytes);
+		break;
+	default:
+		printf("Cannot display result for action type %d (%s)\n",
+		       action, name);
+		break;
+	}
+	return 0;
+}
+
+/** List flow rules. */
+void
+port_flow_list(portid_t port_id, uint32_t n, const uint32_t group[n])
+{
+	struct rte_port *port;
+	struct port_flow *pf;
+	struct port_flow *list = NULL;
+	uint32_t i;
+
+	if (port_id_is_invalid(port_id, ENABLED_WARN) ||
+	    port_id == (portid_t)RTE_PORT_ALL)
+		return;
+	port = &ports[port_id];
+	if (!port->flow_list)
+		return;
+	/* Sort flows by group, priority and ID. */
+	for (pf = port->flow_list; pf != NULL; pf = pf->next) {
+		struct port_flow **tmp;
+
+		if (n) {
+			/* Filter out unwanted groups. */
+			for (i = 0; i != n; ++i)
+				if (pf->attr.group == group[i])
+					break;
+			if (i == n)
+				continue;
+		}
+		tmp = &list;
+		while (*tmp &&
+		       (pf->attr.group > (*tmp)->attr.group ||
+			(pf->attr.group == (*tmp)->attr.group &&
+			 pf->attr.priority > (*tmp)->attr.priority) ||
+			(pf->attr.group == (*tmp)->attr.group &&
+			 pf->attr.priority == (*tmp)->attr.priority &&
+			 pf->id > (*tmp)->id)))
+			tmp = &(*tmp)->tmp;
+		pf->tmp = *tmp;
+		*tmp = pf;
+	}
+	printf("ID\tGroup\tPrio\tAttr\tRule\n");
+	for (pf = list; pf != NULL; pf = pf->tmp) {
+		const struct rte_flow_item *item = pf->pattern;
+		const struct rte_flow_action *action = pf->actions;
+
+		printf("%" PRIu32 "\t%" PRIu32 "\t%" PRIu32 "\t%c%c\t",
+		       pf->id,
+		       pf->attr.group,
+		       pf->attr.priority,
+		       pf->attr.ingress ? 'i' : '-',
+		       pf->attr.egress ? 'e' : '-');
+		while (item->type != RTE_FLOW_ITEM_TYPE_END) {
+			if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
+				printf("%s ", flow_item[item->type].name);
+			++item;
+		}
+		printf("=>");
+		while (action->type != RTE_FLOW_ACTION_TYPE_END) {
+			if (action->type != RTE_FLOW_ACTION_TYPE_VOID)
+				printf(" %s", flow_action[action->type].name);
+			++action;
+		}
+		printf("\n");
+	}
+}
+
 /*
  * RX/TX ring descriptors display functions.
  */
diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c
index 57e6ae2..dd67ebf 100644
--- a/app/test-pmd/csumonly.c
+++ b/app/test-pmd/csumonly.c
@@ -70,6 +70,7 @@
 #include <rte_sctp.h>
 #include <rte_prefetch.h>
 #include <rte_string_fns.h>
+#include <rte_flow.h>
 #include "testpmd.h"
 
 #define IP_DEFTTL  64   /* from RFC 1340. */
diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index b13ff89..13b4f90 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -68,6 +68,7 @@
 #include <rte_tcp.h>
 #include <rte_udp.h>
 #include <rte_string_fns.h>
+#include <rte_flow.h>
 
 #include "testpmd.h"
 
diff --git a/app/test-pmd/icmpecho.c b/app/test-pmd/icmpecho.c
index 6a4e750..f25a8f5 100644
--- a/app/test-pmd/icmpecho.c
+++ b/app/test-pmd/icmpecho.c
@@ -61,6 +61,7 @@
 #include <rte_ip.h>
 #include <rte_icmp.h>
 #include <rte_string_fns.h>
+#include <rte_flow.h>
 
 #include "testpmd.h"
 
diff --git a/app/test-pmd/ieee1588fwd.c b/app/test-pmd/ieee1588fwd.c
index 0d3b37a..51170ee 100644
--- a/app/test-pmd/ieee1588fwd.c
+++ b/app/test-pmd/ieee1588fwd.c
@@ -34,6 +34,7 @@
 
 #include <rte_cycles.h>
 #include <rte_ethdev.h>
+#include <rte_flow.h>
 
 #include "testpmd.h"
 
diff --git a/app/test-pmd/iofwd.c b/app/test-pmd/iofwd.c
index 26936b7..15cb4a2 100644
--- a/app/test-pmd/iofwd.c
+++ b/app/test-pmd/iofwd.c
@@ -64,6 +64,7 @@
 #include <rte_ether.h>
 #include <rte_ethdev.h>
 #include <rte_string_fns.h>
+#include <rte_flow.h>
 
 #include "testpmd.h"
 
diff --git a/app/test-pmd/macfwd.c b/app/test-pmd/macfwd.c
index 86e01de..d361db1 100644
--- a/app/test-pmd/macfwd.c
+++ b/app/test-pmd/macfwd.c
@@ -65,6 +65,7 @@
 #include <rte_ethdev.h>
 #include <rte_ip.h>
 #include <rte_string_fns.h>
+#include <rte_flow.h>
 
 #include "testpmd.h"
 
diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c
index 36e139f..f996039 100644
--- a/app/test-pmd/macswap.c
+++ b/app/test-pmd/macswap.c
@@ -65,6 +65,7 @@
 #include <rte_ethdev.h>
 #include <rte_ip.h>
 #include <rte_string_fns.h>
+#include <rte_flow.h>
 
 #include "testpmd.h"
 
diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index 08e5a76..28db8cd 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -76,6 +76,7 @@
 #ifdef RTE_LIBRTE_PMD_BOND
 #include <rte_eth_bond.h>
 #endif
+#include <rte_flow.h>
 
 #include "testpmd.h"
 
diff --git a/app/test-pmd/rxonly.c b/app/test-pmd/rxonly.c
index fff815c..cf00576 100644
--- a/app/test-pmd/rxonly.c
+++ b/app/test-pmd/rxonly.c
@@ -67,6 +67,7 @@
 #include <rte_ip.h>
 #include <rte_udp.h>
 #include <rte_net.h>
+#include <rte_flow.h>
 
 #include "testpmd.h"
 
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index a0332c2..bfb2f8e 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -78,6 +78,7 @@
 #ifdef RTE_LIBRTE_PDUMP
 #include <rte_pdump.h>
 #endif
+#include <rte_flow.h>
 
 #include "testpmd.h"
 
@@ -1545,6 +1546,8 @@ close_port(portid_t pid)
 			continue;
 		}
 
+		if (port->flow_list)
+			port_flow_flush(pi);
 		rte_eth_dev_close(pi);
 
 		if (rte_atomic16_cmpset(&(port->port_status),
@@ -1599,6 +1602,9 @@ detach_port(uint8_t port_id)
 		return;
 	}
 
+	if (ports[port_id].flow_list)
+		port_flow_flush(port_id);
+
 	if (rte_eth_dev_detach(port_id, name))
 		return;
 
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 9c1e703..22ce2d6 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -144,6 +144,19 @@ struct fwd_stream {
 /** Insert double VLAN header in forward engine */
 #define TESTPMD_TX_OFFLOAD_INSERT_QINQ       0x0080
 
+/** Descriptor for a single flow. */
+struct port_flow {
+	size_t size; /**< Allocated space including data[]. */
+	struct port_flow *next; /**< Next flow in list. */
+	struct port_flow *tmp; /**< Temporary linking. */
+	uint32_t id; /**< Flow rule ID. */
+	struct rte_flow *flow; /**< Opaque flow object returned by PMD. */
+	struct rte_flow_attr attr; /**< Attributes. */
+	struct rte_flow_item *pattern; /**< Pattern. */
+	struct rte_flow_action *actions; /**< Actions. */
+	uint8_t data[]; /**< Storage for pattern/actions. */
+};
+
 /**
  * The data structure associated with each port.
  */
@@ -177,6 +190,7 @@ struct rte_port {
 	struct ether_addr       *mc_addr_pool; /**< pool of multicast addrs */
 	uint32_t                mc_addr_nb; /**< nb. of addr. in mc_addr_pool */
 	uint8_t                 slave_flag; /**< bonding slave port */
+	struct port_flow        *flow_list; /**< Associated flows. */
 };
 
 extern portid_t __rte_unused
@@ -504,6 +518,19 @@ void port_reg_bit_field_set(portid_t port_id, uint32_t reg_off,
 			    uint8_t bit1_pos, uint8_t bit2_pos, uint32_t value);
 void port_reg_display(portid_t port_id, uint32_t reg_off);
 void port_reg_set(portid_t port_id, uint32_t reg_off, uint32_t value);
+int port_flow_validate(portid_t port_id,
+		       const struct rte_flow_attr *attr,
+		       const struct rte_flow_item *pattern,
+		       const struct rte_flow_action *actions);
+int port_flow_create(portid_t port_id,
+		     const struct rte_flow_attr *attr,
+		     const struct rte_flow_item *pattern,
+		     const struct rte_flow_action *actions);
+int port_flow_destroy(portid_t port_id, uint32_t n, const uint32_t *rule);
+int port_flow_flush(portid_t port_id);
+int port_flow_query(portid_t port_id, uint32_t rule,
+		    enum rte_flow_action_type action);
+void port_flow_list(portid_t port_id, uint32_t n, const uint32_t *group);
 
 void rx_ring_desc_display(portid_t port_id, queueid_t rxq_id, uint16_t rxd_id);
 void tx_ring_desc_display(portid_t port_id, queueid_t txq_id, uint16_t txd_id);
diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index 8513a06..e996f35 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -68,6 +68,7 @@
 #include <rte_tcp.h>
 #include <rte_udp.h>
 #include <rte_string_fns.h>
+#include <rte_flow.h>
 
 #include "testpmd.h"
 
-- 
2.1.4

^ permalink raw reply related

* [PATCH v3 05/25] cmdline: add alignment constraint
From: Adrien Mazarguil @ 2016-12-19 17:48 UTC (permalink / raw)
  To: dev
In-Reply-To: <cover.1482168851.git.adrien.mazarguil@6wind.com>

This prevents sigbus errors on architectures that cannot handle unexpected
unaligned accesses to the output buffer.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Olga Shern <olgas@mellanox.com>
---
 lib/librte_cmdline/cmdline_parse.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/lib/librte_cmdline/cmdline_parse.c b/lib/librte_cmdline/cmdline_parse.c
index 14f5553..763c286 100644
--- a/lib/librte_cmdline/cmdline_parse.c
+++ b/lib/librte_cmdline/cmdline_parse.c
@@ -255,7 +255,10 @@ cmdline_parse(struct cmdline *cl, const char * buf)
 	unsigned int inst_num=0;
 	cmdline_parse_inst_t *inst;
 	const char *curbuf;
-	char result_buf[CMDLINE_PARSE_RESULT_BUFSIZE];
+	union {
+		char buf[CMDLINE_PARSE_RESULT_BUFSIZE];
+		long double align; /* strong alignment constraint for buf */
+	} result;
 	cmdline_parse_token_hdr_t *dyn_tokens[CMDLINE_PARSE_DYNAMIC_TOKENS];
 	void (*f)(void *, struct cmdline *, void *) = NULL;
 	void *data = NULL;
@@ -318,7 +321,7 @@ cmdline_parse(struct cmdline *cl, const char * buf)
 		debug_printf("INST %d\n", inst_num);
 
 		/* fully parsed */
-		tok = match_inst(inst, buf, 0, result_buf, sizeof(result_buf),
+		tok = match_inst(inst, buf, 0, result.buf, sizeof(result.buf),
 				 &dyn_tokens);
 
 		if (tok > 0) /* we matched at least one token */
@@ -353,7 +356,7 @@ cmdline_parse(struct cmdline *cl, const char * buf)
 
 	/* call func */
 	if (f) {
-		f(result_buf, cl, data);
+		f(result.buf, cl, data);
 	}
 
 	/* no match */
-- 
2.1.4

^ permalink raw reply related

* [PATCH v3 04/25] cmdline: add support for dynamic tokens
From: Adrien Mazarguil @ 2016-12-19 17:48 UTC (permalink / raw)
  To: dev
In-Reply-To: <cover.1482168851.git.adrien.mazarguil@6wind.com>

Considering tokens must be hard-coded in a list part of the instruction
structure, context-dependent tokens cannot be expressed.

This commit adds support for building dynamic token lists through a
user-provided function, which is called when the static token list is empty
(a single NULL entry).

Because no structures are modified (existing fields are reused), this
commit has no impact on the current ABI.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Olga Shern <olgas@mellanox.com>
---
 lib/librte_cmdline/cmdline_parse.c | 60 +++++++++++++++++++++++++++++----
 lib/librte_cmdline/cmdline_parse.h | 21 ++++++++++++
 2 files changed, 74 insertions(+), 7 deletions(-)

diff --git a/lib/librte_cmdline/cmdline_parse.c b/lib/librte_cmdline/cmdline_parse.c
index b496067..14f5553 100644
--- a/lib/librte_cmdline/cmdline_parse.c
+++ b/lib/librte_cmdline/cmdline_parse.c
@@ -146,7 +146,9 @@ nb_common_chars(const char * s1, const char * s2)
  */
 static int
 match_inst(cmdline_parse_inst_t *inst, const char *buf,
-	   unsigned int nb_match_token, void *resbuf, unsigned resbuf_size)
+	   unsigned int nb_match_token, void *resbuf, unsigned resbuf_size,
+	   cmdline_parse_token_hdr_t
+		*(*dyn_tokens)[CMDLINE_PARSE_DYNAMIC_TOKENS])
 {
 	unsigned int token_num=0;
 	cmdline_parse_token_hdr_t * token_p;
@@ -155,6 +157,11 @@ match_inst(cmdline_parse_inst_t *inst, const char *buf,
 	struct cmdline_token_hdr token_hdr;
 
 	token_p = inst->tokens[token_num];
+	if (!token_p && dyn_tokens && inst->f) {
+		if (!(*dyn_tokens)[0])
+			inst->f(&(*dyn_tokens)[0], NULL, dyn_tokens);
+		token_p = (*dyn_tokens)[0];
+	}
 	if (token_p)
 		memcpy(&token_hdr, token_p, sizeof(token_hdr));
 
@@ -196,7 +203,17 @@ match_inst(cmdline_parse_inst_t *inst, const char *buf,
 		buf += n;
 
 		token_num ++;
-		token_p = inst->tokens[token_num];
+		if (!inst->tokens[0]) {
+			if (token_num < (CMDLINE_PARSE_DYNAMIC_TOKENS - 1)) {
+				if (!(*dyn_tokens)[token_num])
+					inst->f(&(*dyn_tokens)[token_num],
+						NULL,
+						dyn_tokens);
+				token_p = (*dyn_tokens)[token_num];
+			} else
+				token_p = NULL;
+		} else
+			token_p = inst->tokens[token_num];
 		if (token_p)
 			memcpy(&token_hdr, token_p, sizeof(token_hdr));
 	}
@@ -239,6 +256,7 @@ cmdline_parse(struct cmdline *cl, const char * buf)
 	cmdline_parse_inst_t *inst;
 	const char *curbuf;
 	char result_buf[CMDLINE_PARSE_RESULT_BUFSIZE];
+	cmdline_parse_token_hdr_t *dyn_tokens[CMDLINE_PARSE_DYNAMIC_TOKENS];
 	void (*f)(void *, struct cmdline *, void *) = NULL;
 	void *data = NULL;
 	int comment = 0;
@@ -255,6 +273,7 @@ cmdline_parse(struct cmdline *cl, const char * buf)
 		return CMDLINE_PARSE_BAD_ARGS;
 
 	ctx = cl->ctx;
+	memset(&dyn_tokens, 0, sizeof(dyn_tokens));
 
 	/*
 	 * - look if the buffer contains at least one line
@@ -299,7 +318,8 @@ cmdline_parse(struct cmdline *cl, const char * buf)
 		debug_printf("INST %d\n", inst_num);
 
 		/* fully parsed */
-		tok = match_inst(inst, buf, 0, result_buf, sizeof(result_buf));
+		tok = match_inst(inst, buf, 0, result_buf, sizeof(result_buf),
+				 &dyn_tokens);
 
 		if (tok > 0) /* we matched at least one token */
 			err = CMDLINE_PARSE_BAD_ARGS;
@@ -355,6 +375,7 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state,
 	cmdline_parse_token_hdr_t *token_p;
 	struct cmdline_token_hdr token_hdr;
 	char tmpbuf[CMDLINE_BUFFER_SIZE], comp_buf[CMDLINE_BUFFER_SIZE];
+	cmdline_parse_token_hdr_t *dyn_tokens[CMDLINE_PARSE_DYNAMIC_TOKENS];
 	unsigned int partial_tok_len;
 	int comp_len = -1;
 	int tmp_len = -1;
@@ -374,6 +395,7 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state,
 
 	debug_printf("%s called\n", __func__);
 	memset(&token_hdr, 0, sizeof(token_hdr));
+	memset(&dyn_tokens, 0, sizeof(dyn_tokens));
 
 	/* count the number of complete token to parse */
 	for (i=0 ; buf[i] ; i++) {
@@ -396,11 +418,24 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state,
 		inst = ctx[inst_num];
 		while (inst) {
 			/* parse the first tokens of the inst */
-			if (nb_token && match_inst(inst, buf, nb_token, NULL, 0))
+			if (nb_token &&
+			    match_inst(inst, buf, nb_token, NULL, 0,
+				       &dyn_tokens))
 				goto next;
 
 			debug_printf("instruction match\n");
-			token_p = inst->tokens[nb_token];
+			if (!inst->tokens[0]) {
+				if (nb_token <
+				    (CMDLINE_PARSE_DYNAMIC_TOKENS - 1)) {
+					if (!dyn_tokens[nb_token])
+						inst->f(&dyn_tokens[nb_token],
+							NULL,
+							&dyn_tokens);
+					token_p = dyn_tokens[nb_token];
+				} else
+					token_p = NULL;
+			} else
+				token_p = inst->tokens[nb_token];
 			if (token_p)
 				memcpy(&token_hdr, token_p, sizeof(token_hdr));
 
@@ -490,10 +525,21 @@ cmdline_complete(struct cmdline *cl, const char *buf, int *state,
 		/* we need to redo it */
 		inst = ctx[inst_num];
 
-		if (nb_token && match_inst(inst, buf, nb_token, NULL, 0))
+		if (nb_token &&
+		    match_inst(inst, buf, nb_token, NULL, 0, &dyn_tokens))
 			goto next2;
 
-		token_p = inst->tokens[nb_token];
+		if (!inst->tokens[0]) {
+			if (nb_token < (CMDLINE_PARSE_DYNAMIC_TOKENS - 1)) {
+				if (!dyn_tokens[nb_token])
+					inst->f(&dyn_tokens[nb_token],
+						NULL,
+						&dyn_tokens);
+				token_p = dyn_tokens[nb_token];
+			} else
+				token_p = NULL;
+		} else
+			token_p = inst->tokens[nb_token];
 		if (token_p)
 			memcpy(&token_hdr, token_p, sizeof(token_hdr));
 
diff --git a/lib/librte_cmdline/cmdline_parse.h b/lib/librte_cmdline/cmdline_parse.h
index 4ac05d6..65b18d4 100644
--- a/lib/librte_cmdline/cmdline_parse.h
+++ b/lib/librte_cmdline/cmdline_parse.h
@@ -83,6 +83,9 @@ extern "C" {
 /* maximum buffer size for parsed result */
 #define CMDLINE_PARSE_RESULT_BUFSIZE 8192
 
+/* maximum number of dynamic tokens */
+#define CMDLINE_PARSE_DYNAMIC_TOKENS 128
+
 /**
  * Stores a pointer to the ops struct, and the offset: the place to
  * write the parsed result in the destination structure.
@@ -130,6 +133,24 @@ struct cmdline;
  * Store a instruction, which is a pointer to a callback function and
  * its parameter that is called when the instruction is parsed, a help
  * string, and a list of token composing this instruction.
+ *
+ * When no tokens are defined (tokens[0] == NULL), they are retrieved
+ * dynamically by calling f() as follows:
+ *
+ *  f((struct cmdline_token_hdr **)&token_hdr,
+ *    NULL,
+ *    (struct cmdline_token_hdr *[])tokens));
+ *
+ * The address of the resulting token is expected at the location pointed by
+ * the first argument. Can be set to NULL to end the list.
+ *
+ * The cmdline argument (struct cmdline *) is always NULL.
+ *
+ * The last argument points to the NULL-terminated list of dynamic tokens
+ * defined so far. Since token_hdr points to an index of that list, the
+ * current index can be derived as follows:
+ *
+ *  int index = token_hdr - &(*tokens)[0];
  */
 struct cmdline_inst {
 	/* f(parsed_struct, data) */
-- 
2.1.4

^ permalink raw reply related

* [PATCH v3 02/25] doc: add rte_flow prog guide
From: Adrien Mazarguil @ 2016-12-19 17:48 UTC (permalink / raw)
  To: dev
In-Reply-To: <cover.1482168851.git.adrien.mazarguil@6wind.com>

This documentation is based on the latest RFC submission, subsequently
updated according to feedback from the community.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Olga Shern <olgas@mellanox.com>
---
 doc/guides/prog_guide/index.rst    |    1 +
 doc/guides/prog_guide/rte_flow.rst | 2042 +++++++++++++++++++++++++++++++
 2 files changed, 2043 insertions(+)

diff --git a/doc/guides/prog_guide/index.rst b/doc/guides/prog_guide/index.rst
index e5a50a8..ed7f770 100644
--- a/doc/guides/prog_guide/index.rst
+++ b/doc/guides/prog_guide/index.rst
@@ -42,6 +42,7 @@ Programmer's Guide
     mempool_lib
     mbuf_lib
     poll_mode_drv
+    rte_flow
     cryptodev_lib
     link_bonding_poll_mode_drv_lib
     timer_lib
diff --git a/doc/guides/prog_guide/rte_flow.rst b/doc/guides/prog_guide/rte_flow.rst
new file mode 100644
index 0000000..73fe809
--- /dev/null
+++ b/doc/guides/prog_guide/rte_flow.rst
@@ -0,0 +1,2042 @@
+..  BSD LICENSE
+    Copyright 2016 6WIND S.A.
+    Copyright 2016 Mellanox.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions
+    are met:
+
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+    * Neither the name of 6WIND S.A. nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+.. _Generic_flow_API:
+
+Generic flow API (rte_flow)
+===========================
+
+Overview
+--------
+
+This API provides a generic means to configure hardware to match specific
+ingress or egress traffic, alter its fate and query related counters
+according to any number of user-defined rules.
+
+It is named *rte_flow* after the prefix used for all its symbols, and is
+defined in ``rte_flow.h``.
+
+- Matching can be performed on packet data (protocol headers, payload) and
+  properties (e.g. associated physical port, virtual device function ID).
+
+- Possible operations include dropping traffic, diverting it to specific
+  queues, to virtual/physical device functions or ports, performing tunnel
+  offloads, adding marks and so on.
+
+It is slightly higher-level than the legacy filtering framework which it
+encompasses and supersedes (including all functions and filter types) in
+order to expose a single interface with an unambiguous behavior that is
+common to all poll-mode drivers (PMDs).
+
+Several methods to migrate existing applications are described in `API
+migration`_.
+
+Flow rule
+---------
+
+Description
+~~~~~~~~~~~
+
+A flow rule is the combination of attributes with a matching pattern and a
+list of actions. Flow rules form the basis of this API.
+
+Flow rules can have several distinct actions (such as counting,
+encapsulating, decapsulating before redirecting packets to a particular
+queue, etc.), instead of relying on several rules to achieve this and having
+applications deal with hardware implementation details regarding their
+order.
+
+Support for different priority levels on a rule basis is provided, for
+example in order to force a more specific rule to come before a more generic
+one for packets matched by both. However hardware support for more than a
+single priority level cannot be guaranteed. When supported, the number of
+available priority levels is usually low, which is why they can also be
+implemented in software by PMDs (e.g. missing priority levels may be
+emulated by reordering rules).
+
+In order to remain as hardware-agnostic as possible, by default all rules
+are considered to have the same priority, which means that the order between
+overlapping rules (when a packet is matched by several filters) is
+undefined.
+
+PMDs may refuse to create overlapping rules at a given priority level when
+they can be detected (e.g. if a pattern matches an existing filter).
+
+Thus predictable results for a given priority level can only be achieved
+with non-overlapping rules, using perfect matching on all protocol layers.
+
+Flow rules can also be grouped, the flow rule priority is specific to the
+group they belong to. All flow rules in a given group are thus processed
+either before or after another group.
+
+Support for multiple actions per rule may be implemented internally on top
+of non-default hardware priorities, as a result both features may not be
+simultaneously available to applications.
+
+Considering that allowed pattern/actions combinations cannot be known in
+advance and would result in an impractically large number of capabilities to
+expose, a method is provided to validate a given rule from the current
+device configuration state.
+
+This enables applications to check if the rule types they need is supported
+at initialization time, before starting their data path. This method can be
+used anytime, its only requirement being that the resources needed by a rule
+should exist (e.g. a target RX queue should be configured first).
+
+Each defined rule is associated with an opaque handle managed by the PMD,
+applications are responsible for keeping it. These can be used for queries
+and rules management, such as retrieving counters or other data and
+destroying them.
+
+To avoid resource leaks on the PMD side, handles must be explicitly
+destroyed by the application before releasing associated resources such as
+queues and ports.
+
+The following sections cover:
+
+- **Attributes** (represented by ``struct rte_flow_attr``): properties of a
+  flow rule such as its direction (ingress or egress) and priority.
+
+- **Pattern item** (represented by ``struct rte_flow_item``): part of a
+  matching pattern that either matches specific packet data or traffic
+  properties. It can also describe properties of the pattern itself, such as
+  inverted matching.
+
+- **Matching pattern**: traffic properties to look for, a combination of any
+  number of items.
+
+- **Actions** (represented by ``struct rte_flow_action``): operations to
+  perform whenever a packet is matched by a pattern.
+
+Attributes
+~~~~~~~~~~
+
+Attribute: Group
+^^^^^^^^^^^^^^^^
+
+Flow rules can be grouped by assigning them a common group number. Lower
+values have higher priority. Group 0 has the highest priority.
+
+Although optional, applications are encouraged to group similar rules as
+much as possible to fully take advantage of hardware capabilities
+(e.g. optimized matching) and work around limitations (e.g. a single pattern
+type possibly allowed in a given group).
+
+Note that support for more than a single group is not guaranteed.
+
+Attribute: Priority
+^^^^^^^^^^^^^^^^^^^
+
+A priority level can be assigned to a flow rule. Like groups, lower values
+denote higher priority, with 0 as the maximum.
+
+A rule with priority 0 in group 8 is always matched after a rule with
+priority 8 in group 0.
+
+Group and priority levels are arbitrary and up to the application, they do
+not need to be contiguous nor start from 0, however the maximum number
+varies between devices and may be affected by existing flow rules.
+
+If a packet is matched by several rules of a given group for a given
+priority level, the outcome is undefined. It can take any path, may be
+duplicated or even cause unrecoverable errors.
+
+Note that support for more than a single priority level is not guaranteed.
+
+Attribute: Traffic direction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Flow rules can apply to inbound and/or outbound traffic (ingress/egress).
+
+Several pattern items and actions are valid and can be used in both
+directions. At least one direction must be specified.
+
+Specifying both directions at once for a given rule is not recommended but
+may be valid in a few cases (e.g. shared counters).
+
+Pattern item
+~~~~~~~~~~~~
+
+Pattern items fall in two categories:
+
+- Matching protocol headers and packet data (ANY, RAW, ETH, VLAN, IPV4,
+  IPV6, ICMP, UDP, TCP, SCTP, VXLAN and so on), usually associated with a
+  specification structure.
+
+- Matching meta-data or affecting pattern processing (END, VOID, INVERT, PF,
+  VF, PORT and so on), often without a specification structure.
+
+Item specification structures are used to match specific values among
+protocol fields (or item properties). Documentation describes for each item
+whether they are associated with one and their type name if so.
+
+Up to three structures of the same type can be set for a given item:
+
+- ``spec``: values to match (e.g. a given IPv4 address).
+
+- ``last``: upper bound for an inclusive range with corresponding fields in
+  ``spec``.
+
+- ``mask``: bit-mask applied to both ``spec`` and ``last`` whose purpose is
+  to distinguish the values to take into account and/or partially mask them
+  out (e.g. in order to match an IPv4 address prefix).
+
+Usage restrictions and expected behavior:
+
+- Setting either ``mask`` or ``last`` without ``spec`` is an error.
+
+- Field values in ``last`` which are either 0 or equal to the corresponding
+  values in ``spec`` are ignored; they do not generate a range. Nonzero
+  values lower than those in ``spec`` are not supported.
+
+- Setting ``spec`` and optionally ``last`` without ``mask`` causes the PMD
+  to only take the fields it can recognize into account. There is no error
+  checking for unsupported fields.
+
+- Not setting any of them (assuming item type allows it) uses default
+  parameters that depend on the item type. Most of the time, particularly
+  for protocol header items, it is equivalent to providing an empty (zeroed)
+  ``mask``.
+
+- ``mask`` is a simple bit-mask applied before interpreting the contents of
+  ``spec`` and ``last``, which may yield unexpected results if not used
+  carefully. For example, if for an IPv4 address field, ``spec`` provides
+  *10.1.2.3*, ``last`` provides *10.3.4.5* and ``mask`` provides
+  *255.255.0.0*, the effective range becomes *10.1.0.0* to *10.3.255.255*.
+
+Example of an item specification matching an Ethernet header:
+
+.. _table_rte_flow_pattern_item_example:
+
+.. table:: Ethernet item
+
+   +----------+----------+--------------------+
+   | Field    | Subfield | Value              |
+   +==========+==========+====================+
+   | ``spec`` | ``src``  | ``00:01:02:03:04`` |
+   |          +----------+--------------------+
+   |          | ``dst``  | ``00:2a:66:00:01`` |
+   |          +----------+--------------------+
+   |          | ``type`` | ``0x22aa``         |
+   +----------+----------+--------------------+
+   | ``last`` | unspecified                   |
+   +----------+----------+--------------------+
+   | ``mask`` | ``src``  | ``00:ff:ff:ff:00`` |
+   |          +----------+--------------------+
+   |          | ``dst``  | ``00:00:00:00:ff`` |
+   |          +----------+--------------------+
+   |          | ``type`` | ``0x0000``         |
+   +----------+----------+--------------------+
+
+Non-masked bits stand for any value (shown as ``?`` below), Ethernet headers
+with the following properties are thus matched:
+
+- ``src``: ``??:01:02:03:??``
+- ``dst``: ``??:??:??:??:01``
+- ``type``: ``0x????``
+
+Matching pattern
+~~~~~~~~~~~~~~~~
+
+A pattern is formed by stacking items starting from the lowest protocol
+layer to match. This stacking restriction does not apply to meta items which
+can be placed anywhere in the stack without affecting the meaning of the
+resulting pattern.
+
+Patterns are terminated by END items.
+
+Examples:
+
+.. _table_rte_flow_tcpv4_as_l4:
+
+.. table:: TCPv4 as L4
+
+   +-------+----------+
+   | Index | Item     |
+   +=======+==========+
+   | 0     | Ethernet |
+   +-------+----------+
+   | 1     | IPv4     |
+   +-------+----------+
+   | 2     | TCP      |
+   +-------+----------+
+   | 3     | END      |
+   +-------+----------+
+
+|
+
+.. _table_rte_flow_tcpv6_in_vxlan:
+
+.. table:: TCPv6 in VXLAN
+
+   +-------+------------+
+   | Index | Item       |
+   +=======+============+
+   | 0     | Ethernet   |
+   +-------+------------+
+   | 1     | IPv4       |
+   +-------+------------+
+   | 2     | UDP        |
+   +-------+------------+
+   | 3     | VXLAN      |
+   +-------+------------+
+   | 4     | Ethernet   |
+   +-------+------------+
+   | 5     | IPv6       |
+   +-------+------------+
+   | 6     | TCP        |
+   +-------+------------+
+   | 7     | END        |
+   +-------+------------+
+
+|
+
+.. _table_rte_flow_tcpv4_as_l4_meta:
+
+.. table:: TCPv4 as L4 with meta items
+
+   +-------+----------+
+   | Index | Item     |
+   +=======+==========+
+   | 0     | VOID     |
+   +-------+----------+
+   | 1     | Ethernet |
+   +-------+----------+
+   | 2     | VOID     |
+   +-------+----------+
+   | 3     | IPv4     |
+   +-------+----------+
+   | 4     | TCP      |
+   +-------+----------+
+   | 5     | VOID     |
+   +-------+----------+
+   | 6     | VOID     |
+   +-------+----------+
+   | 7     | END      |
+   +-------+----------+
+
+The above example shows how meta items do not affect packet data matching
+items, as long as those remain stacked properly. The resulting matching
+pattern is identical to "TCPv4 as L4".
+
+.. _table_rte_flow_udpv6_anywhere:
+
+.. table:: UDPv6 anywhere
+
+   +-------+------+
+   | Index | Item |
+   +=======+======+
+   | 0     | IPv6 |
+   +-------+------+
+   | 1     | UDP  |
+   +-------+------+
+   | 2     | END  |
+   +-------+------+
+
+If supported by the PMD, omitting one or several protocol layers at the
+bottom of the stack as in the above example (missing an Ethernet
+specification) enables looking up anywhere in packets.
+
+It is unspecified whether the payload of supported encapsulations
+(e.g. VXLAN payload) is matched by such a pattern, which may apply to inner,
+outer or both packets.
+
+.. _table_rte_flow_invalid_l3:
+
+.. table:: Invalid, missing L3
+
+   +-------+----------+
+   | Index | Item     |
+   +=======+==========+
+   | 0     | Ethernet |
+   +-------+----------+
+   | 1     | UDP      |
+   +-------+----------+
+   | 2     | END      |
+   +-------+----------+
+
+The above pattern is invalid due to a missing L3 specification between L2
+(Ethernet) and L4 (UDP). Doing so is only allowed at the bottom and at the
+top of the stack.
+
+Meta item types
+~~~~~~~~~~~~~~~
+
+They match meta-data or affect pattern processing instead of matching packet
+data directly, most of them do not need a specification structure. This
+particularity allows them to be specified anywhere in the stack without
+causing any side effect.
+
+Item: ``END``
+^^^^^^^^^^^^^
+
+End marker for item lists. Prevents further processing of items, thereby
+ending the pattern.
+
+- Its numeric value is 0 for convenience.
+- PMD support is mandatory.
+- ``spec``, ``last`` and ``mask`` are ignored.
+
+.. _table_rte_flow_item_end:
+
+.. table:: END
+
+   +----------+---------+
+   | Field    | Value   |
+   +==========+=========+
+   | ``spec`` | ignored |
+   +----------+---------+
+   | ``last`` | ignored |
+   +----------+---------+
+   | ``mask`` | ignored |
+   +----------+---------+
+
+Item: ``VOID``
+^^^^^^^^^^^^^^
+
+Used as a placeholder for convenience. It is ignored and simply discarded by
+PMDs.
+
+- PMD support is mandatory.
+- ``spec``, ``last`` and ``mask`` are ignored.
+
+.. _table_rte_flow_item_void:
+
+.. table:: VOID
+
+   +----------+---------+
+   | Field    | Value   |
+   +==========+=========+
+   | ``spec`` | ignored |
+   +----------+---------+
+   | ``last`` | ignored |
+   +----------+---------+
+   | ``mask`` | ignored |
+   +----------+---------+
+
+One usage example for this type is generating rules that share a common
+prefix quickly without reallocating memory, only by updating item types:
+
+.. _table_rte_flow_item_void_example:
+
+.. table:: TCP, UDP or ICMP as L4
+
+   +-------+--------------------+
+   | Index | Item               |
+   +=======+====================+
+   | 0     | Ethernet           |
+   +-------+--------------------+
+   | 1     | IPv4               |
+   +-------+------+------+------+
+   | 2     | UDP  | VOID | VOID |
+   +-------+------+------+------+
+   | 3     | VOID | TCP  | VOID |
+   +-------+------+------+------+
+   | 4     | VOID | VOID | ICMP |
+   +-------+------+------+------+
+   | 5     | END                |
+   +-------+--------------------+
+
+Item: ``INVERT``
+^^^^^^^^^^^^^^^^
+
+Inverted matching, i.e. process packets that do not match the pattern.
+
+- ``spec``, ``last`` and ``mask`` are ignored.
+
+.. _table_rte_flow_item_invert:
+
+.. table:: INVERT
+
+   +----------+---------+
+   | Field    | Value   |
+   +==========+=========+
+   | ``spec`` | ignored |
+   +----------+---------+
+   | ``last`` | ignored |
+   +----------+---------+
+   | ``mask`` | ignored |
+   +----------+---------+
+
+Usage example, matching non-TCPv4 packets only:
+
+.. _table_rte_flow_item_invert_example:
+
+.. table:: Anything but TCPv4
+
+   +-------+----------+
+   | Index | Item     |
+   +=======+==========+
+   | 0     | INVERT   |
+   +-------+----------+
+   | 1     | Ethernet |
+   +-------+----------+
+   | 2     | IPv4     |
+   +-------+----------+
+   | 3     | TCP      |
+   +-------+----------+
+   | 4     | END      |
+   +-------+----------+
+
+Item: ``PF``
+^^^^^^^^^^^^
+
+Matches packets addressed to the physical function of the device.
+
+If the underlying device function differs from the one that would normally
+receive the matched traffic, specifying this item prevents it from reaching
+that device unless the flow rule contains a `Action: PF`_. Packets are not
+duplicated between device instances by default.
+
+- Likely to return an error or never match any traffic if applied to a VF
+  device.
+- Can be combined with any number of `Item: VF`_ to match both PF and VF
+  traffic.
+- ``spec``, ``last`` and ``mask`` must not be set.
+
+.. _table_rte_flow_item_pf:
+
+.. table:: PF
+
+   +----------+-------+
+   | Field    | Value |
+   +==========+=======+
+   | ``spec`` | unset |
+   +----------+-------+
+   | ``last`` | unset |
+   +----------+-------+
+   | ``mask`` | unset |
+   +----------+-------+
+
+Item: ``VF``
+^^^^^^^^^^^^
+
+Matches packets addressed to a virtual function ID of the device.
+
+If the underlying device function differs from the one that would normally
+receive the matched traffic, specifying this item prevents it from reaching
+that device unless the flow rule contains a `Action: VF`_. Packets are not
+duplicated between device instances by default.
+
+- Likely to return an error or never match any traffic if this causes a VF
+  device to match traffic addressed to a different VF.
+- Can be specified multiple times to match traffic addressed to several VF
+  IDs.
+- Can be combined with a PF item to match both PF and VF traffic.
+
+.. _table_rte_flow_item_vf:
+
+.. table:: VF
+
+   +----------+----------+---------------------------+
+   | Field    | Subfield | Value                     |
+   +==========+==========+===========================+
+   | ``spec`` | ``id``   | destination VF ID         |
+   +----------+----------+---------------------------+
+   | ``last`` | ``id``   | upper range value         |
+   +----------+----------+---------------------------+
+   | ``mask`` | ``id``   | zeroed to match any VF ID |
+   +----------+----------+---------------------------+
+
+Item: ``PORT``
+^^^^^^^^^^^^^^
+
+Matches packets coming from the specified physical port of the underlying
+device.
+
+The first PORT item overrides the physical port normally associated with the
+specified DPDK input port (port_id). This item can be provided several times
+to match additional physical ports.
+
+Note that physical ports are not necessarily tied to DPDK input ports
+(port_id) when those are not under DPDK control. Possible values are
+specific to each device, they are not necessarily indexed from zero and may
+not be contiguous.
+
+As a device property, the list of allowed values as well as the value
+associated with a port_id should be retrieved by other means.
+
+.. _table_rte_flow_item_port:
+
+.. table:: PORT
+
+   +----------+-----------+--------------------------------+
+   | Field    | Subfield  | Value                          |
+   +==========+===========+================================+
+   | ``spec`` | ``index`` | physical port index            |
+   +----------+-----------+--------------------------------+
+   | ``last`` | ``index`` | upper range value              |
+   +----------+-----------+--------------------------------+
+   | ``mask`` | ``index`` | zeroed to match any port index |
+   +----------+-----------+--------------------------------+
+
+Data matching item types
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Most of these are basically protocol header definitions with associated
+bit-masks. They must be specified (stacked) from lowest to highest protocol
+layer to form a matching pattern.
+
+The following list is not exhaustive, new protocols will be added in the
+future.
+
+Item: ``ANY``
+^^^^^^^^^^^^^
+
+Matches any protocol in place of the current layer, a single ANY may also
+stand for several protocol layers.
+
+This is usually specified as the first pattern item when looking for a
+protocol anywhere in a packet.
+
+.. _table_rte_flow_item_any:
+
+.. table:: ANY
+
+   +----------+----------+--------------------------------------+
+   | Field    | Subfield | Value                                |
+   +==========+==========+======================================+
+   | ``spec`` | ``num``  | number of layers covered             |
+   +----------+----------+--------------------------------------+
+   | ``last`` | ``num``  | upper range value                    |
+   +----------+----------+--------------------------------------+
+   | ``mask`` | ``num``  | zeroed to cover any number of layers |
+   +----------+----------+--------------------------------------+
+
+Example for VXLAN TCP payload matching regardless of outer L3 (IPv4 or IPv6)
+and L4 (UDP) both matched by the first ANY specification, and inner L3 (IPv4
+or IPv6) matched by the second ANY specification:
+
+.. _table_rte_flow_item_any_example:
+
+.. table:: TCP in VXLAN with wildcards
+
+   +-------+------+----------+----------+-------+
+   | Index | Item | Field    | Subfield | Value |
+   +=======+======+==========+==========+=======+
+   | 0     | Ethernet                           |
+   +-------+------+----------+----------+-------+
+   | 1     | ANY  | ``spec`` | ``num``  | 2     |
+   +-------+------+----------+----------+-------+
+   | 2     | VXLAN                              |
+   +-------+------------------------------------+
+   | 3     | Ethernet                           |
+   +-------+------+----------+----------+-------+
+   | 4     | ANY  | ``spec`` | ``num``  | 1     |
+   +-------+------+----------+----------+-------+
+   | 5     | TCP                                |
+   +-------+------------------------------------+
+   | 6     | END                                |
+   +-------+------------------------------------+
+
+Item: ``RAW``
+^^^^^^^^^^^^^
+
+Matches a byte string of a given length at a given offset.
+
+Offset is either absolute (using the start of the packet) or relative to the
+end of the previous matched item in the stack, in which case negative values
+are allowed.
+
+If search is enabled, offset is used as the starting point. The search area
+can be delimited by setting limit to a nonzero value, which is the maximum
+number of bytes after offset where the pattern may start.
+
+Matching a zero-length pattern is allowed, doing so resets the relative
+offset for subsequent items.
+
+- This type does not support ranges (``last`` field).
+
+.. _table_rte_flow_item_raw:
+
+.. table:: RAW
+
+   +----------+--------------+-------------------------------------------------+
+   | Field    | Subfield     | Value                                           |
+   +==========+==============+=================================================+
+   | ``spec`` | ``relative`` | look for pattern after the previous item        |
+   |          +--------------+-------------------------------------------------+
+   |          | ``search``   | search pattern from offset (see also ``limit``) |
+   |          +--------------+-------------------------------------------------+
+   |          | ``reserved`` | reserved, must be set to zero                   |
+   |          +--------------+-------------------------------------------------+
+   |          | ``offset``   | absolute or relative offset for ``pattern``     |
+   |          +--------------+-------------------------------------------------+
+   |          | ``limit``    | search area limit for start of ``pattern``      |
+   |          +--------------+-------------------------------------------------+
+   |          | ``length``   | ``pattern`` length                              |
+   |          +--------------+-------------------------------------------------+
+   |          | ``pattern``  | byte string to look for                         |
+   +----------+--------------+-------------------------------------------------+
+   | ``last`` | if specified, either all 0 or with the same values as ``spec`` |
+   +----------+----------------------------------------------------------------+
+   | ``mask`` | bit-mask applied to ``spec`` values with usual behavior        |
+   +----------+----------------------------------------------------------------+
+
+Example pattern looking for several strings at various offsets of a UDP
+payload, using combined RAW items:
+
+.. _table_rte_flow_item_raw_example:
+
+.. table:: UDP payload matching
+
+   +-------+------+----------+--------------+-------+
+   | Index | Item | Field    | Subfield     | Value |
+   +=======+======+==========+==============+=======+
+   | 0     | Ethernet                               |
+   +-------+----------------------------------------+
+   | 1     | IPv4                                   |
+   +-------+----------------------------------------+
+   | 2     | UDP                                    |
+   +-------+------+----------+--------------+-------+
+   | 3     | RAW  | ``spec`` | ``relative`` | 1     |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``search``   | 1     |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``offset``   | 10    |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``limit``    | 0     |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``length``   | 3     |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``pattern``  | "foo" |
+   +-------+------+----------+--------------+-------+
+   | 4     | RAW  | ``spec`` | ``relative`` | 1     |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``search``   | 0     |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``offset``   | 20    |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``limit``    | 0     |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``length``   | 3     |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``pattern``  | "bar" |
+   +-------+------+----------+--------------+-------+
+   | 5     | RAW  | ``spec`` | ``relative`` | 1     |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``search``   | 0     |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``offset``   | -29   |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``limit``    | 0     |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``length``   | 3     |
+   |       |      |          +--------------+-------+
+   |       |      |          | ``pattern``  | "baz" |
+   +-------+------+----------+--------------+-------+
+   | 6     | END                                    |
+   +-------+----------------------------------------+
+
+This translates to:
+
+- Locate "foo" at least 10 bytes deep inside UDP payload.
+- Locate "bar" after "foo" plus 20 bytes.
+- Locate "baz" after "bar" minus 29 bytes.
+
+Such a packet may be represented as follows (not to scale)::
+
+ 0                     >= 10 B           == 20 B
+ |                  |<--------->|     |<--------->|
+ |                  |           |     |           |
+ |-----|------|-----|-----|-----|-----|-----------|-----|------|
+ | ETH | IPv4 | UDP | ... | baz | foo | ......... | bar | .... |
+ |-----|------|-----|-----|-----|-----|-----------|-----|------|
+                          |                             |
+                          |<--------------------------->|
+                                      == 29 B
+
+Note that matching subsequent pattern items would resume after "baz", not
+"bar" since matching is always performed after the previous item of the
+stack.
+
+Item: ``ETH``
+^^^^^^^^^^^^^
+
+Matches an Ethernet header.
+
+- ``dst``: destination MAC.
+- ``src``: source MAC.
+- ``type``: EtherType.
+
+Item: ``VLAN``
+^^^^^^^^^^^^^^
+
+Matches an 802.1Q/ad VLAN tag.
+
+- ``tpid``: tag protocol identifier.
+- ``tci``: tag control information.
+
+Item: ``IPV4``
+^^^^^^^^^^^^^^
+
+Matches an IPv4 header.
+
+Note: IPv4 options are handled by dedicated pattern items.
+
+- ``hdr``: IPv4 header definition (``rte_ip.h``).
+
+Item: ``IPV6``
+^^^^^^^^^^^^^^
+
+Matches an IPv6 header.
+
+Note: IPv6 options are handled by dedicated pattern items.
+
+- ``hdr``: IPv6 header definition (``rte_ip.h``).
+
+Item: ``ICMP``
+^^^^^^^^^^^^^^
+
+Matches an ICMP header.
+
+- ``hdr``: ICMP header definition (``rte_icmp.h``).
+
+Item: ``UDP``
+^^^^^^^^^^^^^
+
+Matches a UDP header.
+
+- ``hdr``: UDP header definition (``rte_udp.h``).
+
+Item: ``TCP``
+^^^^^^^^^^^^^
+
+Matches a TCP header.
+
+- ``hdr``: TCP header definition (``rte_tcp.h``).
+
+Item: ``SCTP``
+^^^^^^^^^^^^^^
+
+Matches a SCTP header.
+
+- ``hdr``: SCTP header definition (``rte_sctp.h``).
+
+Item: ``VXLAN``
+^^^^^^^^^^^^^^^
+
+Matches a VXLAN header (RFC 7348).
+
+- ``flags``: normally 0x08 (I flag).
+- ``rsvd0``: reserved, normally 0x000000.
+- ``vni``: VXLAN network identifier.
+- ``rsvd1``: reserved, normally 0x00.
+
+Actions
+~~~~~~~
+
+Each possible action is represented by a type. Some have associated
+configuration structures. Several actions combined in a list can be affected
+to a flow rule. That list is not ordered.
+
+They fall in three categories:
+
+- Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent
+  processing matched packets by subsequent flow rules, unless overridden
+  with PASSTHRU.
+
+- Non-terminating actions (PASSTHRU, DUP) that leave matched packets up for
+  additional processing by subsequent flow rules.
+
+- Other non-terminating meta actions that do not affect the fate of packets
+  (END, VOID, MARK, FLAG, COUNT).
+
+When several actions are combined in a flow rule, they should all have
+different types (e.g. dropping a packet twice is not possible).
+
+Only the last action of a given type is taken into account. PMDs still
+perform error checking on the entire list.
+
+Like matching patterns, action lists are terminated by END items.
+
+*Note that PASSTHRU is the only action able to override a terminating rule.*
+
+Example of action that redirects packets to queue index 10:
+
+.. _table_rte_flow_action_example:
+
+.. table:: Queue action
+
+   +-----------+-------+
+   | Field     | Value |
+   +===========+=======+
+   | ``index`` | 10    |
+   +-----------+-------+
+
+Action lists examples, their order is not significant, applications must
+consider all actions to be performed simultaneously:
+
+.. _table_rte_flow_count_and_drop:
+
+.. table:: Count and drop
+
+   +-------+--------+
+   | Index | Action |
+   +=======+========+
+   | 0     | COUNT  |
+   +-------+--------+
+   | 1     | DROP   |
+   +-------+--------+
+   | 2     | END    |
+   +-------+--------+
+
+|
+
+.. _table_rte_flow_mark_count_redirect:
+
+.. table:: Mark, count and redirect
+
+   +-------+--------+-----------+-------+
+   | Index | Action | Field     | Value |
+   +=======+========+===========+=======+
+   | 0     | MARK   | ``mark``  | 0x2a  |
+   +-------+--------+-----------+-------+
+   | 1     | COUNT                      |
+   +-------+--------+-----------+-------+
+   | 2     | QUEUE  | ``queue`` | 10    |
+   +-------+--------+-----------+-------+
+   | 3     | END                        |
+   +-------+----------------------------+
+
+|
+
+.. _table_rte_flow_redirect_queue_5:
+
+.. table:: Redirect to queue 5
+
+   +-------+--------+-----------+-------+
+   | Index | Action | Field     | Value |
+   +=======+========+===========+=======+
+   | 0     | DROP                       |
+   +-------+--------+-----------+-------+
+   | 1     | QUEUE  | ``queue`` | 5     |
+   +-------+--------+-----------+-------+
+   | 2     | END                        |
+   +-------+----------------------------+
+
+In the above example, considering both actions are performed simultaneously,
+the end result is that only QUEUE has any effect.
+
+.. _table_rte_flow_redirect_queue_3:
+
+.. table:: Redirect to queue 3
+
+   +-------+--------+-----------+-------+
+   | Index | Action | Field     | Value |
+   +=======+========+===========+=======+
+   | 0     | QUEUE  | ``queue`` | 5     |
+   +-------+--------+-----------+-------+
+   | 1     | VOID                       |
+   +-------+--------+-----------+-------+
+   | 2     | QUEUE  | ``queue`` | 3     |
+   +-------+--------+-----------+-------+
+   | 3     | END                        |
+   +-------+----------------------------+
+
+As previously described, only the last action of a given type found in the
+list is taken into account. The above example also shows that VOID is
+ignored.
+
+Action types
+~~~~~~~~~~~~
+
+Common action types are described in this section. Like pattern item types,
+this list is not exhaustive as new actions will be added in the future.
+
+Action: ``END``
+^^^^^^^^^^^^^^^
+
+End marker for action lists. Prevents further processing of actions, thereby
+ending the list.
+
+- Its numeric value is 0 for convenience.
+- PMD support is mandatory.
+- No configurable properties.
+
+.. _table_rte_flow_action_end:
+
+.. table:: END
+
+   +---------------+
+   | Field         |
+   +===============+
+   | no properties |
+   +---------------+
+
+Action: ``VOID``
+^^^^^^^^^^^^^^^^
+
+Used as a placeholder for convenience. It is ignored and simply discarded by
+PMDs.
+
+- PMD support is mandatory.
+- No configurable properties.
+
+.. _table_rte_flow_action_void:
+
+.. table:: VOID
+
+   +---------------+
+   | Field         |
+   +===============+
+   | no properties |
+   +---------------+
+
+Action: ``PASSTHRU``
+^^^^^^^^^^^^^^^^^^^^
+
+Leaves packets up for additional processing by subsequent flow rules. This
+is the default when a rule does not contain a terminating action, but can be
+specified to force a rule to become non-terminating.
+
+- No configurable properties.
+
+.. _table_rte_flow_action_passthru:
+
+.. table:: PASSTHRU
+
+   +---------------+
+   | Field         |
+   +===============+
+   | no properties |
+   +---------------+
+
+Example to copy a packet to a queue and continue processing by subsequent
+flow rules:
+
+.. _table_rte_flow_action_passthru_example:
+
+.. table:: Copy to queue 8
+
+   +-------+--------+-----------+-------+
+   | Index | Action | Field     | Value |
+   +=======+========+===========+=======+
+   | 0     | PASSTHRU                   |
+   +-------+--------+-----------+-------+
+   | 1     | QUEUE  | ``queue`` | 8     |
+   +-------+--------+-----------+-------+
+   | 2     | END                        |
+   +-------+----------------------------+
+
+Action: ``MARK``
+^^^^^^^^^^^^^^^^
+
+Attaches a 32 bit value to packets.
+
+This value is arbitrary and application-defined. For compatibility with FDIR
+it is returned in the ``hash.fdir.hi`` mbuf field. ``PKT_RX_FDIR_ID`` is
+also set in ``ol_flags``.
+
+.. _table_rte_flow_action_mark:
+
+.. table:: MARK
+
+   +--------+-------------------------------------+
+   | Field  | Value                               |
+   +========+=====================================+
+   | ``id`` | 32 bit value to return with packets |
+   +--------+-------------------------------------+
+
+Action: ``FLAG``
+^^^^^^^^^^^^^^^^
+
+Flag packets. Similar to `Action: MARK`_ but only affects ``ol_flags``.
+
+- No configurable properties.
+
+Note: a distinctive flag must be defined for it.
+
+.. _table_rte_flow_action_flag:
+
+.. table:: FLAG
+
+   +---------------+
+   | Field         |
+   +===============+
+   | no properties |
+   +---------------+
+
+Action: ``QUEUE``
+^^^^^^^^^^^^^^^^^
+
+Assigns packets to a given queue index.
+
+- Terminating by default.
+
+.. _table_rte_flow_action_queue:
+
+.. table:: QUEUE
+
+   +-----------+--------------------+
+   | Field     | Value              |
+   +===========+====================+
+   | ``index`` | queue index to use |
+   +-----------+--------------------+
+
+Action: ``DROP``
+^^^^^^^^^^^^^^^^
+
+Drop packets.
+
+- No configurable properties.
+- Terminating by default.
+- PASSTHRU overrides this action if both are specified.
+
+.. _table_rte_flow_action_drop:
+
+.. table:: DROP
+
+   +---------------+
+   | Field         |
+   +===============+
+   | no properties |
+   +---------------+
+
+Action: ``COUNT``
+^^^^^^^^^^^^^^^^^
+
+Enables counters for this rule.
+
+These counters can be retrieved and reset through ``rte_flow_query()``, see
+``struct rte_flow_query_count``.
+
+- Counters can be retrieved with ``rte_flow_query()``.
+- No configurable properties.
+
+.. _table_rte_flow_action_count:
+
+.. table:: COUNT
+
+   +---------------+
+   | Field         |
+   +===============+
+   | no properties |
+   +---------------+
+
+Query structure to retrieve and reset flow rule counters:
+
+.. _table_rte_flow_query_count:
+
+.. table:: COUNT query
+
+   +---------------+-----+-----------------------------------+
+   | Field         | I/O | Value                             |
+   +===============+=====+===================================+
+   | ``reset``     | in  | reset counter after query         |
+   +---------------+-----+-----------------------------------+
+   | ``hits_set``  | out | ``hits`` field is set             |
+   +---------------+-----+-----------------------------------+
+   | ``bytes_set`` | out | ``bytes`` field is set            |
+   +---------------+-----+-----------------------------------+
+   | ``hits``      | out | number of hits for this rule      |
+   +---------------+-----+-----------------------------------+
+   | ``bytes``     | out | number of bytes through this rule |
+   +---------------+-----+-----------------------------------+
+
+Action: ``DUP``
+^^^^^^^^^^^^^^^
+
+Duplicates packets to a given queue index.
+
+This is normally combined with QUEUE, however when used alone, it is
+actually similar to QUEUE + PASSTHRU.
+
+- Non-terminating by default.
+
+.. _table_rte_flow_action_dup:
+
+.. table:: DUP
+
+   +-----------+------------------------------------+
+   | Field     | Value                              |
+   +===========+====================================+
+   | ``index`` | queue index to duplicate packet to |
+   +-----------+------------------------------------+
+
+Action: ``RSS``
+^^^^^^^^^^^^^^^
+
+Similar to QUEUE, except RSS is additionally performed on packets to spread
+them among several queues according to the provided parameters.
+
+Note: RSS hash result is normally stored in the ``hash.rss`` mbuf field,
+however it conflicts with `Action: MARK`_ as they share the same space. When
+both actions are specified, the RSS hash is discarded and
+``PKT_RX_RSS_HASH`` is not set in ``ol_flags``. MARK has priority. The mbuf
+structure should eventually evolve to store both.
+
+- Terminating by default.
+
+.. _table_rte_flow_action_rss:
+
+.. table:: RSS
+
+   +--------------+------------------------------+
+   | Field        | Value                        |
+   +==============+==============================+
+   | ``rss_conf`` | RSS parameters               |
+   +--------------+------------------------------+
+   | ``num``      | number of entries in queue[] |
+   +--------------+------------------------------+
+   | ``queue[]``  | queue indices to use         |
+   +--------------+------------------------------+
+
+Action: ``PF``
+^^^^^^^^^^^^^^
+
+Redirects packets to the physical function (PF) of the current device.
+
+- No configurable properties.
+- Terminating by default.
+
+.. _table_rte_flow_action_pf:
+
+.. table:: PF
+
+   +---------------+
+   | Field         |
+   +===============+
+   | no properties |
+   +---------------+
+
+Action: ``VF``
+^^^^^^^^^^^^^^
+
+Redirects packets to a virtual function (VF) of the current device.
+
+Packets matched by a VF pattern item can be redirected to their original VF
+ID instead of the specified one. This parameter may not be available and is
+not guaranteed to work properly if the VF part is matched by a prior flow
+rule or if packets are not addressed to a VF in the first place.
+
+- Terminating by default.
+
+.. _table_rte_flow_action_vf:
+
+.. table:: VF
+
+   +--------------+--------------------------------+
+   | Field        | Value                          |
+   +==============+================================+
+   | ``original`` | use original VF ID if possible |
+   +--------------+--------------------------------+
+   | ``vf``       | VF ID to redirect packets to   |
+   +--------------+--------------------------------+
+
+Negative types
+~~~~~~~~~~~~~~
+
+All specified pattern items (``enum rte_flow_item_type``) and actions
+(``enum rte_flow_action_type``) use positive identifiers.
+
+The negative space is reserved for dynamic types generated by PMDs during
+run-time. PMDs may encounter them as a result but must not accept negative
+identifiers they are not aware of.
+
+A method to generate them remains to be defined.
+
+Planned types
+~~~~~~~~~~~~~
+
+Pattern item types will be added as new protocols are implemented.
+
+Variable headers support through dedicated pattern items, for example in
+order to match specific IPv4 options and IPv6 extension headers would be
+stacked after IPv4/IPv6 items.
+
+Other action types are planned but are not defined yet. These include the
+ability to alter packet data in several ways, such as performing
+encapsulation/decapsulation of tunnel headers.
+
+Rules management
+----------------
+
+A rather simple API with few functions is provided to fully manage flow
+rules.
+
+Each created flow rule is associated with an opaque, PMD-specific handle
+pointer. The application is responsible for keeping it until the rule is
+destroyed.
+
+Flows rules are represented by ``struct rte_flow`` objects.
+
+Validation
+~~~~~~~~~~
+
+Given that expressing a definite set of device capabilities is not
+practical, a dedicated function is provided to check if a flow rule is
+supported and can be created.
+
+.. code-block:: c
+
+   int
+   rte_flow_validate(uint8_t port_id,
+                     const struct rte_flow_attr *attr,
+                     const struct rte_flow_item pattern[],
+                     const struct rte_flow_action actions[],
+                     struct rte_flow_error *error);
+
+While this function has no effect on the target device, the flow rule is
+validated against its current configuration state and the returned value
+should be considered valid by the caller for that state only.
+
+The returned value is guaranteed to remain valid only as long as no
+successful calls to ``rte_flow_create()`` or ``rte_flow_destroy()`` are made
+in the meantime and no device parameter affecting flow rules in any way are
+modified, due to possible collisions or resource limitations (although in
+such cases ``EINVAL`` should not be returned).
+
+Arguments:
+
+- ``port_id``: port identifier of Ethernet device.
+- ``attr``: flow rule attributes.
+- ``pattern``: pattern specification (list terminated by the END pattern
+  item).
+- ``actions``: associated actions (list terminated by the END action).
+- ``error``: perform verbose error reporting if not NULL. PMDs initialize
+  this structure in case of error only.
+
+Return values:
+
+- 0 if flow rule is valid and can be created. A negative errno value
+  otherwise (``rte_errno`` is also set), the following errors are defined.
+- ``-ENOSYS``: underlying device does not support this functionality.
+- ``-EINVAL``: unknown or invalid rule specification.
+- ``-ENOTSUP``: valid but unsupported rule specification (e.g. partial
+  bit-masks are unsupported).
+- ``-EEXIST``: collision with an existing rule.
+- ``-ENOMEM``: not enough resources.
+- ``-EBUSY``: action cannot be performed due to busy device resources, may
+  succeed if the affected queues or even the entire port are in a stopped
+  state (see ``rte_eth_dev_rx_queue_stop()`` and ``rte_eth_dev_stop()``).
+
+Creation
+~~~~~~~~
+
+Creating a flow rule is similar to validating one, except the rule is
+actually created and a handle returned.
+
+.. code-block:: c
+
+   struct rte_flow *
+   rte_flow_create(uint8_t port_id,
+                   const struct rte_flow_attr *attr,
+                   const struct rte_flow_item pattern[],
+                   const struct rte_flow_action *actions[],
+                   struct rte_flow_error *error);
+
+Arguments:
+
+- ``port_id``: port identifier of Ethernet device.
+- ``attr``: flow rule attributes.
+- ``pattern``: pattern specification (list terminated by the END pattern
+  item).
+- ``actions``: associated actions (list terminated by the END action).
+- ``error``: perform verbose error reporting if not NULL. PMDs initialize
+  this structure in case of error only.
+
+Return values:
+
+A valid handle in case of success, NULL otherwise and ``rte_errno`` is set
+to the positive version of one of the error codes defined for
+``rte_flow_validate()``.
+
+Destruction
+~~~~~~~~~~~
+
+Flow rules destruction is not automatic, and a queue or a port should not be
+released if any are still attached to them. Applications must take care of
+performing this step before releasing resources.
+
+.. code-block:: c
+
+   int
+   rte_flow_destroy(uint8_t port_id,
+                    struct rte_flow *flow,
+                    struct rte_flow_error *error);
+
+
+Failure to destroy a flow rule handle may occur when other flow rules depend
+on it, and destroying it would result in an inconsistent state.
+
+This function is only guaranteed to succeed if handles are destroyed in
+reverse order of their creation.
+
+Arguments:
+
+- ``port_id``: port identifier of Ethernet device.
+- ``flow``: flow rule handle to destroy.
+- ``error``: perform verbose error reporting if not NULL. PMDs initialize
+  this structure in case of error only.
+
+Return values:
+
+- 0 on success, a negative errno value otherwise and ``rte_errno`` is set.
+
+Flush
+~~~~~
+
+Convenience function to destroy all flow rule handles associated with a
+port. They are released as with successive calls to ``rte_flow_destroy()``.
+
+.. code-block:: c
+
+   int
+   rte_flow_flush(uint8_t port_id,
+                  struct rte_flow_error *error);
+
+In the unlikely event of failure, handles are still considered destroyed and
+no longer valid but the port must be assumed to be in an inconsistent state.
+
+Arguments:
+
+- ``port_id``: port identifier of Ethernet device.
+- ``error``: perform verbose error reporting if not NULL. PMDs initialize
+  this structure in case of error only.
+
+Return values:
+
+- 0 on success, a negative errno value otherwise and ``rte_errno`` is set.
+
+Query
+~~~~~
+
+Query an existing flow rule.
+
+This function allows retrieving flow-specific data such as counters. Data
+is gathered by special actions which must be present in the flow rule
+definition.
+
+.. code-block:: c
+
+   int
+   rte_flow_query(uint8_t port_id,
+                  struct rte_flow *flow,
+                  enum rte_flow_action_type action,
+                  void *data,
+                  struct rte_flow_error *error);
+
+Arguments:
+
+- ``port_id``: port identifier of Ethernet device.
+- ``flow``: flow rule handle to query.
+- ``action``: action type to query.
+- ``data``: pointer to storage for the associated query data type.
+- ``error``: perform verbose error reporting if not NULL. PMDs initialize
+  this structure in case of error only.
+
+Return values:
+
+- 0 on success, a negative errno value otherwise and ``rte_errno`` is set.
+
+Verbose error reporting
+-----------------------
+
+The defined *errno* values may not be accurate enough for users or
+application developers who want to investigate issues related to flow rules
+management. A dedicated error object is defined for this purpose:
+
+.. code-block:: c
+
+   enum rte_flow_error_type {
+       RTE_FLOW_ERROR_TYPE_NONE, /**< No error. */
+       RTE_FLOW_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */
+       RTE_FLOW_ERROR_TYPE_HANDLE, /**< Flow rule (handle). */
+       RTE_FLOW_ERROR_TYPE_ATTR_GROUP, /**< Group field. */
+       RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, /**< Priority field. */
+       RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, /**< Ingress field. */
+       RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, /**< Egress field. */
+       RTE_FLOW_ERROR_TYPE_ATTR, /**< Attributes structure. */
+       RTE_FLOW_ERROR_TYPE_ITEM_NUM, /**< Pattern length. */
+       RTE_FLOW_ERROR_TYPE_ITEM, /**< Specific pattern item. */
+       RTE_FLOW_ERROR_TYPE_ACTION_NUM, /**< Number of actions. */
+       RTE_FLOW_ERROR_TYPE_ACTION, /**< Specific action. */
+   };
+
+   struct rte_flow_error {
+       enum rte_flow_error_type type; /**< Cause field and error types. */
+       const void *cause; /**< Object responsible for the error. */
+       const char *message; /**< Human-readable error message. */
+   };
+
+Error type ``RTE_FLOW_ERROR_TYPE_NONE`` stands for no error, in which case
+remaining fields can be ignored. Other error types describe the type of the
+object pointed by ``cause``.
+
+If non-NULL, ``cause`` points to the object responsible for the error. For a
+flow rule, this may be a pattern item or an individual action.
+
+If non-NULL, ``message`` provides a human-readable error message.
+
+This object is normally allocated by applications and set by PMDs in case of
+error, the message points to a constant string which does not need to be
+freed by the application, however its pointer can be considered valid only
+as long as its associated DPDK port remains configured. Closing the
+underlying device or unloading the PMD invalidates it.
+
+Caveats
+-------
+
+- DPDK does not keep track of flow rules definitions or flow rule objects
+  automatically. Applications may keep track of the former and must keep
+  track of the latter. PMDs may also do it for internal needs, however this
+  must not be relied on by applications.
+
+- Flow rules are not maintained between successive port initializations. An
+  application exiting without releasing them and restarting must re-create
+  them from scratch.
+
+- API operations are synchronous and blocking (``EAGAIN`` cannot be
+  returned).
+
+- There is no provision for reentrancy/multi-thread safety, although nothing
+  should prevent different devices from being configured at the same
+  time. PMDs may protect their control path functions accordingly.
+
+- Stopping the data path (TX/RX) should not be necessary when managing flow
+  rules. If this cannot be achieved naturally or with workarounds (such as
+  temporarily replacing the burst function pointers), an appropriate error
+  code must be returned (``EBUSY``).
+
+- PMDs, not applications, are responsible for maintaining flow rules
+  configuration when stopping and restarting a port or performing other
+  actions which may affect them. They can only be destroyed explicitly by
+  applications.
+
+For devices exposing multiple ports sharing global settings affected by flow
+rules:
+
+- All ports under DPDK control must behave consistently, PMDs are
+  responsible for making sure that existing flow rules on a port are not
+  affected by other ports.
+
+- Ports not under DPDK control (unaffected or handled by other applications)
+  are user's responsibility. They may affect existing flow rules and cause
+  undefined behavior. PMDs aware of this may prevent flow rules creation
+  altogether in such cases.
+
+PMD interface
+-------------
+
+The PMD interface is defined in ``rte_flow_driver.h``. It is not subject to
+API/ABI versioning constraints as it is not exposed to applications and may
+evolve independently.
+
+It is currently implemented on top of the legacy filtering framework through
+filter type *RTE_ETH_FILTER_GENERIC* that accepts the single operation
+*RTE_ETH_FILTER_GET* to return PMD-specific *rte_flow* callbacks wrapped
+inside ``struct rte_flow_ops``.
+
+This overhead is temporarily necessary in order to keep compatibility with
+the legacy filtering framework, which should eventually disappear.
+
+- PMD callbacks implement exactly the interface described in `Rules
+  management`_, except for the port ID argument which has already been
+  converted to a pointer to the underlying ``struct rte_eth_dev``.
+
+- Public API functions do not process flow rules definitions at all before
+  calling PMD functions (no basic error checking, no validation
+  whatsoever). They only make sure these callbacks are non-NULL or return
+  the ``ENOSYS`` (function not supported) error.
+
+This interface additionally defines the following helper functions:
+
+- ``rte_flow_ops_get()``: get generic flow operations structure from a
+  port.
+
+- ``rte_flow_error_set()``: initialize generic flow error structure.
+
+More will be added over time.
+
+Device compatibility
+--------------------
+
+No known implementation supports all the described features.
+
+Unsupported features or combinations are not expected to be fully emulated
+in software by PMDs for performance reasons. Partially supported features
+may be completed in software as long as hardware performs most of the work
+(such as queue redirection and packet recognition).
+
+However PMDs are expected to do their best to satisfy application requests
+by working around hardware limitations as long as doing so does not affect
+the behavior of existing flow rules.
+
+The following sections provide a few examples of such cases and describe how
+PMDs should handle them, they are based on limitations built into the
+previous APIs.
+
+Global bit-masks
+~~~~~~~~~~~~~~~~
+
+Each flow rule comes with its own, per-layer bit-masks, while hardware may
+support only a single, device-wide bit-mask for a given layer type, so that
+two IPv4 rules cannot use different bit-masks.
+
+The expected behavior in this case is that PMDs automatically configure
+global bit-masks according to the needs of the first flow rule created.
+
+Subsequent rules are allowed only if their bit-masks match those, the
+``EEXIST`` error code should be returned otherwise.
+
+Unsupported layer types
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Many protocols can be simulated by crafting patterns with the `Item: RAW`_
+type.
+
+PMDs can rely on this capability to simulate support for protocols with
+headers not directly recognized by hardware.
+
+``ANY`` pattern item
+~~~~~~~~~~~~~~~~~~~~
+
+This pattern item stands for anything, which can be difficult to translate
+to something hardware would understand, particularly if followed by more
+specific types.
+
+Consider the following pattern:
+
+.. _table_rte_flow_unsupported_any:
+
+.. table:: Pattern with ANY as L3
+
+   +-------+-----------------------+
+   | Index | Item                  |
+   +=======+=======================+
+   | 0     | ETHER                 |
+   +-------+-----+---------+-------+
+   | 1     | ANY | ``num`` | ``1`` |
+   +-------+-----+---------+-------+
+   | 2     | TCP                   |
+   +-------+-----------------------+
+   | 3     | END                   |
+   +-------+-----------------------+
+
+Knowing that TCP does not make sense with something other than IPv4 and IPv6
+as L3, such a pattern may be translated to two flow rules instead:
+
+.. _table_rte_flow_unsupported_any_ipv4:
+
+.. table:: ANY replaced with IPV4
+
+   +-------+--------------------+
+   | Index | Item               |
+   +=======+====================+
+   | 0     | ETHER              |
+   +-------+--------------------+
+   | 1     | IPV4 (zeroed mask) |
+   +-------+--------------------+
+   | 2     | TCP                |
+   +-------+--------------------+
+   | 3     | END                |
+   +-------+--------------------+
+
+|
+
+.. _table_rte_flow_unsupported_any_ipv6:
+
+.. table:: ANY replaced with IPV6
+
+   +-------+--------------------+
+   | Index | Item               |
+   +=======+====================+
+   | 0     | ETHER              |
+   +-------+--------------------+
+   | 1     | IPV6 (zeroed mask) |
+   +-------+--------------------+
+   | 2     | TCP                |
+   +-------+--------------------+
+   | 3     | END                |
+   +-------+--------------------+
+
+Note that as soon as a ANY rule covers several layers, this approach may
+yield a large number of hidden flow rules. It is thus suggested to only
+support the most common scenarios (anything as L2 and/or L3).
+
+Unsupported actions
+~~~~~~~~~~~~~~~~~~~
+
+- When combined with `Action: QUEUE`_, packet counting (`Action: COUNT`_)
+  and tagging (`Action: MARK`_ or `Action: FLAG`_) may be implemented in
+  software as long as the target queue is used by a single rule.
+
+- A rule specifying both `Action: DUP`_ + `Action: QUEUE`_ may be translated
+  to two hidden rules combining `Action: QUEUE`_ and `Action: PASSTHRU`_.
+
+- When a single target queue is provided, `Action: RSS`_ can also be
+  implemented through `Action: QUEUE`_.
+
+Flow rules priority
+~~~~~~~~~~~~~~~~~~~
+
+While it would naturally make sense, flow rules cannot be assumed to be
+processed by hardware in the same order as their creation for several
+reasons:
+
+- They may be managed internally as a tree or a hash table instead of a
+  list.
+- Removing a flow rule before adding another one can either put the new rule
+  at the end of the list or reuse a freed entry.
+- Duplication may occur when packets are matched by several rules.
+
+For overlapping rules (particularly in order to use `Action: PASSTHRU`_)
+predictable behavior is only guaranteed by using different priority levels.
+
+Priority levels are not necessarily implemented in hardware, or may be
+severely limited (e.g. a single priority bit).
+
+For these reasons, priority levels may be implemented purely in software by
+PMDs.
+
+- For devices expecting flow rules to be added in the correct order, PMDs
+  may destroy and re-create existing rules after adding a new one with
+  a higher priority.
+
+- A configurable number of dummy or empty rules can be created at
+  initialization time to save high priority slots for later.
+
+- In order to save priority levels, PMDs may evaluate whether rules are
+  likely to collide and adjust their priority accordingly.
+
+Future evolutions
+-----------------
+
+- A device profile selection function which could be used to force a
+  permanent profile instead of relying on its automatic configuration based
+  on existing flow rules.
+
+- A method to optimize *rte_flow* rules with specific pattern items and
+  action types generated on the fly by PMDs. DPDK should assign negative
+  numbers to these in order to not collide with the existing types. See
+  `Negative types`_.
+
+- Adding specific egress pattern items and actions as described in
+  `Attribute: Traffic direction`_.
+
+- Optional software fallback when PMDs are unable to handle requested flow
+  rules so applications do not have to implement their own.
+
+API migration
+-------------
+
+Exhaustive list of deprecated filter types (normally prefixed with
+*RTE_ETH_FILTER_*) found in ``rte_eth_ctrl.h`` and methods to convert them
+to *rte_flow* rules.
+
+``MACVLAN`` to ``ETH`` → ``VF``, ``PF``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+*MACVLAN* can be translated to a basic `Item: ETH`_ flow rule with a
+terminating `Action: VF`_ or `Action: PF`_.
+
+.. _table_rte_flow_migration_macvlan:
+
+.. table:: MACVLAN conversion
+
+   +--------------------------+---------+
+   | Pattern                  | Actions |
+   +===+=====+==========+=====+=========+
+   | 0 | ETH | ``spec`` | any | VF,     |
+   |   |     +----------+-----+ PF      |
+   |   |     | ``last`` | N/A |         |
+   |   |     +----------+-----+         |
+   |   |     | ``mask`` | any |         |
+   +---+-----+----------+-----+---------+
+   | 1 | END                  | END     |
+   +---+----------------------+---------+
+
+``ETHERTYPE`` to ``ETH`` → ``QUEUE``, ``DROP``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+*ETHERTYPE* is basically an `Item: ETH`_ flow rule with a terminating
+`Action: QUEUE`_ or `Action: DROP`_.
+
+.. _table_rte_flow_migration_ethertype:
+
+.. table:: ETHERTYPE conversion
+
+   +--------------------------+---------+
+   | Pattern                  | Actions |
+   +===+=====+==========+=====+=========+
+   | 0 | ETH | ``spec`` | any | QUEUE,  |
+   |   |     +----------+-----+ DROP    |
+   |   |     | ``last`` | N/A |         |
+   |   |     +----------+-----+         |
+   |   |     | ``mask`` | any |         |
+   +---+-----+----------+-----+---------+
+   | 1 | END                  | END     |
+   +---+----------------------+---------+
+
+``FLEXIBLE`` to ``RAW`` → ``QUEUE``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+*FLEXIBLE* can be translated to one `Item: RAW`_ pattern with a terminating
+`Action: QUEUE`_ and a defined priority level.
+
+.. _table_rte_flow_migration_flexible:
+
+.. table:: FLEXIBLE conversion
+
+   +--------------------------+---------+
+   | Pattern                  | Actions |
+   +===+=====+==========+=====+=========+
+   | 0 | RAW | ``spec`` | any | QUEUE   |
+   |   |     +----------+-----+         |
+   |   |     | ``last`` | N/A |         |
+   |   |     +----------+-----+         |
+   |   |     | ``mask`` | any |         |
+   +---+-----+----------+-----+---------+
+   | 1 | END                  | END     |
+   +---+----------------------+---------+
+
+``SYN`` to ``TCP`` → ``QUEUE``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+*SYN* is a `Item: TCP`_ rule with only the ``syn`` bit enabled and masked,
+and a terminating `Action: QUEUE`_.
+
+Priority level can be set to simulate the high priority bit.
+
+.. _table_rte_flow_migration_syn:
+
+.. table:: SYN conversion
+
+   +-----------------------------------+---------+
+   | Pattern                           | Actions |
+   +===+======+==========+=============+=========+
+   | 0 | ETH  | ``spec`` | unset       | QUEUE   |
+   |   |      +----------+-------------+         |
+   |   |      | ``last`` | unset       |         |
+   |   |      +----------+-------------+         |
+   |   |      | ``mask`` | unset       |         |
+   +---+------+----------+-------------+         |
+   | 1 | IPV4 | ``spec`` | unset       |         |
+   |   |      +----------+-------------+         |
+   |   |      | ``mask`` | unset       |         |
+   |   |      +----------+-------------+         |
+   |   |      | ``mask`` | unset       |         |
+   +---+------+----------+---------+---+         |
+   | 2 | TCP  | ``spec`` | ``syn`` | 1 |         |
+   |   |      +----------+---------+---+         |
+   |   |      | ``mask`` | ``syn`` | 1 |         |
+   +---+------+----------+---------+---+---------+
+   | 3 | END                           | END     |
+   +---+-------------------------------+---------+
+
+``NTUPLE`` to ``IPV4``, ``TCP``, ``UDP`` → ``QUEUE``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+*NTUPLE* is similar to specifying an empty L2, `Item: IPV4`_ as L3 with
+`Item: TCP`_ or `Item: UDP`_ as L4 and a terminating `Action: QUEUE`_.
+
+A priority level can be specified as well.
+
+.. _table_rte_flow_migration_ntuple:
+
+.. table:: NTUPLE conversion
+
+   +-----------------------------+---------+
+   | Pattern                     | Actions |
+   +===+======+==========+=======+=========+
+   | 0 | ETH  | ``spec`` | unset | QUEUE   |
+   |   |      +----------+-------+         |
+   |   |      | ``last`` | unset |         |
+   |   |      +----------+-------+         |
+   |   |      | ``mask`` | unset |         |
+   +---+------+----------+-------+         |
+   | 1 | IPV4 | ``spec`` | any   |         |
+   |   |      +----------+-------+         |
+   |   |      | ``last`` | unset |         |
+   |   |      +----------+-------+         |
+   |   |      | ``mask`` | any   |         |
+   +---+------+----------+-------+         |
+   | 2 | TCP, | ``spec`` | any   |         |
+   |   | UDP  +----------+-------+         |
+   |   |      | ``last`` | unset |         |
+   |   |      +----------+-------+         |
+   |   |      | ``mask`` | any   |         |
+   +---+------+----------+-------+---------+
+   | 3 | END                     | END     |
+   +---+-------------------------+---------+
+
+``TUNNEL`` to ``ETH``, ``IPV4``, ``IPV6``, ``VXLAN`` (or other) → ``QUEUE``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+*TUNNEL* matches common IPv4 and IPv6 L3/L4-based tunnel types.
+
+In the following table, `Item: ANY`_ is used to cover the optional L4.
+
+.. _table_rte_flow_migration_tunnel:
+
+.. table:: TUNNEL conversion
+
+   +--------------------------------------+---------+
+   | Pattern                              | Actions |
+   +===+=========+==========+=============+=========+
+   | 0 | ETH     | ``spec`` | any         | QUEUE   |
+   |   |         +----------+-------------+         |
+   |   |         | ``last`` | unset       |         |
+   |   |         +----------+-------------+         |
+   |   |         | ``mask`` | any         |         |
+   +---+---------+----------+-------------+         |
+   | 1 | IPV4,   | ``spec`` | any         |         |
+   |   | IPV6    +----------+-------------+         |
+   |   |         | ``last`` | unset       |         |
+   |   |         +----------+-------------+         |
+   |   |         | ``mask`` | any         |         |
+   +---+---------+----------+-------------+         |
+   | 2 | ANY     | ``spec`` | any         |         |
+   |   |         +----------+-------------+         |
+   |   |         | ``last`` | unset       |         |
+   |   |         +----------+---------+---+         |
+   |   |         | ``mask`` | ``num`` | 0 |         |
+   +---+---------+----------+---------+---+         |
+   | 3 | VXLAN,  | ``spec`` | any         |         |
+   |   | GENEVE, +----------+-------------+         |
+   |   | TEREDO, | ``last`` | unset       |         |
+   |   | NVGRE,  +----------+-------------+         |
+   |   | GRE,    | ``mask`` | any         |         |
+   |   | ...     |          |             |         |
+   |   |         |          |             |         |
+   |   |         |          |             |         |
+   +---+---------+----------+-------------+---------+
+   | 4 | END                              | END     |
+   +---+----------------------------------+---------+
+
+``FDIR`` to most item types → ``QUEUE``, ``DROP``, ``PASSTHRU``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+*FDIR* is more complex than any other type, there are several methods to
+emulate its functionality. It is summarized for the most part in the table
+below.
+
+A few features are intentionally not supported:
+
+- The ability to configure the matching input set and masks for the entire
+  device, PMDs should take care of it automatically according to the
+  requested flow rules.
+
+  For example if a device supports only one bit-mask per protocol type,
+  source/address IPv4 bit-masks can be made immutable by the first created
+  rule. Subsequent IPv4 or TCPv4 rules can only be created if they are
+  compatible.
+
+  Note that only protocol bit-masks affected by existing flow rules are
+  immutable, others can be changed later. They become mutable again after
+  the related flow rules are destroyed.
+
+- Returning four or eight bytes of matched data when using flex bytes
+  filtering. Although a specific action could implement it, it conflicts
+  with the much more useful 32 bits tagging on devices that support it.
+
+- Side effects on RSS processing of the entire device. Flow rules that
+  conflict with the current device configuration should not be
+  allowed. Similarly, device configuration should not be allowed when it
+  affects existing flow rules.
+
+- Device modes of operation. "none" is unsupported since filtering cannot be
+  disabled as long as a flow rule is present.
+
+- "MAC VLAN" or "tunnel" perfect matching modes should be automatically set
+  according to the created flow rules.
+
+- Signature mode of operation is not defined but could be handled through a
+  specific item type if needed.
+
+.. _table_rte_flow_migration_fdir:
+
+.. table:: FDIR conversion
+
+   +---------------------------------+------------+
+   | Pattern                         | Actions    |
+   +===+============+==========+=====+============+
+   | 0 | ETH,       | ``spec`` | any | QUEUE,     |
+   |   | RAW        +----------+-----+ DROP,      |
+   |   |            | ``last`` | N/A | PASSTHRU   |
+   |   |            +----------+-----+            |
+   |   |            | ``mask`` | any |            |
+   +---+------------+----------+-----+------------+
+   | 1 | IPV4,      | ``spec`` | any | MARK       |
+   |   | IPV6       +----------+-----+            |
+   |   |            | ``last`` | N/A |            |
+   |   |            +----------+-----+            |
+   |   |            | ``mask`` | any |            |
+   +---+------------+----------+-----+            |
+   | 2 | TCP,       | ``spec`` | any |            |
+   |   | UDP,       +----------+-----+            |
+   |   | SCTP       | ``last`` | N/A |            |
+   |   |            +----------+-----+            |
+   |   |            | ``mask`` | any |            |
+   +---+------------+----------+-----+            |
+   | 3 | VF,        | ``spec`` | any |            |
+   |   | PF         +----------+-----+            |
+   |   | (optional) | ``last`` | N/A |            |
+   |   |            +----------+-----+            |
+   |   |            | ``mask`` | any |            |
+   +---+------------+----------+-----+------------+
+   | 4 | END                         | END        |
+   +---+-----------------------------+------------+
+
+``HASH``
+~~~~~~~~
+
+There is no counterpart to this filter type because it translates to a
+global device setting instead of a pattern item. Device settings are
+automatically set according to the created flow rules.
+
+``L2_TUNNEL`` to ``VOID`` → ``VXLAN`` (or others)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+All packets are matched. This type alters incoming packets to encapsulate
+them in a chosen tunnel type, optionally redirect them to a VF as well.
+
+The destination pool for tag based forwarding can be emulated with other
+flow rules using `Action: DUP`_.
+
+.. _table_rte_flow_migration_l2tunnel:
+
+.. table:: L2_TUNNEL conversion
+
+   +---------------------------+------------+
+   | Pattern                   | Actions    |
+   +===+======+==========+=====+============+
+   | 0 | VOID | ``spec`` | N/A | VXLAN,     |
+   |   |      |          |     | GENEVE,    |
+   |   |      |          |     | ...        |
+   |   |      +----------+-----+------------+
+   |   |      | ``last`` | N/A | VF         |
+   |   |      +----------+-----+ (optional) |
+   |   |      | ``mask`` | N/A |            |
+   |   |      |          |     |            |
+   +---+------+----------+-----+------------+
+   | 1 | END                   | END        |
+   +---+-----------------------+------------+
-- 
2.1.4

^ permalink raw reply related

* [PATCH v3 03/25] doc: announce deprecation of legacy filter types
From: Adrien Mazarguil @ 2016-12-19 17:48 UTC (permalink / raw)
  To: dev
In-Reply-To: <cover.1482168851.git.adrien.mazarguil@6wind.com>

They are superseded by the generic flow API (rte_flow). Target release is
not defined yet.

Suggested-by: Kevin Traynor <ktraynor@redhat.com>
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Olga Shern <olgas@mellanox.com>
---
 doc/guides/rel_notes/deprecation.rst | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 2d17bc6..1438c77 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -71,3 +71,11 @@ Deprecation Notices
 * mempool: The functions for single/multi producer/consumer are deprecated
   and will be removed in 17.02.
   It is replaced by ``rte_mempool_generic_get/put`` functions.
+
+* ethdev: the legacy filter API, including
+  ``rte_eth_dev_filter_supported()``, ``rte_eth_dev_filter_ctrl()`` as well
+  as filter types MACVLAN, ETHERTYPE, FLEXIBLE, SYN, NTUPLE, TUNNEL, FDIR,
+  HASH and L2_TUNNEL, is superseded by the generic flow API (rte_flow) in
+  PMDs that implement the latter.
+  Target release for removal of the legacy API will be defined once most
+  PMDs have switched to rte_flow.
-- 
2.1.4

^ permalink raw reply related

* [PATCH v3 01/25] ethdev: introduce generic flow API
From: Adrien Mazarguil @ 2016-12-19 17:48 UTC (permalink / raw)
  To: dev
In-Reply-To: <cover.1482168851.git.adrien.mazarguil@6wind.com>

This new API supersedes all the legacy filter types described in
rte_eth_ctrl.h. It is slightly higher level and as a result relies more on
PMDs to process and validate flow rules.

Benefits:

- A unified API is easier to program for, applications do not have to be
  written for a specific filter type which may or may not be supported by
  the underlying device.

- The behavior of a flow rule is the same regardless of the underlying
  device, applications do not need to be aware of hardware quirks.

- Extensible by design, API/ABI breakage should rarely occur if at all.

- Documentation is self-standing, no need to look up elsewhere.

Existing filter types will be deprecated and removed in the near future.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Acked-by: Olga Shern <olgas@mellanox.com>
---
 MAINTAINERS                            |   4 +
 doc/api/doxy-api-index.md              |   2 +
 lib/librte_ether/Makefile              |   3 +
 lib/librte_ether/rte_eth_ctrl.h        |   1 +
 lib/librte_ether/rte_ether_version.map |  11 +
 lib/librte_ether/rte_flow.c            | 159 +++++
 lib/librte_ether/rte_flow.h            | 947 ++++++++++++++++++++++++++++
 lib/librte_ether/rte_flow_driver.h     | 182 ++++++
 8 files changed, 1309 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 26d9590..5975cff 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -243,6 +243,10 @@ M: Thomas Monjalon <thomas.monjalon@6wind.com>
 F: lib/librte_ether/
 F: scripts/test-null.sh
 
+Generic flow API
+M: Adrien Mazarguil <adrien.mazarguil@6wind.com>
+F: lib/librte_ether/rte_flow*
+
 Crypto API
 M: Declan Doherty <declan.doherty@intel.com>
 F: lib/librte_cryptodev/
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index de65b4c..4951552 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -39,6 +39,8 @@ There are many libraries, so their headers may be grouped by topics:
   [dev]                (@ref rte_dev.h),
   [ethdev]             (@ref rte_ethdev.h),
   [ethctrl]            (@ref rte_eth_ctrl.h),
+  [rte_flow]           (@ref rte_flow.h),
+  [rte_flow_driver]    (@ref rte_flow_driver.h),
   [cryptodev]          (@ref rte_cryptodev.h),
   [devargs]            (@ref rte_devargs.h),
   [bond]               (@ref rte_eth_bond.h),
diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile
index efe1e5f..9335361 100644
--- a/lib/librte_ether/Makefile
+++ b/lib/librte_ether/Makefile
@@ -44,6 +44,7 @@ EXPORT_MAP := rte_ether_version.map
 LIBABIVER := 5
 
 SRCS-y += rte_ethdev.c
+SRCS-y += rte_flow.c
 
 #
 # Export include files
@@ -51,6 +52,8 @@ SRCS-y += rte_ethdev.c
 SYMLINK-y-include += rte_ethdev.h
 SYMLINK-y-include += rte_eth_ctrl.h
 SYMLINK-y-include += rte_dev_info.h
+SYMLINK-y-include += rte_flow.h
+SYMLINK-y-include += rte_flow_driver.h
 
 # this lib depends upon:
 DEPDIRS-y += lib/librte_net lib/librte_eal lib/librte_mempool lib/librte_ring lib/librte_mbuf
diff --git a/lib/librte_ether/rte_eth_ctrl.h b/lib/librte_ether/rte_eth_ctrl.h
index fe80eb0..8386904 100644
--- a/lib/librte_ether/rte_eth_ctrl.h
+++ b/lib/librte_ether/rte_eth_ctrl.h
@@ -99,6 +99,7 @@ enum rte_filter_type {
 	RTE_ETH_FILTER_FDIR,
 	RTE_ETH_FILTER_HASH,
 	RTE_ETH_FILTER_L2_TUNNEL,
+	RTE_ETH_FILTER_GENERIC,
 	RTE_ETH_FILTER_MAX
 };
 
diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map
index 72be66d..384cdee 100644
--- a/lib/librte_ether/rte_ether_version.map
+++ b/lib/librte_ether/rte_ether_version.map
@@ -147,3 +147,14 @@ DPDK_16.11 {
 	rte_eth_dev_pci_remove;
 
 } DPDK_16.07;
+
+DPDK_17.02 {
+	global:
+
+	rte_flow_validate;
+	rte_flow_create;
+	rte_flow_destroy;
+	rte_flow_flush;
+	rte_flow_query;
+
+} DPDK_16.11;
diff --git a/lib/librte_ether/rte_flow.c b/lib/librte_ether/rte_flow.c
new file mode 100644
index 0000000..d98fb1b
--- /dev/null
+++ b/lib/librte_ether/rte_flow.c
@@ -0,0 +1,159 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   Copyright 2016 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include <rte_errno.h>
+#include <rte_branch_prediction.h>
+#include "rte_ethdev.h"
+#include "rte_flow_driver.h"
+#include "rte_flow.h"
+
+/* Get generic flow operations structure from a port. */
+const struct rte_flow_ops *
+rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	const struct rte_flow_ops *ops;
+	int code;
+
+	if (unlikely(!rte_eth_dev_is_valid_port(port_id)))
+		code = ENODEV;
+	else if (unlikely(!dev->dev_ops->filter_ctrl ||
+			  dev->dev_ops->filter_ctrl(dev,
+						    RTE_ETH_FILTER_GENERIC,
+						    RTE_ETH_FILTER_GET,
+						    &ops) ||
+			  !ops))
+		code = ENOSYS;
+	else
+		return ops;
+	rte_flow_error_set(error, code, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL, rte_strerror(code));
+	return NULL;
+}
+
+/* Check whether a flow rule can be created on a given port. */
+int
+rte_flow_validate(uint8_t port_id,
+		  const struct rte_flow_attr *attr,
+		  const struct rte_flow_item pattern[],
+		  const struct rte_flow_action actions[],
+		  struct rte_flow_error *error)
+{
+	const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error);
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+	if (unlikely(!ops))
+		return -rte_errno;
+	if (likely(!!ops->validate))
+		return ops->validate(dev, attr, pattern, actions, error);
+	rte_flow_error_set(error, ENOSYS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL, rte_strerror(ENOSYS));
+	return -rte_errno;
+}
+
+/* Create a flow rule on a given port. */
+struct rte_flow *
+rte_flow_create(uint8_t port_id,
+		const struct rte_flow_attr *attr,
+		const struct rte_flow_item pattern[],
+		const struct rte_flow_action actions[],
+		struct rte_flow_error *error)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error);
+
+	if (unlikely(!ops))
+		return NULL;
+	if (likely(!!ops->create))
+		return ops->create(dev, attr, pattern, actions, error);
+	rte_flow_error_set(error, ENOSYS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL, rte_strerror(ENOSYS));
+	return NULL;
+}
+
+/* Destroy a flow rule on a given port. */
+int
+rte_flow_destroy(uint8_t port_id,
+		 struct rte_flow *flow,
+		 struct rte_flow_error *error)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error);
+
+	if (unlikely(!ops))
+		return -rte_errno;
+	if (likely(!!ops->destroy))
+		return ops->destroy(dev, flow, error);
+	rte_flow_error_set(error, ENOSYS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL, rte_strerror(ENOSYS));
+	return -rte_errno;
+}
+
+/* Destroy all flow rules associated with a port. */
+int
+rte_flow_flush(uint8_t port_id,
+	       struct rte_flow_error *error)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error);
+
+	if (unlikely(!ops))
+		return -rte_errno;
+	if (likely(!!ops->flush))
+		return ops->flush(dev, error);
+	rte_flow_error_set(error, ENOSYS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL, rte_strerror(ENOSYS));
+	return -rte_errno;
+}
+
+/* Query an existing flow rule. */
+int
+rte_flow_query(uint8_t port_id,
+	       struct rte_flow *flow,
+	       enum rte_flow_action_type action,
+	       void *data,
+	       struct rte_flow_error *error)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+	const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error);
+
+	if (!ops)
+		return -rte_errno;
+	if (likely(!!ops->query))
+		return ops->query(dev, flow, action, data, error);
+	rte_flow_error_set(error, ENOSYS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			   NULL, rte_strerror(ENOSYS));
+	return -rte_errno;
+}
diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
new file mode 100644
index 0000000..98084ac
--- /dev/null
+++ b/lib/librte_ether/rte_flow.h
@@ -0,0 +1,947 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   Copyright 2016 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_FLOW_H_
+#define RTE_FLOW_H_
+
+/**
+ * @file
+ * RTE generic flow API
+ *
+ * This interface provides the ability to program packet matching and
+ * associated actions in hardware through flow rules.
+ */
+
+#include <rte_arp.h>
+#include <rte_ether.h>
+#include <rte_icmp.h>
+#include <rte_ip.h>
+#include <rte_sctp.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Flow rule attributes.
+ *
+ * Priorities are set on two levels: per group and per rule within groups.
+ *
+ * Lower values denote higher priority, the highest priority for both levels
+ * is 0, so that a rule with priority 0 in group 8 is always matched after a
+ * rule with priority 8 in group 0.
+ *
+ * Although optional, applications are encouraged to group similar rules as
+ * much as possible to fully take advantage of hardware capabilities
+ * (e.g. optimized matching) and work around limitations (e.g. a single
+ * pattern type possibly allowed in a given group).
+ *
+ * Group and priority levels are arbitrary and up to the application, they
+ * do not need to be contiguous nor start from 0, however the maximum number
+ * varies between devices and may be affected by existing flow rules.
+ *
+ * If a packet is matched by several rules of a given group for a given
+ * priority level, the outcome is undefined. It can take any path, may be
+ * duplicated or even cause unrecoverable errors.
+ *
+ * Note that support for more than a single group and priority level is not
+ * guaranteed.
+ *
+ * Flow rules can apply to inbound and/or outbound traffic (ingress/egress).
+ *
+ * Several pattern items and actions are valid and can be used in both
+ * directions. Those valid for only one direction are described as such.
+ *
+ * At least one direction must be specified.
+ *
+ * Specifying both directions at once for a given rule is not recommended
+ * but may be valid in a few cases (e.g. shared counter).
+ */
+struct rte_flow_attr {
+	uint32_t group; /**< Priority group. */
+	uint32_t priority; /**< Priority level within group. */
+	uint32_t ingress:1; /**< Rule applies to ingress traffic. */
+	uint32_t egress:1; /**< Rule applies to egress traffic. */
+	uint32_t reserved:30; /**< Reserved, must be zero. */
+};
+
+/**
+ * Matching pattern item types.
+ *
+ * Pattern items fall in two categories:
+ *
+ * - Matching protocol headers and packet data (ANY, RAW, ETH, VLAN, IPV4,
+ *   IPV6, ICMP, UDP, TCP, SCTP, VXLAN and so on), usually associated with a
+ *   specification structure. These must be stacked in the same order as the
+ *   protocol layers to match, starting from the lowest.
+ *
+ * - Matching meta-data or affecting pattern processing (END, VOID, INVERT,
+ *   PF, VF, PORT and so on), often without a specification structure. Since
+ *   they do not match packet contents, these can be specified anywhere
+ *   within item lists without affecting others.
+ *
+ * See the description of individual types for more information. Those
+ * marked with [META] fall into the second category.
+ */
+enum rte_flow_item_type {
+	/**
+	 * [META]
+	 *
+	 * End marker for item lists. Prevents further processing of items,
+	 * thereby ending the pattern.
+	 *
+	 * No associated specification structure.
+	 */
+	RTE_FLOW_ITEM_TYPE_END,
+
+	/**
+	 * [META]
+	 *
+	 * Used as a placeholder for convenience. It is ignored and simply
+	 * discarded by PMDs.
+	 *
+	 * No associated specification structure.
+	 */
+	RTE_FLOW_ITEM_TYPE_VOID,
+
+	/**
+	 * [META]
+	 *
+	 * Inverted matching, i.e. process packets that do not match the
+	 * pattern.
+	 *
+	 * No associated specification structure.
+	 */
+	RTE_FLOW_ITEM_TYPE_INVERT,
+
+	/**
+	 * Matches any protocol in place of the current layer, a single ANY
+	 * may also stand for several protocol layers.
+	 *
+	 * See struct rte_flow_item_any.
+	 */
+	RTE_FLOW_ITEM_TYPE_ANY,
+
+	/**
+	 * [META]
+	 *
+	 * Matches packets addressed to the physical function of the device.
+	 *
+	 * If the underlying device function differs from the one that would
+	 * normally receive the matched traffic, specifying this item
+	 * prevents it from reaching that device unless the flow rule
+	 * contains a PF action. Packets are not duplicated between device
+	 * instances by default.
+	 *
+	 * No associated specification structure.
+	 */
+	RTE_FLOW_ITEM_TYPE_PF,
+
+	/**
+	 * [META]
+	 *
+	 * Matches packets addressed to a virtual function ID of the device.
+	 *
+	 * If the underlying device function differs from the one that would
+	 * normally receive the matched traffic, specifying this item
+	 * prevents it from reaching that device unless the flow rule
+	 * contains a VF action. Packets are not duplicated between device
+	 * instances by default.
+	 *
+	 * See struct rte_flow_item_vf.
+	 */
+	RTE_FLOW_ITEM_TYPE_VF,
+
+	/**
+	 * [META]
+	 *
+	 * Matches packets coming from the specified physical port of the
+	 * underlying device.
+	 *
+	 * The first PORT item overrides the physical port normally
+	 * associated with the specified DPDK input port (port_id). This
+	 * item can be provided several times to match additional physical
+	 * ports.
+	 *
+	 * See struct rte_flow_item_port.
+	 */
+	RTE_FLOW_ITEM_TYPE_PORT,
+
+	/**
+	 * Matches a byte string of a given length at a given offset.
+	 *
+	 * See struct rte_flow_item_raw.
+	 */
+	RTE_FLOW_ITEM_TYPE_RAW,
+
+	/**
+	 * Matches an Ethernet header.
+	 *
+	 * See struct rte_flow_item_eth.
+	 */
+	RTE_FLOW_ITEM_TYPE_ETH,
+
+	/**
+	 * Matches an 802.1Q/ad VLAN tag.
+	 *
+	 * See struct rte_flow_item_vlan.
+	 */
+	RTE_FLOW_ITEM_TYPE_VLAN,
+
+	/**
+	 * Matches an IPv4 header.
+	 *
+	 * See struct rte_flow_item_ipv4.
+	 */
+	RTE_FLOW_ITEM_TYPE_IPV4,
+
+	/**
+	 * Matches an IPv6 header.
+	 *
+	 * See struct rte_flow_item_ipv6.
+	 */
+	RTE_FLOW_ITEM_TYPE_IPV6,
+
+	/**
+	 * Matches an ICMP header.
+	 *
+	 * See struct rte_flow_item_icmp.
+	 */
+	RTE_FLOW_ITEM_TYPE_ICMP,
+
+	/**
+	 * Matches a UDP header.
+	 *
+	 * See struct rte_flow_item_udp.
+	 */
+	RTE_FLOW_ITEM_TYPE_UDP,
+
+	/**
+	 * Matches a TCP header.
+	 *
+	 * See struct rte_flow_item_tcp.
+	 */
+	RTE_FLOW_ITEM_TYPE_TCP,
+
+	/**
+	 * Matches a SCTP header.
+	 *
+	 * See struct rte_flow_item_sctp.
+	 */
+	RTE_FLOW_ITEM_TYPE_SCTP,
+
+	/**
+	 * Matches a VXLAN header.
+	 *
+	 * See struct rte_flow_item_vxlan.
+	 */
+	RTE_FLOW_ITEM_TYPE_VXLAN,
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_ANY
+ *
+ * Matches any protocol in place of the current layer, a single ANY may also
+ * stand for several protocol layers.
+ *
+ * This is usually specified as the first pattern item when looking for a
+ * protocol anywhere in a packet.
+ *
+ * A zeroed mask stands for any number of layers.
+ */
+struct rte_flow_item_any {
+	uint32_t num; /* Number of layers covered. */
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_VF
+ *
+ * Matches packets addressed to a virtual function ID of the device.
+ *
+ * If the underlying device function differs from the one that would
+ * normally receive the matched traffic, specifying this item prevents it
+ * from reaching that device unless the flow rule contains a VF
+ * action. Packets are not duplicated between device instances by default.
+ *
+ * - Likely to return an error or never match any traffic if this causes a
+ *   VF device to match traffic addressed to a different VF.
+ * - Can be specified multiple times to match traffic addressed to several
+ *   VF IDs.
+ * - Can be combined with a PF item to match both PF and VF traffic.
+ *
+ * A zeroed mask can be used to match any VF ID.
+ */
+struct rte_flow_item_vf {
+	uint32_t id; /**< Destination VF ID. */
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_PORT
+ *
+ * Matches packets coming from the specified physical port of the underlying
+ * device.
+ *
+ * The first PORT item overrides the physical port normally associated with
+ * the specified DPDK input port (port_id). This item can be provided
+ * several times to match additional physical ports.
+ *
+ * Note that physical ports are not necessarily tied to DPDK input ports
+ * (port_id) when those are not under DPDK control. Possible values are
+ * specific to each device, they are not necessarily indexed from zero and
+ * may not be contiguous.
+ *
+ * As a device property, the list of allowed values as well as the value
+ * associated with a port_id should be retrieved by other means.
+ *
+ * A zeroed mask can be used to match any port index.
+ */
+struct rte_flow_item_port {
+	uint32_t index; /**< Physical port index. */
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_RAW
+ *
+ * Matches a byte string of a given length at a given offset.
+ *
+ * Offset is either absolute (using the start of the packet) or relative to
+ * the end of the previous matched item in the stack, in which case negative
+ * values are allowed.
+ *
+ * If search is enabled, offset is used as the starting point. The search
+ * area can be delimited by setting limit to a nonzero value, which is the
+ * maximum number of bytes after offset where the pattern may start.
+ *
+ * Matching a zero-length pattern is allowed, doing so resets the relative
+ * offset for subsequent items.
+ *
+ * This type does not support ranges (struct rte_flow_item.last).
+ */
+struct rte_flow_item_raw {
+	uint32_t relative:1; /**< Look for pattern after the previous item. */
+	uint32_t search:1; /**< Search pattern from offset (see also limit). */
+	uint32_t reserved:30; /**< Reserved, must be set to zero. */
+	int32_t offset; /**< Absolute or relative offset for pattern. */
+	uint16_t limit; /**< Search area limit for start of pattern. */
+	uint16_t length; /**< Pattern length. */
+	uint8_t pattern[]; /**< Byte string to look for. */
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_ETH
+ *
+ * Matches an Ethernet header.
+ */
+struct rte_flow_item_eth {
+	struct ether_addr dst; /**< Destination MAC. */
+	struct ether_addr src; /**< Source MAC. */
+	uint16_t type; /**< EtherType. */
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_VLAN
+ *
+ * Matches an 802.1Q/ad VLAN tag.
+ *
+ * This type normally follows either RTE_FLOW_ITEM_TYPE_ETH or
+ * RTE_FLOW_ITEM_TYPE_VLAN.
+ */
+struct rte_flow_item_vlan {
+	uint16_t tpid; /**< Tag protocol identifier. */
+	uint16_t tci; /**< Tag control information. */
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_IPV4
+ *
+ * Matches an IPv4 header.
+ *
+ * Note: IPv4 options are handled by dedicated pattern items.
+ */
+struct rte_flow_item_ipv4 {
+	struct ipv4_hdr hdr; /**< IPv4 header definition. */
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_IPV6.
+ *
+ * Matches an IPv6 header.
+ *
+ * Note: IPv6 options are handled by dedicated pattern items.
+ */
+struct rte_flow_item_ipv6 {
+	struct ipv6_hdr hdr; /**< IPv6 header definition. */
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_ICMP.
+ *
+ * Matches an ICMP header.
+ */
+struct rte_flow_item_icmp {
+	struct icmp_hdr hdr; /**< ICMP header definition. */
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_UDP.
+ *
+ * Matches a UDP header.
+ */
+struct rte_flow_item_udp {
+	struct udp_hdr hdr; /**< UDP header definition. */
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_TCP.
+ *
+ * Matches a TCP header.
+ */
+struct rte_flow_item_tcp {
+	struct tcp_hdr hdr; /**< TCP header definition. */
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_SCTP.
+ *
+ * Matches a SCTP header.
+ */
+struct rte_flow_item_sctp {
+	struct sctp_hdr hdr; /**< SCTP header definition. */
+};
+
+/**
+ * RTE_FLOW_ITEM_TYPE_VXLAN.
+ *
+ * Matches a VXLAN header (RFC 7348).
+ */
+struct rte_flow_item_vxlan {
+	uint8_t flags; /**< Normally 0x08 (I flag). */
+	uint8_t rsvd0[3]; /**< Reserved, normally 0x000000. */
+	uint8_t vni[3]; /**< VXLAN identifier. */
+	uint8_t rsvd1; /**< Reserved, normally 0x00. */
+};
+
+/**
+ * Matching pattern item definition.
+ *
+ * A pattern is formed by stacking items starting from the lowest protocol
+ * layer to match. This stacking restriction does not apply to meta items
+ * which can be placed anywhere in the stack without affecting the meaning
+ * of the resulting pattern.
+ *
+ * Patterns are terminated by END items.
+ *
+ * The spec field should be a valid pointer to a structure of the related
+ * item type. It may be set to NULL in many cases to use default values.
+ *
+ * Optionally, last can point to a structure of the same type to define an
+ * inclusive range. This is mostly supported by integer and address fields,
+ * may cause errors otherwise. Fields that do not support ranges must be set
+ * to 0 or to the same value as the corresponding fields in spec.
+ *
+ * By default all fields present in spec are considered relevant (see note
+ * below). This behavior can be altered by providing a mask structure of the
+ * same type with applicable bits set to one. It can also be used to
+ * partially filter out specific fields (e.g. as an alternate mean to match
+ * ranges of IP addresses).
+ *
+ * Mask is a simple bit-mask applied before interpreting the contents of
+ * spec and last, which may yield unexpected results if not used
+ * carefully. For example, if for an IPv4 address field, spec provides
+ * 10.1.2.3, last provides 10.3.4.5 and mask provides 255.255.0.0, the
+ * effective range becomes 10.1.0.0 to 10.3.255.255.
+ *
+ * Note: the defaults for data-matching items such as IPv4 when mask is not
+ * specified actually depend on the underlying implementation since only
+ * recognized fields can be taken into account.
+ */
+struct rte_flow_item {
+	enum rte_flow_item_type type; /**< Item type. */
+	const void *spec; /**< Pointer to item specification structure. */
+	const void *last; /**< Defines an inclusive range (spec to last). */
+	const void *mask; /**< Bit-mask applied to spec and last. */
+};
+
+/**
+ * Action types.
+ *
+ * Each possible action is represented by a type. Some have associated
+ * configuration structures. Several actions combined in a list can be
+ * affected to a flow rule. That list is not ordered.
+ *
+ * They fall in three categories:
+ *
+ * - Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent
+ *   processing matched packets by subsequent flow rules, unless overridden
+ *   with PASSTHRU.
+ *
+ * - Non terminating actions (PASSTHRU, DUP) that leave matched packets up
+ *   for additional processing by subsequent flow rules.
+ *
+ * - Other non terminating meta actions that do not affect the fate of
+ *   packets (END, VOID, MARK, FLAG, COUNT).
+ *
+ * When several actions are combined in a flow rule, they should all have
+ * different types (e.g. dropping a packet twice is not possible).
+ *
+ * Only the last action of a given type is taken into account. PMDs still
+ * perform error checking on the entire list.
+ *
+ * Note that PASSTHRU is the only action able to override a terminating
+ * rule.
+ */
+enum rte_flow_action_type {
+	/**
+	 * [META]
+	 *
+	 * End marker for action lists. Prevents further processing of
+	 * actions, thereby ending the list.
+	 *
+	 * No associated configuration structure.
+	 */
+	RTE_FLOW_ACTION_TYPE_END,
+
+	/**
+	 * [META]
+	 *
+	 * Used as a placeholder for convenience. It is ignored and simply
+	 * discarded by PMDs.
+	 *
+	 * No associated configuration structure.
+	 */
+	RTE_FLOW_ACTION_TYPE_VOID,
+
+	/**
+	 * Leaves packets up for additional processing by subsequent flow
+	 * rules. This is the default when a rule does not contain a
+	 * terminating action, but can be specified to force a rule to
+	 * become non-terminating.
+	 *
+	 * No associated configuration structure.
+	 */
+	RTE_FLOW_ACTION_TYPE_PASSTHRU,
+
+	/**
+	 * [META]
+	 *
+	 * Attaches a 32 bit value to packets.
+	 *
+	 * See struct rte_flow_action_mark.
+	 */
+	RTE_FLOW_ACTION_TYPE_MARK,
+
+	/**
+	 * [META]
+	 *
+	 * Flag packets. Similar to MARK but only affects ol_flags.
+	 *
+	 * Note: a distinctive flag must be defined for it.
+	 *
+	 * No associated configuration structure.
+	 */
+	RTE_FLOW_ACTION_TYPE_FLAG,
+
+	/**
+	 * Assigns packets to a given queue index.
+	 *
+	 * See struct rte_flow_action_queue.
+	 */
+	RTE_FLOW_ACTION_TYPE_QUEUE,
+
+	/**
+	 * Drops packets.
+	 *
+	 * PASSTHRU overrides this action if both are specified.
+	 *
+	 * No associated configuration structure.
+	 */
+	RTE_FLOW_ACTION_TYPE_DROP,
+
+	/**
+	 * [META]
+	 *
+	 * Enables counters for this rule.
+	 *
+	 * These counters can be retrieved and reset through rte_flow_query(),
+	 * see struct rte_flow_query_count.
+	 *
+	 * No associated configuration structure.
+	 */
+	RTE_FLOW_ACTION_TYPE_COUNT,
+
+	/**
+	 * Duplicates packets to a given queue index.
+	 *
+	 * This is normally combined with QUEUE, however when used alone, it
+	 * is actually similar to QUEUE + PASSTHRU.
+	 *
+	 * See struct rte_flow_action_dup.
+	 */
+	RTE_FLOW_ACTION_TYPE_DUP,
+
+	/**
+	 * Similar to QUEUE, except RSS is additionally performed on packets
+	 * to spread them among several queues according to the provided
+	 * parameters.
+	 *
+	 * See struct rte_flow_action_rss.
+	 */
+	RTE_FLOW_ACTION_TYPE_RSS,
+
+	/**
+	 * Redirects packets to the physical function (PF) of the current
+	 * device.
+	 *
+	 * No associated configuration structure.
+	 */
+	RTE_FLOW_ACTION_TYPE_PF,
+
+	/**
+	 * Redirects packets to the virtual function (VF) of the current
+	 * device with the specified ID.
+	 *
+	 * See struct rte_flow_action_vf.
+	 */
+	RTE_FLOW_ACTION_TYPE_VF,
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_MARK
+ *
+ * Attaches a 32 bit value to packets.
+ *
+ * This value is arbitrary and application-defined. For compatibility with
+ * FDIR it is returned in the hash.fdir.hi mbuf field. PKT_RX_FDIR_ID is
+ * also set in ol_flags.
+ */
+struct rte_flow_action_mark {
+	uint32_t id; /**< 32 bit value to return with packets. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_QUEUE
+ *
+ * Assign packets to a given queue index.
+ *
+ * Terminating by default.
+ */
+struct rte_flow_action_queue {
+	uint16_t index; /**< Queue index to use. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_COUNT (query)
+ *
+ * Query structure to retrieve and reset flow rule counters.
+ */
+struct rte_flow_query_count {
+	uint32_t reset:1; /**< Reset counters after query [in]. */
+	uint32_t hits_set:1; /**< hits field is set [out]. */
+	uint32_t bytes_set:1; /**< bytes field is set [out]. */
+	uint32_t reserved:29; /**< Reserved, must be zero [in, out]. */
+	uint64_t hits; /**< Number of hits for this rule [out]. */
+	uint64_t bytes; /**< Number of bytes through this rule [out]. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_DUP
+ *
+ * Duplicates packets to a given queue index.
+ *
+ * This is normally combined with QUEUE, however when used alone, it is
+ * actually similar to QUEUE + PASSTHRU.
+ *
+ * Non-terminating by default.
+ */
+struct rte_flow_action_dup {
+	uint16_t index; /**< Queue index to duplicate packets to. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_RSS
+ *
+ * Similar to QUEUE, except RSS is additionally performed on packets to
+ * spread them among several queues according to the provided parameters.
+ *
+ * Note: RSS hash result is normally stored in the hash.rss mbuf field,
+ * however it conflicts with the MARK action as they share the same
+ * space. When both actions are specified, the RSS hash is discarded and
+ * PKT_RX_RSS_HASH is not set in ol_flags. MARK has priority. The mbuf
+ * structure should eventually evolve to store both.
+ *
+ * Terminating by default.
+ */
+struct rte_flow_action_rss {
+	const struct rte_eth_rss_conf *rss_conf; /**< RSS parameters. */
+	uint16_t num; /**< Number of entries in queue[]. */
+	uint16_t queue[]; /**< Queues indices to use. */
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_VF
+ *
+ * Redirects packets to a virtual function (VF) of the current device.
+ *
+ * Packets matched by a VF pattern item can be redirected to their original
+ * VF ID instead of the specified one. This parameter may not be available
+ * and is not guaranteed to work properly if the VF part is matched by a
+ * prior flow rule or if packets are not addressed to a VF in the first
+ * place.
+ *
+ * Terminating by default.
+ */
+struct rte_flow_action_vf {
+	uint32_t original:1; /**< Use original VF ID if possible. */
+	uint32_t reserved:31; /**< Reserved, must be zero. */
+	uint32_t id; /**< VF ID to redirect packets to. */
+};
+
+/**
+ * Definition of a single action.
+ *
+ * A list of actions is terminated by a END action.
+ *
+ * For simple actions without a configuration structure, conf remains NULL.
+ */
+struct rte_flow_action {
+	enum rte_flow_action_type type; /**< Action type. */
+	const void *conf; /**< Pointer to action configuration structure. */
+};
+
+/**
+ * Opaque type returned after successfully creating a flow.
+ *
+ * This handle can be used to manage and query the related flow (e.g. to
+ * destroy it or retrieve counters).
+ */
+struct rte_flow;
+
+/**
+ * Verbose error types.
+ *
+ * Most of them provide the type of the object referenced by struct
+ * rte_flow_error.cause.
+ */
+enum rte_flow_error_type {
+	RTE_FLOW_ERROR_TYPE_NONE, /**< No error. */
+	RTE_FLOW_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */
+	RTE_FLOW_ERROR_TYPE_HANDLE, /**< Flow rule (handle). */
+	RTE_FLOW_ERROR_TYPE_ATTR_GROUP, /**< Group field. */
+	RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, /**< Priority field. */
+	RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, /**< Ingress field. */
+	RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, /**< Egress field. */
+	RTE_FLOW_ERROR_TYPE_ATTR, /**< Attributes structure. */
+	RTE_FLOW_ERROR_TYPE_ITEM_NUM, /**< Pattern length. */
+	RTE_FLOW_ERROR_TYPE_ITEM, /**< Specific pattern item. */
+	RTE_FLOW_ERROR_TYPE_ACTION_NUM, /**< Number of actions. */
+	RTE_FLOW_ERROR_TYPE_ACTION, /**< Specific action. */
+};
+
+/**
+ * Verbose error structure definition.
+ *
+ * This object is normally allocated by applications and set by PMDs, the
+ * message points to a constant string which does not need to be freed by
+ * the application, however its pointer can be considered valid only as long
+ * as its associated DPDK port remains configured. Closing the underlying
+ * device or unloading the PMD invalidates it.
+ *
+ * Both cause and message may be NULL regardless of the error type.
+ */
+struct rte_flow_error {
+	enum rte_flow_error_type type; /**< Cause field and error types. */
+	const void *cause; /**< Object responsible for the error. */
+	const char *message; /**< Human-readable error message. */
+};
+
+/**
+ * Check whether a flow rule can be created on a given port.
+ *
+ * While this function has no effect on the target device, the flow rule is
+ * validated against its current configuration state and the returned value
+ * should be considered valid by the caller for that state only.
+ *
+ * The returned value is guaranteed to remain valid only as long as no
+ * successful calls to rte_flow_create() or rte_flow_destroy() are made in
+ * the meantime and no device parameter affecting flow rules in any way are
+ * modified, due to possible collisions or resource limitations (although in
+ * such cases EINVAL should not be returned).
+ *
+ * @param port_id
+ *   Port identifier of Ethernet device.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] pattern
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 if flow rule is valid and can be created. A negative errno value
+ *   otherwise (rte_errno is also set), the following errors are defined:
+ *
+ *   -ENOSYS: underlying device does not support this functionality.
+ *
+ *   -EINVAL: unknown or invalid rule specification.
+ *
+ *   -ENOTSUP: valid but unsupported rule specification (e.g. partial
+ *   bit-masks are unsupported).
+ *
+ *   -EEXIST: collision with an existing rule.
+ *
+ *   -ENOMEM: not enough resources.
+ *
+ *   -EBUSY: action cannot be performed due to busy device resources, may
+ *   succeed if the affected queues or even the entire port are in a stopped
+ *   state (see rte_eth_dev_rx_queue_stop() and rte_eth_dev_stop()).
+ */
+int
+rte_flow_validate(uint8_t port_id,
+		  const struct rte_flow_attr *attr,
+		  const struct rte_flow_item pattern[],
+		  const struct rte_flow_action actions[],
+		  struct rte_flow_error *error);
+
+/**
+ * Create a flow rule on a given port.
+ *
+ * @param port_id
+ *   Port identifier of Ethernet device.
+ * @param[in] attr
+ *   Flow rule attributes.
+ * @param[in] pattern
+ *   Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ *   Associated actions (list terminated by the END action).
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   A valid handle in case of success, NULL otherwise and rte_errno is set
+ *   to the positive version of one of the error codes defined for
+ *   rte_flow_validate().
+ */
+struct rte_flow *
+rte_flow_create(uint8_t port_id,
+		const struct rte_flow_attr *attr,
+		const struct rte_flow_item pattern[],
+		const struct rte_flow_action actions[],
+		struct rte_flow_error *error);
+
+/**
+ * Destroy a flow rule on a given port.
+ *
+ * Failure to destroy a flow rule handle may occur when other flow rules
+ * depend on it, and destroying it would result in an inconsistent state.
+ *
+ * This function is only guaranteed to succeed if handles are destroyed in
+ * reverse order of their creation.
+ *
+ * @param port_id
+ *   Port identifier of Ethernet device.
+ * @param flow
+ *   Flow rule handle to destroy.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+rte_flow_destroy(uint8_t port_id,
+		 struct rte_flow *flow,
+		 struct rte_flow_error *error);
+
+/**
+ * Destroy all flow rules associated with a port.
+ *
+ * In the unlikely event of failure, handles are still considered destroyed
+ * and no longer valid but the port must be assumed to be in an inconsistent
+ * state.
+ *
+ * @param port_id
+ *   Port identifier of Ethernet device.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+rte_flow_flush(uint8_t port_id,
+	       struct rte_flow_error *error);
+
+/**
+ * Query an existing flow rule.
+ *
+ * This function allows retrieving flow-specific data such as counters.
+ * Data is gathered by special actions which must be present in the flow
+ * rule definition.
+ *
+ * \see RTE_FLOW_ACTION_TYPE_COUNT
+ *
+ * @param port_id
+ *   Port identifier of Ethernet device.
+ * @param flow
+ *   Flow rule handle to query.
+ * @param action
+ *   Action type to query.
+ * @param[in, out] data
+ *   Pointer to storage for the associated query data type.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. PMDs initialize this
+ *   structure in case of error only.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+rte_flow_query(uint8_t port_id,
+	       struct rte_flow *flow,
+	       enum rte_flow_action_type action,
+	       void *data,
+	       struct rte_flow_error *error);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_FLOW_H_ */
diff --git a/lib/librte_ether/rte_flow_driver.h b/lib/librte_ether/rte_flow_driver.h
new file mode 100644
index 0000000..274562c
--- /dev/null
+++ b/lib/librte_ether/rte_flow_driver.h
@@ -0,0 +1,182 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright 2016 6WIND S.A.
+ *   Copyright 2016 Mellanox.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of 6WIND S.A. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_FLOW_DRIVER_H_
+#define RTE_FLOW_DRIVER_H_
+
+/**
+ * @file
+ * RTE generic flow API (driver side)
+ *
+ * This file provides implementation helpers for internal use by PMDs, they
+ * are not intended to be exposed to applications and are not subject to ABI
+ * versioning.
+ */
+
+#include <stdint.h>
+
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include "rte_flow.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Generic flow operations structure implemented and returned by PMDs.
+ *
+ * To implement this API, PMDs must handle the RTE_ETH_FILTER_GENERIC filter
+ * type in their .filter_ctrl callback function (struct eth_dev_ops) as well
+ * as the RTE_ETH_FILTER_GET filter operation.
+ *
+ * If successful, this operation must result in a pointer to a PMD-specific
+ * struct rte_flow_ops written to the argument address as described below:
+ *
+ * \code
+ *
+ * // PMD filter_ctrl callback
+ *
+ * static const struct rte_flow_ops pmd_flow_ops = { ... };
+ *
+ * switch (filter_type) {
+ * case RTE_ETH_FILTER_GENERIC:
+ *     if (filter_op != RTE_ETH_FILTER_GET)
+ *         return -EINVAL;
+ *     *(const void **)arg = &pmd_flow_ops;
+ *     return 0;
+ * }
+ *
+ * \endcode
+ *
+ * See also rte_flow_ops_get().
+ *
+ * These callback functions are not supposed to be used by applications
+ * directly, which must rely on the API defined in rte_flow.h.
+ *
+ * Public-facing wrapper functions perform a few consistency checks so that
+ * unimplemented (i.e. NULL) callbacks simply return -ENOTSUP. These
+ * callbacks otherwise only differ by their first argument (with port ID
+ * already resolved to a pointer to struct rte_eth_dev).
+ */
+struct rte_flow_ops {
+	/** See rte_flow_validate(). */
+	int (*validate)
+		(struct rte_eth_dev *,
+		 const struct rte_flow_attr *,
+		 const struct rte_flow_item [],
+		 const struct rte_flow_action [],
+		 struct rte_flow_error *);
+	/** See rte_flow_create(). */
+	struct rte_flow *(*create)
+		(struct rte_eth_dev *,
+		 const struct rte_flow_attr *,
+		 const struct rte_flow_item [],
+		 const struct rte_flow_action [],
+		 struct rte_flow_error *);
+	/** See rte_flow_destroy(). */
+	int (*destroy)
+		(struct rte_eth_dev *,
+		 struct rte_flow *,
+		 struct rte_flow_error *);
+	/** See rte_flow_flush(). */
+	int (*flush)
+		(struct rte_eth_dev *,
+		 struct rte_flow_error *);
+	/** See rte_flow_query(). */
+	int (*query)
+		(struct rte_eth_dev *,
+		 struct rte_flow *,
+		 enum rte_flow_action_type,
+		 void *,
+		 struct rte_flow_error *);
+};
+
+/**
+ * Initialize generic flow error structure.
+ *
+ * This function also sets rte_errno to a given value.
+ *
+ * @param[out] error
+ *   Pointer to flow error structure (may be NULL).
+ * @param code
+ *   Related error code (rte_errno).
+ * @param type
+ *   Cause field and error types.
+ * @param cause
+ *   Object responsible for the error.
+ * @param message
+ *   Human-readable error message.
+ *
+ * @return
+ *   Pointer to flow error structure.
+ */
+static inline struct rte_flow_error *
+rte_flow_error_set(struct rte_flow_error *error,
+		   int code,
+		   enum rte_flow_error_type type,
+		   const void *cause,
+		   const char *message)
+{
+	if (error) {
+		*error = (struct rte_flow_error){
+			.type = type,
+			.cause = cause,
+			.message = message,
+		};
+	}
+	rte_errno = code;
+	return error;
+}
+
+/**
+ * Get generic flow operations structure from a port.
+ *
+ * @param port_id
+ *   Port identifier to query.
+ * @param[out] error
+ *   Pointer to flow error structure.
+ *
+ * @return
+ *   The flow operations structure associated with port_id, NULL in case of
+ *   error, in which case rte_errno is set and the error structure contains
+ *   additional details.
+ */
+const struct rte_flow_ops *
+rte_flow_ops_get(uint8_t port_id, struct rte_flow_error *error);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_FLOW_DRIVER_H_ */
-- 
2.1.4

^ permalink raw reply related

* [PATCH v3 00/25] Generic flow API (rte_flow)
From: Adrien Mazarguil @ 2016-12-19 17:48 UTC (permalink / raw)
  To: dev
In-Reply-To: <cover.1481903839.git.adrien.mazarguil@6wind.com>

As previously discussed in RFC v1 [1], RFC v2 [2], with changes
described in [3] (also pasted below), here is the first non-draft series
for this new API.

Its capabilities are so generic that its name had to be vague, it may be
called "Generic flow API", "Generic flow interface" (possibly shortened
as "GFI") to refer to the name of the new filter type, or "rte_flow" from
the prefix used for its public symbols. I personally favor the latter.

While it is currently meant to supersede existing filter types in order for
all PMDs to expose a common filtering/classification interface, it may
eventually evolve to cover the following ideas as well:

- Rx/Tx offloads configuration through automatic offloads for specific
  packets, e.g. performing checksum on TCP packets could be expressed with
  an egress rule with a TCP pattern and a kind of checksum action.

- RSS configuration (already defined actually). Could be global or per rule
  depending on hardware capabilities.

- Switching configuration for devices with many physical ports; rules doing
  both ingress and egress could even be used to completely bypass software
  if supported by hardware.

 [1] http://dpdk.org/ml/archives/dev/2016-July/043365.html
 [2] http://dpdk.org/ml/archives/dev/2016-August/045383.html
 [3] http://dpdk.org/ml/archives/dev/2016-November/050044.html

Changes since v2 series:

- Replaced ENOTSUP with ENOSYS in the code (although doing so triggers
  spurious checkpatch warnings) to tell apart unimplemented callbacks from
  unsupported flow rules and match the documented behavior.

- Fixed missing include seen by check-includes.sh in rte_flow_driver.h.

- Made clearer that PMDs must initialize rte_flow_error (if non-NULL) in
  case of error, added related memory poisoning in testpmd to catch missing
  initializations.

- Fixed rte_flow programmer's guide according to John Mcnamara's comments
  (tables, sections header and typos).

- Fixed deprecation notice as well.

Changes since v1 series:

- Added programmer's guide documentation for rte_flow.

- Added depreciation notice for the legacy API.

- Documented testpmd flow command.

- Fixed missing rte_flow_flush symbol in rte_ether_version.map.

- Cleaned up API documentation in rte_flow.h.

- Replaced "min/max" parameters with "num" in struct rte_flow_item_any, to
  align behavior with other item definitions.

- Fixed "type" (EtherType) size in struct rte_flow_item_eth.

- Renamed "queues" to "num" in struct rte_flow_action_rss.

- Fixed missing const in rte_flow_error_set() prototype definition.

- Fixed testpmd flow create command that did not save the rte_flow object
  pointer, causing crashes.

- Hopefully fixed all the remaining ICC/clang errors.

- Replaced testpmd flow command's "fix" token with "is" for clarity.

Changes since RFC v2:

- New separate VLAN pattern item (previously part of the ETH definition),
  found to be much more convenient.

- Removed useless "any" field from VF pattern item, the same effect can be
  achieved by not providing a specification structure.

- Replaced bit-fields from the VXLAN pattern item to avoid endianness
  conversion issues on 24-bit fields.

- Updated struct rte_flow_item with a new "last" field to create inclusive
  ranges. They are defined as the interval between (spec & mask) and
  (last & mask). All three parameters are optional.

- Renamed ID action MARK.

- Renamed "queue" fields in actions QUEUE and DUP to "index".

- "rss_conf" field in RSS action is now const.

- VF action now uses a 32 bit ID like its pattern item counterpart.

- Removed redundant struct rte_flow_pattern, API functions now expect
  struct
  rte_flow_item lists terminated by END items.

- Replaced struct rte_flow_actions for the same reason, with struct
  rte_flow_action lists terminated by END actions.

- Error types (enum rte_flow_error_type) have been updated and the cause
  pointer in struct rte_flow_error is now const.

- Function prototypes (rte_flow_create, rte_flow_validate) have also been
  updated for clarity.

Additions:

- Public wrapper functions rte_flow_{validate|create|destroy|flush|query}
  are now implemented in rte_flow.c, with their symbols exported and
  versioned. Related filter type RTE_ETH_FILTER_GENERIC has been added.

- A separate header (rte_flow_driver.h) has been added for driver-side
  functionality, in particular struct rte_flow_ops which contains PMD
  callbacks returned by RTE_ETH_FILTER_GENERIC query.

- testpmd now exposes most of this API through the new "flow" command.

What remains to be done:

- Using endian-aware integer types (rte_beX_t) where necessary for clarity.

- API documentation (based on RFC).

- testpmd flow command documentation (although context-aware command
  completion should already help quite a bit in this regard).

- A few pattern item / action properties cannot be configured yet
  (e.g. rss_conf parameter for RSS action) and a few completions
  (e.g. possible queue IDs) should be added.

Adrien Mazarguil (25):
  ethdev: introduce generic flow API
  doc: add rte_flow prog guide
  doc: announce deprecation of legacy filter types
  cmdline: add support for dynamic tokens
  cmdline: add alignment constraint
  app/testpmd: implement basic support for rte_flow
  app/testpmd: add flow command
  app/testpmd: add rte_flow integer support
  app/testpmd: add flow list command
  app/testpmd: add flow flush command
  app/testpmd: add flow destroy command
  app/testpmd: add flow validate/create commands
  app/testpmd: add flow query command
  app/testpmd: add rte_flow item spec handler
  app/testpmd: add rte_flow item spec prefix length
  app/testpmd: add rte_flow bit-field support
  app/testpmd: add item any to flow command
  app/testpmd: add various items to flow command
  app/testpmd: add item raw to flow command
  app/testpmd: add items eth/vlan to flow command
  app/testpmd: add items ipv4/ipv6 to flow command
  app/testpmd: add L4 items to flow command
  app/testpmd: add various actions to flow command
  app/testpmd: add queue actions to flow command
  doc: describe testpmd flow command

 MAINTAINERS                                 |    4 +
 app/test-pmd/Makefile                       |    1 +
 app/test-pmd/cmdline.c                      |   32 +
 app/test-pmd/cmdline_flow.c                 | 2575 ++++++++++++++++++++++
 app/test-pmd/config.c                       |  498 +++++
 app/test-pmd/csumonly.c                     |    1 +
 app/test-pmd/flowgen.c                      |    1 +
 app/test-pmd/icmpecho.c                     |    1 +
 app/test-pmd/ieee1588fwd.c                  |    1 +
 app/test-pmd/iofwd.c                        |    1 +
 app/test-pmd/macfwd.c                       |    1 +
 app/test-pmd/macswap.c                      |    1 +
 app/test-pmd/parameters.c                   |    1 +
 app/test-pmd/rxonly.c                       |    1 +
 app/test-pmd/testpmd.c                      |    6 +
 app/test-pmd/testpmd.h                      |   27 +
 app/test-pmd/txonly.c                       |    1 +
 doc/api/doxy-api-index.md                   |    2 +
 doc/guides/prog_guide/index.rst             |    1 +
 doc/guides/prog_guide/rte_flow.rst          | 2042 +++++++++++++++++
 doc/guides/rel_notes/deprecation.rst        |    8 +
 doc/guides/testpmd_app_ug/testpmd_funcs.rst |  612 +++++
 lib/librte_cmdline/cmdline_parse.c          |   67 +-
 lib/librte_cmdline/cmdline_parse.h          |   21 +
 lib/librte_ether/Makefile                   |    3 +
 lib/librte_ether/rte_eth_ctrl.h             |    1 +
 lib/librte_ether/rte_ether_version.map      |   11 +
 lib/librte_ether/rte_flow.c                 |  159 ++
 lib/librte_ether/rte_flow.h                 |  947 ++++++++
 lib/librte_ether/rte_flow_driver.h          |  182 ++
 30 files changed, 7200 insertions(+), 9 deletions(-)
 create mode 100644 app/test-pmd/cmdline_flow.c
 create mode 100644 doc/guides/prog_guide/rte_flow.rst
 create mode 100644 lib/librte_ether/rte_flow.c
 create mode 100644 lib/librte_ether/rte_flow.h
 create mode 100644 lib/librte_ether/rte_flow_driver.h

-- 
2.1.4

^ permalink raw reply

* [PATCH v3 1/4] crypto/aesni_mb: fix incorrect crypto session
From: Pablo de Lara @ 2016-12-19 17:29 UTC (permalink / raw)
  To: declan.doherty; +Cc: dev, Pablo de Lara, stable
In-Reply-To: <1482168543-40289-1-git-send-email-pablo.de.lara.guarch@intel.com>

When using sessionless crypto operations, crypto session
is obtained from a pool of sessions, when processing the
operation. Once the operation is processed, the session
is put back in the pool, but for the AESNI MB PMD, this
session was not being saved in the operation and therefore,
it did not return to the session pool.

Fixes: 924e84f87306 ("aesni_mb: add driver for multi buffer based crypto")

CC: stable@dpdk.org
Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
index f07cd07..7443b47 100644
--- a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
+++ b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
@@ -322,6 +322,7 @@ get_session(struct aesni_mb_qp *qp, struct rte_crypto_op *op)
 			rte_mempool_put(qp->sess_mp, _sess);
 			sess = NULL;
 		}
+		op->sym->session = (struct rte_cryptodev_sym_session *)_sess;
 	}
 
 	return sess;
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 4/4] doc: add missing supported algos for AESNI MB PMD
From: Pablo de Lara @ 2016-12-19 17:29 UTC (permalink / raw)
  To: declan.doherty; +Cc: dev, Pablo de Lara
In-Reply-To: <1482168543-40289-1-git-send-email-pablo.de.lara.guarch@intel.com>

AESNI MB PMD supports SHA224-HMAC and SHA384-HMAC,
but the documentation was not updated with this.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
Acked-by: John McNamara <john.mcnamara@intel.com>
---
 doc/guides/cryptodevs/aesni_mb.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/guides/cryptodevs/aesni_mb.rst b/doc/guides/cryptodevs/aesni_mb.rst
index cb429d7..8b18eba 100644
--- a/doc/guides/cryptodevs/aesni_mb.rst
+++ b/doc/guides/cryptodevs/aesni_mb.rst
@@ -55,7 +55,9 @@ Cipher algorithms:
 Hash algorithms:
 
 * RTE_CRYPTO_HASH_SHA1_HMAC
+* RTE_CRYPTO_HASH_SHA224_HMAC
 * RTE_CRYPTO_HASH_SHA256_HMAC
+* RTE_CRYPTO_HASH_SHA384_HMAC
 * RTE_CRYPTO_HASH_SHA512_HMAC
 
 Limitations
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 3/4] crypto/aesni_mb: add single operation functionality
From: Pablo de Lara @ 2016-12-19 17:29 UTC (permalink / raw)
  To: declan.doherty; +Cc: dev, Pablo de Lara
In-Reply-To: <1482168543-40289-1-git-send-email-pablo.de.lara.guarch@intel.com>

Update driver to use new AESNI Multibuffer IPSec library single
operation functionality (cipher only and authentication only).
This patch also adds tests for this new feature.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 app/test/test_cryptodev.c                          | 34 ++++++++
 app/test/test_cryptodev_aes_test_vectors.h         | 36 +++++---
 app/test/test_cryptodev_hash_test_vectors.h        | 54 ++++++++----
 doc/guides/cryptodevs/aesni_mb.rst                 |  2 -
 doc/guides/rel_notes/release_17_02.rst             |  1 +
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c         | 95 ++++++++++++++++------
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h |  9 ++
 7 files changed, 172 insertions(+), 59 deletions(-)

diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c
index f1f3542..5895d99 100644
--- a/app/test/test_cryptodev.c
+++ b/app/test/test_cryptodev.c
@@ -1466,6 +1466,38 @@ test_AES_CBC_HMAC_SHA512_decrypt_perform(struct rte_cryptodev_sym_session *sess,
 }
 
 static int
+test_AES_cipheronly_mb_all(void)
+{
+	struct crypto_testsuite_params *ts_params = &testsuite_params;
+	int status;
+
+	status = test_blockcipher_all_tests(ts_params->mbuf_pool,
+		ts_params->op_mpool, ts_params->valid_devs[0],
+		RTE_CRYPTODEV_AESNI_MB_PMD,
+		BLKCIPHER_AES_CIPHERONLY_TYPE);
+
+	TEST_ASSERT_EQUAL(status, 0, "Test failed");
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_authonly_mb_all(void)
+{
+	struct crypto_testsuite_params *ts_params = &testsuite_params;
+	int status;
+
+	status = test_blockcipher_all_tests(ts_params->mbuf_pool,
+		ts_params->op_mpool, ts_params->valid_devs[0],
+		RTE_CRYPTODEV_AESNI_MB_PMD,
+		BLKCIPHER_AUTHONLY_TYPE);
+
+	TEST_ASSERT_EQUAL(status, 0, "Test failed");
+
+	return TEST_SUCCESS;
+}
+
+static int
 test_AES_chain_mb_all(void)
 {
 	struct crypto_testsuite_params *ts_params = &testsuite_params;
@@ -6559,6 +6591,8 @@ static struct unit_test_suite cryptodev_aesni_mb_testsuite  = {
 	.teardown = testsuite_teardown,
 	.unit_test_cases = {
 		TEST_CASE_ST(ut_setup, ut_teardown, test_AES_chain_mb_all),
+		TEST_CASE_ST(ut_setup, ut_teardown, test_AES_cipheronly_mb_all),
+		TEST_CASE_ST(ut_setup, ut_teardown, test_authonly_mb_all),
 
 		TEST_CASES_END() /**< NULL terminate unit test array */
 	}
diff --git a/app/test/test_cryptodev_aes_test_vectors.h b/app/test/test_cryptodev_aes_test_vectors.h
index efbe7da..898aae1 100644
--- a/app/test/test_cryptodev_aes_test_vectors.h
+++ b/app/test/test_cryptodev_aes_test_vectors.h
@@ -1025,84 +1025,96 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
 		.test_data = &aes_test_data_4,
 		.op_mask = BLOCKCIPHER_TEST_OP_ENCRYPT,
 		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-			BLOCKCIPHER_TEST_TARGET_PMD_QAT
+			BLOCKCIPHER_TEST_TARGET_PMD_QAT |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "AES-128-CBC Decryption",
 		.test_data = &aes_test_data_4,
 		.op_mask = BLOCKCIPHER_TEST_OP_DECRYPT,
 		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-			BLOCKCIPHER_TEST_TARGET_PMD_QAT
+			BLOCKCIPHER_TEST_TARGET_PMD_QAT |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "AES-192-CBC Encryption",
 		.test_data = &aes_test_data_10,
 		.op_mask = BLOCKCIPHER_TEST_OP_ENCRYPT,
 		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-			BLOCKCIPHER_TEST_TARGET_PMD_QAT
+			BLOCKCIPHER_TEST_TARGET_PMD_QAT |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "AES-192-CBC Decryption",
 		.test_data = &aes_test_data_10,
 		.op_mask = BLOCKCIPHER_TEST_OP_DECRYPT,
 		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-			BLOCKCIPHER_TEST_TARGET_PMD_QAT
+			BLOCKCIPHER_TEST_TARGET_PMD_QAT |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "AES-256-CBC Encryption",
 		.test_data = &aes_test_data_11,
 		.op_mask = BLOCKCIPHER_TEST_OP_ENCRYPT,
 		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-			BLOCKCIPHER_TEST_TARGET_PMD_QAT
+			BLOCKCIPHER_TEST_TARGET_PMD_QAT |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "AES-256-CBC Decryption",
 		.test_data = &aes_test_data_11,
 		.op_mask = BLOCKCIPHER_TEST_OP_DECRYPT,
 		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-			BLOCKCIPHER_TEST_TARGET_PMD_QAT
+			BLOCKCIPHER_TEST_TARGET_PMD_QAT |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "AES-128-CTR Encryption",
 		.test_data = &aes_test_data_1,
 		.op_mask = BLOCKCIPHER_TEST_OP_ENCRYPT,
 		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-			BLOCKCIPHER_TEST_TARGET_PMD_QAT
+			BLOCKCIPHER_TEST_TARGET_PMD_QAT |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "AES-128-CTR Decryption",
 		.test_data = &aes_test_data_1,
 		.op_mask = BLOCKCIPHER_TEST_OP_DECRYPT,
 		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-			BLOCKCIPHER_TEST_TARGET_PMD_QAT
+			BLOCKCIPHER_TEST_TARGET_PMD_QAT |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "AES-192-CTR Encryption",
 		.test_data = &aes_test_data_2,
 		.op_mask = BLOCKCIPHER_TEST_OP_ENCRYPT,
 		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-			BLOCKCIPHER_TEST_TARGET_PMD_QAT
+			BLOCKCIPHER_TEST_TARGET_PMD_QAT |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "AES-192-CTR Decryption",
 		.test_data = &aes_test_data_2,
 		.op_mask = BLOCKCIPHER_TEST_OP_DECRYPT,
 		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-			BLOCKCIPHER_TEST_TARGET_PMD_QAT
+			BLOCKCIPHER_TEST_TARGET_PMD_QAT |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "AES-256-CTR Encryption",
 		.test_data = &aes_test_data_3,
 		.op_mask = BLOCKCIPHER_TEST_OP_ENCRYPT,
 		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-			BLOCKCIPHER_TEST_TARGET_PMD_QAT
+			BLOCKCIPHER_TEST_TARGET_PMD_QAT |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "AES-256-CTR Decryption",
 		.test_data = &aes_test_data_3,
 		.op_mask = BLOCKCIPHER_TEST_OP_DECRYPT,
 		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-			BLOCKCIPHER_TEST_TARGET_PMD_QAT
+			BLOCKCIPHER_TEST_TARGET_PMD_QAT |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 };
 
diff --git a/app/test/test_cryptodev_hash_test_vectors.h b/app/test/test_cryptodev_hash_test_vectors.h
index 9f095cf..a8f9da0 100644
--- a/app/test/test_cryptodev_hash_test_vectors.h
+++ b/app/test/test_cryptodev_hash_test_vectors.h
@@ -97,7 +97,8 @@ hmac_md5_test_vector = {
 			0x50, 0xE8, 0xDE, 0xC5, 0xC1, 0x76, 0xAC, 0xAE,
 			0x15, 0x4A, 0xF1, 0x7F, 0x7E, 0x04, 0x42, 0x9B
 		},
-		.len = 16
+		.len = 16,
+		.truncated_len = 12
 	}
 };
 
@@ -139,7 +140,8 @@ hmac_sha1_test_vector = {
 			0x7E, 0x2E, 0x8F, 0xFC, 0x48, 0x39, 0x46, 0x17,
 			0x3F, 0x91, 0x64, 0x59
 		},
-		.len = 20
+		.len = 20,
+		.truncated_len = 12
 	}
 };
 
@@ -184,7 +186,8 @@ hmac_sha224_test_vector = {
 			0xF1, 0x8A, 0x63, 0xBB, 0x5D, 0x1D, 0xE3, 0x9F,
 			0x92, 0xF6, 0xAA, 0x19
 		},
-		.len = 28
+		.len = 28,
+		.truncated_len = 14
 	}
 };
 
@@ -229,7 +232,8 @@ hmac_sha256_test_vector = {
 			0x06, 0x4D, 0x64, 0x09, 0x0A, 0xCC, 0x02, 0x77,
 			0x71, 0x83, 0x48, 0x71, 0x07, 0x02, 0x25, 0x17
 		},
-		.len = 32
+		.len = 32,
+		.truncated_len = 16
 	}
 };
 
@@ -280,7 +284,8 @@ hmac_sha384_test_vector = {
 			0x10, 0x90, 0x0A, 0xE3, 0xF0, 0x59, 0xDD, 0xC0,
 			0x6F, 0xE6, 0x8C, 0x84, 0xD5, 0x03, 0xF8, 0x9E
 		},
-		.len = 48
+		.len = 48,
+		.truncated_len = 24
 	}
 };
 
@@ -337,7 +342,8 @@ hmac_sha512_test_vector = {
 			0x97, 0x37, 0x0F, 0xBE, 0xC2, 0x45, 0xA0, 0x87,
 			0xAF, 0x24, 0x27, 0x0C, 0x78, 0xBA, 0xBE, 0x20
 		},
-		.len = 64
+		.len = 64,
+		.truncated_len = 32
 	}
 };
 
@@ -358,13 +364,15 @@ static const struct blockcipher_test_case hash_test_cases[] = {
 		.test_descr = "HMAC-MD5 Digest",
 		.test_data = &hmac_md5_test_vector,
 		.op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
-		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL
+		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "HMAC-MD5 Digest Verify",
 		.test_data = &hmac_md5_test_vector,
 		.op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
-		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL
+		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "SHA1 Digest",
@@ -382,13 +390,15 @@ static const struct blockcipher_test_case hash_test_cases[] = {
 		.test_descr = "HMAC-SHA1 Digest",
 		.test_data = &hmac_sha1_test_vector,
 		.op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
-		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL
+		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "HMAC-SHA1 Digest Verify",
 		.test_data = &hmac_sha1_test_vector,
 		.op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
-		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL
+		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "SHA224 Digest",
@@ -406,13 +416,15 @@ static const struct blockcipher_test_case hash_test_cases[] = {
 		.test_descr = "HMAC-SHA224 Digest",
 		.test_data = &hmac_sha224_test_vector,
 		.op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
-		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL
+		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "HMAC-SHA224 Digest Verify",
 		.test_data = &hmac_sha224_test_vector,
 		.op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
-		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL
+		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "SHA256 Digest",
@@ -430,13 +442,15 @@ static const struct blockcipher_test_case hash_test_cases[] = {
 		.test_descr = "HMAC-SHA256 Digest",
 		.test_data = &hmac_sha256_test_vector,
 		.op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
-		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL
+		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "HMAC-SHA256 Digest Verify",
 		.test_data = &hmac_sha256_test_vector,
 		.op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
-		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL
+		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "SHA384 Digest",
@@ -454,13 +468,15 @@ static const struct blockcipher_test_case hash_test_cases[] = {
 		.test_descr = "HMAC-SHA384 Digest",
 		.test_data = &hmac_sha384_test_vector,
 		.op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
-		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL
+		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "HMAC-SHA384 Digest Verify",
 		.test_data = &hmac_sha384_test_vector,
 		.op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
-		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL
+		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "SHA512 Digest",
@@ -478,13 +494,15 @@ static const struct blockcipher_test_case hash_test_cases[] = {
 		.test_descr = "HMAC-SHA512 Digest",
 		.test_data = &hmac_sha512_test_vector,
 		.op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
-		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL
+		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 	{
 		.test_descr = "HMAC-SHA512 Digest Verify",
 		.test_data = &hmac_sha512_test_vector,
 		.op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
-		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL
+		.pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
+			BLOCKCIPHER_TEST_TARGET_PMD_MB
 	},
 };
 
diff --git a/doc/guides/cryptodevs/aesni_mb.rst b/doc/guides/cryptodevs/aesni_mb.rst
index b47cb6a..cb429d7 100644
--- a/doc/guides/cryptodevs/aesni_mb.rst
+++ b/doc/guides/cryptodevs/aesni_mb.rst
@@ -62,8 +62,6 @@ Limitations
 -----------
 
 * Chained mbufs are not supported.
-* Hash only is not supported.
-* Cipher only is not supported.
 * Only in-place is currently supported (destination address is the same as source address).
 * Only supports session-oriented API implementation (session-less APIs are not supported).
 
diff --git a/doc/guides/rel_notes/release_17_02.rst b/doc/guides/rel_notes/release_17_02.rst
index 4f666df..5aa8a94 100644
--- a/doc/guides/rel_notes/release_17_02.rst
+++ b/doc/guides/rel_notes/release_17_02.rst
@@ -49,6 +49,7 @@ New Features
 
   * The Intel(R) Multi Buffer Crypto for IPsec library used in
     AESNI MB PMD has been moved to a new repository, in github.
+  * Support for single operations (cipher only and authentication only).
 
 
 Resolved Issues
diff --git a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
index 7443b47..bafd4d7 100644
--- a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
+++ b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
@@ -107,26 +107,27 @@ calculate_auth_precomputes(hash_one_block_t one_block_hash,
 }
 
 /** Get xform chain order */
-static int
+static enum aesni_mb_operation
 aesni_mb_get_chain_order(const struct rte_crypto_sym_xform *xform)
 {
-	/*
-	 * Multi-buffer only supports HASH_CIPHER or CIPHER_HASH chained
-	 * operations, all other options are invalid, so we must have exactly
-	 * 2 xform structs chained together
-	 */
-	if (xform->next == NULL || xform->next->next != NULL)
-		return -1;
-
-	if (xform->type == RTE_CRYPTO_SYM_XFORM_AUTH &&
-			xform->next->type == RTE_CRYPTO_SYM_XFORM_CIPHER)
-		return HASH_CIPHER;
+	if (xform == NULL)
+		return AESNI_MB_OP_NOT_SUPPORTED;
+
+	if (xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER) {
+		if (xform->next == NULL)
+			return AESNI_MB_OP_CIPHER_ONLY;
+		if (xform->next->type == RTE_CRYPTO_SYM_XFORM_AUTH)
+			return AESNI_MB_OP_CIPHER_HASH;
+	}
 
-	if (xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER &&
-				xform->next->type == RTE_CRYPTO_SYM_XFORM_AUTH)
-		return CIPHER_HASH;
+	if (xform->type == RTE_CRYPTO_SYM_XFORM_AUTH) {
+		if (xform->next == NULL)
+			return AESNI_MB_OP_HASH_ONLY;
+		if (xform->next->type == RTE_CRYPTO_SYM_XFORM_CIPHER)
+			return AESNI_MB_OP_HASH_CIPHER;
+	}
 
-	return -1;
+	return AESNI_MB_OP_NOT_SUPPORTED;
 }
 
 /** Set session authentication parameters */
@@ -137,11 +138,19 @@ aesni_mb_set_session_auth_parameters(const struct aesni_mb_ops *mb_ops,
 {
 	hash_one_block_t hash_oneblock_fn;
 
+	if (xform == NULL) {
+		sess->auth.algo = NULL_HASH;
+		return 0;
+	}
+
 	if (xform->type != RTE_CRYPTO_SYM_XFORM_AUTH) {
 		MB_LOG_ERR("Crypto xform struct not of type auth");
 		return -1;
 	}
 
+	/* Select auth generate/verify */
+	sess->auth.operation = xform->auth.op;
+
 	/* Set Authentication Parameters */
 	if (xform->auth.algo == RTE_CRYPTO_AUTH_AES_XCBC_MAC) {
 		sess->auth.algo = AES_XCBC;
@@ -199,6 +208,11 @@ aesni_mb_set_session_cipher_parameters(const struct aesni_mb_ops *mb_ops,
 {
 	aes_keyexp_t aes_keyexp_fn;
 
+	if (xform == NULL) {
+		sess->cipher.mode = NULL_CIPHER;
+		return 0;
+	}
+
 	if (xform->type != RTE_CRYPTO_SYM_XFORM_CIPHER) {
 		MB_LOG_ERR("Crypto xform struct not of type cipher");
 		return -1;
@@ -268,16 +282,36 @@ aesni_mb_set_session_parameters(const struct aesni_mb_ops *mb_ops,
 
 	/* Select Crypto operation - hash then cipher / cipher then hash */
 	switch (aesni_mb_get_chain_order(xform)) {
-	case HASH_CIPHER:
+	case AESNI_MB_OP_HASH_CIPHER:
 		sess->chain_order = HASH_CIPHER;
 		auth_xform = xform;
 		cipher_xform = xform->next;
 		break;
-	case CIPHER_HASH:
+	case AESNI_MB_OP_CIPHER_HASH:
 		sess->chain_order = CIPHER_HASH;
 		auth_xform = xform->next;
 		cipher_xform = xform;
 		break;
+	case AESNI_MB_OP_HASH_ONLY:
+		sess->chain_order = HASH_CIPHER;
+		auth_xform = xform;
+		cipher_xform = NULL;
+		break;
+	case AESNI_MB_OP_CIPHER_ONLY:
+		/*
+		 * Multi buffer library operates only at two modes,
+		 * CIPHER_HASH and HASH_CIPHER. When doing ciphering only,
+		 * chain order depends on cipher operation: encryption is always
+		 * the first operation and decryption the last one.
+		 */
+		if (xform->cipher.op == RTE_CRYPTO_CIPHER_OP_ENCRYPT)
+			sess->chain_order = CIPHER_HASH;
+		else
+			sess->chain_order = HASH_CIPHER;
+		auth_xform = NULL;
+		cipher_xform = xform;
+		break;
+	case AESNI_MB_OP_NOT_SUPPORTED:
 	default:
 		MB_LOG_ERR("Unsupported operation chain order parameter");
 		return -1;
@@ -397,7 +431,8 @@ process_crypto_op(struct aesni_mb_qp *qp, struct rte_crypto_op *op,
 	}
 
 	/* Set digest output location */
-	if (job->cipher_direction == DECRYPT) {
+	if (job->hash_alg != NULL_HASH &&
+			session->auth.operation == RTE_CRYPTO_AUTH_OP_VERIFY) {
 		job->auth_tag_output = (uint8_t *)rte_pktmbuf_append(m_dst,
 				get_digest_byte_length(job->hash_alg));
 
@@ -459,6 +494,7 @@ post_process_mb_job(struct aesni_mb_qp *qp, JOB_AES_HMAC *job)
 			(struct rte_crypto_op *)job->user_data;
 	struct rte_mbuf *m_dst =
 			(struct rte_mbuf *)job->user_data2;
+	struct aesni_mb_session *sess;
 
 	if (op == NULL || m_dst == NULL)
 		return NULL;
@@ -470,14 +506,19 @@ post_process_mb_job(struct aesni_mb_qp *qp, JOB_AES_HMAC *job)
 	if (unlikely(job->status != STS_COMPLETED)) {
 		op->status = RTE_CRYPTO_OP_STATUS_ERROR;
 		return op;
-	} else if (job->chain_order == HASH_CIPHER) {
-		/* Verify digest if required */
-		if (memcmp(job->auth_tag_output, op->sym->auth.digest.data,
-				job->auth_tag_output_len_in_bytes) != 0)
-			op->status = RTE_CRYPTO_OP_STATUS_AUTH_FAILED;
-
-		/* trim area used for digest from mbuf */
-		rte_pktmbuf_trim(m_dst, get_digest_byte_length(job->hash_alg));
+	} else if (job->hash_alg != NULL_HASH) {
+		sess = (struct aesni_mb_session *)op->sym->session->_private;
+		if (sess->auth.operation == RTE_CRYPTO_AUTH_OP_VERIFY) {
+			/* Verify digest if required */
+			if (memcmp(job->auth_tag_output,
+					op->sym->auth.digest.data,
+					job->auth_tag_output_len_in_bytes) != 0)
+				op->status = RTE_CRYPTO_OP_STATUS_AUTH_FAILED;
+
+			/* trim area used for digest from mbuf */
+			rte_pktmbuf_trim(m_dst,
+					get_digest_byte_length(job->hash_alg));
+		}
 	}
 
 	/* Free session if a session-less crypto op */
diff --git a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h
index 17f367f..5f125b2 100644
--- a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h
+++ b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h
@@ -125,6 +125,13 @@ get_digest_byte_length(JOB_HASH_ALG algo)
 	return auth_digest_byte_lengths[algo];
 }
 
+enum aesni_mb_operation {
+	AESNI_MB_OP_HASH_CIPHER,
+	AESNI_MB_OP_CIPHER_HASH,
+	AESNI_MB_OP_HASH_ONLY,
+	AESNI_MB_OP_CIPHER_ONLY,
+	AESNI_MB_OP_NOT_SUPPORTED
+};
 
 /** private data structure for each virtual AESNI device */
 struct aesni_mb_private {
@@ -185,6 +192,8 @@ struct aesni_mb_session {
 	/** Authentication Parameters */
 	struct {
 		JOB_HASH_ALG algo; /**< Authentication Algorithm */
+		enum rte_crypto_auth_operation operation;
+		/**< auth operation generate or verify */
 		union {
 			struct {
 				uint8_t inner[128] __rte_aligned(16);
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 2/4] doc: update AESNI MB PMD guide
From: Pablo de Lara @ 2016-12-19 17:29 UTC (permalink / raw)
  To: declan.doherty; +Cc: dev, Pablo de Lara
In-Reply-To: <1482168543-40289-1-git-send-email-pablo.de.lara.guarch@intel.com>

The Intel(R) Multi Buffer Crypto library used in the AESNI MB PMD
has been moved to a new repository, in github.
This patch updates the link where it can be downloaded.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
Acked-by: John McNamara <john.mcnamara@intel.com>
---
 doc/guides/cryptodevs/aesni_mb.rst     | 10 +++-------
 doc/guides/rel_notes/release_17_02.rst |  7 +++++++
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/doc/guides/cryptodevs/aesni_mb.rst b/doc/guides/cryptodevs/aesni_mb.rst
index e812e95..b47cb6a 100644
--- a/doc/guides/cryptodevs/aesni_mb.rst
+++ b/doc/guides/cryptodevs/aesni_mb.rst
@@ -66,21 +66,17 @@ Limitations
 * Cipher only is not supported.
 * Only in-place is currently supported (destination address is the same as source address).
 * Only supports session-oriented API implementation (session-less APIs are not supported).
-*  Not performance tuned.
 
 Installation
 ------------
 
 To build DPDK with the AESNI_MB_PMD the user is required to download the mult-
-buffer library from `here <https://downloadcenter.intel.com/download/22972>`_
-and compile it on their user system before building DPDK. When building the
-multi-buffer library it is necessary to have YASM package installed and also
-requires the overriding of YASM path when building, as a path is hard coded in
-the Makefile of the release package.
+buffer library from `here <https://github.com/01org/intel-ipsec-mb>`_
+and compile it on their user system before building DPDK.
 
 .. code-block:: console
 
-	make YASM=/usr/bin/yasm
+	make
 
 Initialization
 --------------
diff --git a/doc/guides/rel_notes/release_17_02.rst b/doc/guides/rel_notes/release_17_02.rst
index 873333b..4f666df 100644
--- a/doc/guides/rel_notes/release_17_02.rst
+++ b/doc/guides/rel_notes/release_17_02.rst
@@ -44,6 +44,13 @@ New Features
 
   * Scatter-gather list (SGL) support.
 
+
+* **Updated the AESNI MB PMD.**
+
+  * The Intel(R) Multi Buffer Crypto for IPsec library used in
+    AESNI MB PMD has been moved to a new repository, in github.
+
+
 Resolved Issues
 ---------------
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 0/4] AESNI MB PMD updates
From: Pablo de Lara @ 2016-12-19 17:28 UTC (permalink / raw)
  To: declan.doherty; +Cc: dev, Pablo de Lara
In-Reply-To: <1481817632-183082-1-git-send-email-pablo.de.lara.guarch@intel.com>

The library used in AESNI MB PMD, Intel Multi Buffer Crypto for IPsec,
has been migrated to a new location, in github (see documentation patch
for the link).

The library has also been updated, so single crypto operations
are supported (cipher and authentication only). Therefore, the PMD
has been updated to support these operations.

This patchset depends on patchset "Add scatter-gather list capability to
Intel QuickAssist Technology driver" (http://dpdk.org/ml/archives/dev/2016-November/050947.html)

Changes in v3:
- Included authentication operation in private session,
  since digest only has to be trimmed from buffer when digest is verified
- Fixed missing crypto session save in crypto operation when it is sessionless
- Made operation mode setting more clear (CIPHER_HASH, HASH_CIPHER)

Changes in v2:
- Fixed hash only tests, including truncated digest length

Pablo de Lara (4):
  crypto/aesni_mb: fix incorrect crypto session
  doc: update AESNI MB PMD guide
  crypto/aesni_mb: add single operation functionality
  doc: add missing supported algos for AESNI MB PMD

 app/test/test_cryptodev.c                          | 34 ++++++++
 app/test/test_cryptodev_aes_test_vectors.h         | 36 +++++---
 app/test/test_cryptodev_hash_test_vectors.h        | 54 ++++++++----
 doc/guides/cryptodevs/aesni_mb.rst                 | 14 ++--
 doc/guides/rel_notes/release_17_02.rst             |  8 ++
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c         | 96 ++++++++++++++++------
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h |  9 ++
 7 files changed, 185 insertions(+), 66 deletions(-)

-- 
2.7.4

^ permalink raw reply

* Re: [PATCH] gitignore: ignore top level build/ directory
From: Bruce Richardson @ 2016-12-19 17:14 UTC (permalink / raw)
  To: Mcnamara, John; +Cc: Thomas Monjalon, Baruch Siach, Yigit, Ferruh, dev@dpdk.org
In-Reply-To: <B27915DBBA3421428155699D51E4CFE2026891C2@IRSMSX103.ger.corp.intel.com>

On Mon, Dec 19, 2016 at 04:50:57PM +0000, Mcnamara, John wrote:
> 
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Bruce Richardson
> > Sent: Monday, December 19, 2016 4:14 PM
> > To: Thomas Monjalon <thomas.monjalon@6wind.com>
> > Cc: Baruch Siach <baruch@tkos.co.il>; Yigit, Ferruh
> > <ferruh.yigit@intel.com>; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH] gitignore: ignore top level build/
> > directory
> > 
> > On Mon, Dec 19, 2016 at 03:05:20PM +0100, Thomas Monjalon wrote:
> > > 2016-12-13 12:02, Ferruh Yigit:
> > > > On 12/13/2016 11:48 AM, Baruch Siach wrote:
> > > > > RTE_OUTPUT defaults to build/.
> > > > >
> > > > > Signed-off-by: Baruch Siach <baruch@tkos.co.il>
> > > >
> > > > There is a similar patch:
> > > > http://dpdk.org/dev/patchwork/patch/11637/
> > > >
> > > > If you want you can review/comment that one too.
> > >
> > > Yes, sorry I've never commented above patch.
> > >
> > > I do not like filling .gitignore because I prefer seeing what is built
> > > or copied or whatever with "git status".
> > > What is really the benefit of .gitignore?
> > 
> > I take the opposite view. I only like to see files that I actually care
> > about in the git status. Any build artifacts should be ignored by git as
> > they are not files that it ever should track. That way doing a build does
> > not change the status of the repo as git sees it.
> 
> As a workaround I have the following in my .gitconfig:
> 
>     [core]
>     excludesfile = ~/.gitignore
> 
> Then I put the ignore rules in ~/.gitignore.
> 
> John
> 
Yes, I have something similar done, so this is not a problem for me
personally. I just find it strange that we don't make more use of
gitignore in DPDK. The file's name itself seems to imply that it should
be used to list out files that git should not track, and build output is
definitely one of those.

/Bruce

^ permalink raw reply

* Re: [PATCH v5 18/29] app/testpmd: use VFD APIs on i40e
From: Ferruh Yigit @ 2016-12-19 17:09 UTC (permalink / raw)
  To: dev; +Cc: Jingjing Wu, Helin Zhang, Wenzhuo Lu, Chen Jing D,
	Bernard Iremonger
In-Reply-To: <20161216190257.6921-19-ferruh.yigit@intel.com>

On 12/16/2016 7:02 PM, Ferruh Yigit wrote:
> From: Wenzhuo Lu <wenzhuo.lu@intel.com>
> 
> The new VF Daemon (VFD) APIs is implemented on i40e. Change
> testpmd code to use them, including VF MAC anti-spoofing,
> VF VLAN anti-spoofing, TX loopback, VF VLAN strip, VF VLAN
> insert.
> 
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
> Signed-off-by: Chen Jing D(Mark) <jing.d.chen@intel.com>
> Signed-off-by: Bernard Iremonger <bernard.iremonger@intel.com>
> ---
>  app/test-pmd/Makefile  |   2 +
>  app/test-pmd/cmdline.c | 150 +++++++++++++++++++++++++++++++++++++++----------
>  2 files changed, 121 insertions(+), 31 deletions(-)
> 
> diff --git a/app/test-pmd/Makefile b/app/test-pmd/Makefile
> index 891b85a..a0c3366 100644
> --- a/app/test-pmd/Makefile
> +++ b/app/test-pmd/Makefile
> @@ -58,7 +58,9 @@ SRCS-y += csumonly.c
>  SRCS-y += icmpecho.c
>  SRCS-$(CONFIG_RTE_LIBRTE_IEEE1588) += ieee1588fwd.c
>  
> +_LDLIBS-y += --whole-archive

Hi Wenzhuo,

Following lines are required for shared library, but I guess above line
required because they cause problem with static library.
So, instead of adding above line, what do you think wrapping below lines
with ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),y) ?

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += -lrte_pmd_ixgbe
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += -lrte_pmd_i40e
>  
>  CFLAGS_cmdline.o := -D_GNU_SOURCE
>  
<...>

^ permalink raw reply

* Re: [PATCH v2] maintainers: update for qede PMD and bnx2x PMD
From: Thomas Monjalon @ 2016-12-19 16:54 UTC (permalink / raw)
  To: Rasesh Mody; +Cc: dev, Dept-EngDPDKDev
In-Reply-To: <1481138539-3563-1-git-send-email-Rasesh.Mody@cavium.com>

2016-12-07 11:22, Rasesh Mody:
> Following Cavium's acquisition of QLogic we need to update all the
> qlogic PMD maintainer's entries to point to our new e-mail addresses.
> Update driver's maintainers as they are no longer working for Cavium.
> 
> Thanks to Sony Chacko for his support and development of our various
> dpdk drivers.
> 
> Signed-off-by: Rasesh Mody <Rasesh.Mody@cavium.com>

Applied, thanks

^ permalink raw reply

* Re: [PATCH] gitignore: ignore top level build/ directory
From: Mcnamara, John @ 2016-12-19 16:50 UTC (permalink / raw)
  To: Richardson, Bruce, Thomas Monjalon
  Cc: Baruch Siach, Yigit, Ferruh, dev@dpdk.org
In-Reply-To: <20161219161422.GB166228@bricha3-MOBL3.ger.corp.intel.com>



> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Bruce Richardson
> Sent: Monday, December 19, 2016 4:14 PM
> To: Thomas Monjalon <thomas.monjalon@6wind.com>
> Cc: Baruch Siach <baruch@tkos.co.il>; Yigit, Ferruh
> <ferruh.yigit@intel.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] gitignore: ignore top level build/
> directory
> 
> On Mon, Dec 19, 2016 at 03:05:20PM +0100, Thomas Monjalon wrote:
> > 2016-12-13 12:02, Ferruh Yigit:
> > > On 12/13/2016 11:48 AM, Baruch Siach wrote:
> > > > RTE_OUTPUT defaults to build/.
> > > >
> > > > Signed-off-by: Baruch Siach <baruch@tkos.co.il>
> > >
> > > There is a similar patch:
> > > http://dpdk.org/dev/patchwork/patch/11637/
> > >
> > > If you want you can review/comment that one too.
> >
> > Yes, sorry I've never commented above patch.
> >
> > I do not like filling .gitignore because I prefer seeing what is built
> > or copied or whatever with "git status".
> > What is really the benefit of .gitignore?
> 
> I take the opposite view. I only like to see files that I actually care
> about in the git status. Any build artifacts should be ignored by git as
> they are not files that it ever should track. That way doing a build does
> not change the status of the repo as git sees it.

As a workaround I have the following in my .gitconfig:

    [core]
    excludesfile = ~/.gitignore

Then I put the ignore rules in ~/.gitignore.

John

^ permalink raw reply

* Re: [PATCH] nfp: extend speed capabilities advertised
From: Ferruh Yigit @ 2016-12-19 16:43 UTC (permalink / raw)
  To: Alejandro Lucero; +Cc: dev
In-Reply-To: <CAD+H992J5mossfmWG1wq-1YE=12uAQq-R8STX-G42vv13--GBQ@mail.gmail.com>

On 12/19/2016 4:18 PM, Alejandro Lucero wrote:
> On Mon, Dec 19, 2016 at 3:05 PM, Ferruh Yigit <ferruh.yigit@intel.com>
> wrote:
> 
>> On 12/19/2016 3:02 PM, Alejandro Lucero wrote:
>>>
>>>
>>> On Mon, Dec 19, 2016 at 2:36 PM, Ferruh Yigit <ferruh.yigit@intel.com
>>> <mailto:ferruh.yigit@intel.com>> wrote:
>>>
>>>     Hi Alejandro,
>>>
>>>
>>> Hi,
>>>
>>>
>>>     On 12/19/2016 12:05 PM, Alejandro Lucero wrote:
>>>     > NFP supports more speeds than just 40 and 100GB, which were
>>>     > what was advertised before.
>>>     >
>>>     > Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com
>> <mailto:alejandro.lucero@netronome.com>>
>>>     > ---
>>>     >  drivers/net/nfp/nfp_net.c | 4 +++-
>>>     >  1 file changed, 3 insertions(+), 1 deletion(-)
>>>     >
>>>     > diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
>>>     > index 27afbfd..77015c4 100644
>>>     > --- a/drivers/net/nfp/nfp_net.c
>>>     > +++ b/drivers/net/nfp/nfp_net.c
>>>     > @@ -1077,7 +1077,9 @@ static void nfp_net_read_mac(struct
>> nfp_net_hw *hw)
>>>     >       dev_info->reta_size = NFP_NET_CFG_RSS_ITBL_SZ;
>>>     >       dev_info->hash_key_size = NFP_NET_CFG_RSS_KEY_SZ;
>>>     >
>>>     > -     dev_info->speed_capa = ETH_LINK_SPEED_40G |
>> ETH_LINK_SPEED_100G;
>>>     > +     dev_info->speed_capa = ETH_SPEED_NUM_1G | ETH_LINK_SPEED_10G
>> |
>>>     > +                            ETH_SPEED_NUM_25G | ETH_SPEED_NUM_40G
>> |
>>>     > +                            ETH_SPEED_NUM_50G |
>> ETH_LINK_SPEED_100G;
>>>
>>>     Does all devices driver by this driver supports all these speeds?
>>>
>>>     I am aware of at least one exception to this, from previous patch
>> [1],
>>>     should we take that into account?
>>>
>>>
>>> So we have different NFP devices and different firmwares.
>>> NFP by design support all those speeds, but the PMD relies on the
>>> firmware for being able to know which is the current configured speed
>>> after link negotiation. PMD development was done with a specific
>>> firmware, and I was told to just report such speed by default. Last
>>> firmware versions give that speed info, but old firmware versions do not.
>>>
>>> So, all devices support such a speed range, indeed PMD works with any of
>>> them, but reported speed is always 40G with old firmware. This is a
>>> firmware limitation but we have to support old and new firmware.
>>
>> But this information to the application will be wrong for some (old) FW.
>> What do you think checking the FW version here and report capability
>> based on what FW supports?
>>
>>
> The driver advertises the right speed range supported. The problem is with
> the report about the current link speed configured.
> Maybe, is the right thing to do here to not report the current link speed
> because the driver really does not know about it?

Sorry, confused. Is it like following:

"
For new FW, there is no problem, it supports the range between 1G - 50G,
and reports correct current speed.

With old FW, device still can be set to speed range between 1G - 50G,
but it doesn't report current speed correct, and DPDK driver reports
back to application as device current speed is 40G, without really
knowing the current speed.
"

Thanks,
ferruh

> 
> If you agree with this, I'm afraid the just accepted patch about the link
> report needs to be modified.
> 
> 
> 
>>>
>>>
>>>
>>>     Also other than that exception, can you please confirm all other
>> devices
>>>     support all above speeds?
>>>
>>>     [1]
>>>     +       if ((NFD_CFG_MAJOR_VERSION_of(hw->ver) < 4) ||
>>>     +           ((NFD_CFG_MINOR_VERSION_of(hw->ver) == 4) &&
>>>     +           (NFD_CFG_MINOR_VERSION_of(hw->ver) == 0)))
>>>     +               link.link_speed = ETH_SPEED_NUM_40G;
>>>
>>>
>>>     >  }
>>>     >
>>>     >  static const uint32_t *
>>>     >
>>>
>>>
>>
>>

^ permalink raw reply

* Re: [PATCH] gitignore: ignore top level build/ directory
From: Thomas Monjalon @ 2016-12-19 16:38 UTC (permalink / raw)
  To: Bruce Richardson; +Cc: Baruch Siach, Ferruh Yigit, dev
In-Reply-To: <20161219161422.GB166228@bricha3-MOBL3.ger.corp.intel.com>

2016-12-19 16:14, Bruce Richardson:
> On Mon, Dec 19, 2016 at 03:05:20PM +0100, Thomas Monjalon wrote:
> > 2016-12-13 12:02, Ferruh Yigit:
> > > On 12/13/2016 11:48 AM, Baruch Siach wrote:
> > > > RTE_OUTPUT defaults to build/.
> > > > 
> > > > Signed-off-by: Baruch Siach <baruch@tkos.co.il>
> > > 
> > > There is a similar patch:
> > > http://dpdk.org/dev/patchwork/patch/11637/
> > > 
> > > If you want you can review/comment that one too.
> > 
> > Yes, sorry I've never commented above patch.
> > 
> > I do not like filling .gitignore because I prefer
> > seeing what is built or copied or whatever with "git status".
> > What is really the benefit of .gitignore?
> 
> I take the opposite view. I only like to see files that I actually care
> about in the git status. Any build artifacts should be ignored by git as
> they are not files that it ever should track. That way doing a build
> does not change the status of the repo as git sees it.

I use git status to see also the untracked files.
And build/ is just the default build directory.

^ permalink raw reply

* Re: [PATCH] nfp: extend speed capabilities advertised
From: Marc @ 2016-12-19 16:35 UTC (permalink / raw)
  To: Alejandro Lucero; +Cc: Ferruh Yigit, dev
In-Reply-To: <CAD+H992J5mossfmWG1wq-1YE=12uAQq-R8STX-G42vv13--GBQ@mail.gmail.com>

On 19 December 2016 at 17:18, Alejandro Lucero <
alejandro.lucero@netronome.com> wrote:

> On Mon, Dec 19, 2016 at 3:05 PM, Ferruh Yigit <ferruh.yigit@intel.com>
> wrote:
>
> > On 12/19/2016 3:02 PM, Alejandro Lucero wrote:
> > >
> > >
> > > On Mon, Dec 19, 2016 at 2:36 PM, Ferruh Yigit <ferruh.yigit@intel.com
> > > <mailto:ferruh.yigit@intel.com>> wrote:
> > >
> > >     Hi Alejandro,
> > >
> > >
> > > Hi,
> > >
> > >
> > >     On 12/19/2016 12:05 PM, Alejandro Lucero wrote:
> > >     > NFP supports more speeds than just 40 and 100GB, which were
> > >     > what was advertised before.
> > >     >
> > >     > Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com
> > <mailto:alejandro.lucero@netronome.com>>
> > >     > ---
> > >     >  drivers/net/nfp/nfp_net.c | 4 +++-
> > >     >  1 file changed, 3 insertions(+), 1 deletion(-)
> > >     >
> > >     > diff --git a/drivers/net/nfp/nfp_net.c
> b/drivers/net/nfp/nfp_net.c
> > >     > index 27afbfd..77015c4 100644
> > >     > --- a/drivers/net/nfp/nfp_net.c
> > >     > +++ b/drivers/net/nfp/nfp_net.c
> > >     > @@ -1077,7 +1077,9 @@ static void nfp_net_read_mac(struct
> > nfp_net_hw *hw)
> > >     >       dev_info->reta_size = NFP_NET_CFG_RSS_ITBL_SZ;
> > >     >       dev_info->hash_key_size = NFP_NET_CFG_RSS_KEY_SZ;
> > >     >
> > >     > -     dev_info->speed_capa = ETH_LINK_SPEED_40G |
> > ETH_LINK_SPEED_100G;
> > >     > +     dev_info->speed_capa = ETH_SPEED_NUM_1G |
> ETH_LINK_SPEED_10G
> > |
> > >     > +                            ETH_SPEED_NUM_25G |
> ETH_SPEED_NUM_40G
> > |
> > >     > +                            ETH_SPEED_NUM_50G |
> > ETH_LINK_SPEED_100G;
> > >
> > >     Does all devices driver by this driver supports all these speeds?
> > >
> > >     I am aware of at least one exception to this, from previous patch
> > [1],
> > >     should we take that into account?
> > >
> > >
> > > So we have different NFP devices and different firmwares.
> > > NFP by design support all those speeds, but the PMD relies on the
> > > firmware for being able to know which is the current configured speed
> > > after link negotiation. PMD development was done with a specific
> > > firmware, and I was told to just report such speed by default. Last
> > > firmware versions give that speed info, but old firmware versions do
> not.
> > >
> > > So, all devices support such a speed range, indeed PMD works with any
> of
> > > them, but reported speed is always 40G with old firmware. This is a
> > > firmware limitation but we have to support old and new firmware.
> >
> > But this information to the application will be wrong for some (old) FW.
> > What do you think checking the FW version here and report capability
> > based on what FW supports?
> >
> >
> The driver advertises the right speed range supported. The problem is with
> the report about the current link speed configured.
> Maybe, is the right thing to do here to not report the current link speed
> because the driver really does not know about it?
>
> If you agree with this, I'm afraid the just accepted patch about the link
> report needs to be modified.
>

Alejandro,

If negociated link state has to be changed, then struct rte_eth_dev_data
dev_link field is where to do it.

As Ferruh was saying, dev_info->speed_capa contains the speed capabilties
of the particular NIC in use, not the driver (detecting firmware version
would be the best here).

marc


>
>
>
> > >
> > >
> > >
> > >     Also other than that exception, can you please confirm all other
> > devices
> > >     support all above speeds?
> > >
> > >     [1]
> > >     +       if ((NFD_CFG_MAJOR_VERSION_of(hw->ver) < 4) ||
> > >     +           ((NFD_CFG_MINOR_VERSION_of(hw->ver) == 4) &&
> > >     +           (NFD_CFG_MINOR_VERSION_of(hw->ver) == 0)))
> > >     +               link.link_speed = ETH_SPEED_NUM_40G;
> > >
> > >
> > >     >  }
> > >     >
> > >     >  static const uint32_t *
> > >     >
> > >
> > >
> >
> >
>

^ permalink raw reply

* [PATCH] doc: simplify L3fwd user guide examples
From: Pablo de Lara @ 2016-12-19 16:34 UTC (permalink / raw)
  To: john.mcnamara; +Cc: dev, Pablo de Lara

L3 Forwarding sample app user guides have some inconsistencies
between the example command line and the configuration table.
Also, they were showing too complicated configuration, using two
different NUMA nodes for two ports, which will probably lead
to performance drop due to use cross-socket channel.

This patch simplifies the configuration of these examples,
by using a single NUMA node and a single queue per port.

Signed-off-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
---
 doc/guides/sample_app_ug/l3_forward.rst            | 28 +++++--------
 .../sample_app_ug/l3_forward_access_ctrl.rst       | 47 ++++++++--------------
 doc/guides/sample_app_ug/l3_forward_virtual.rst    | 23 ++++-------
 3 files changed, 34 insertions(+), 64 deletions(-)

diff --git a/doc/guides/sample_app_ug/l3_forward.rst b/doc/guides/sample_app_ug/l3_forward.rst
index ab916b9..6a6b8fb 100644
--- a/doc/guides/sample_app_ug/l3_forward.rst
+++ b/doc/guides/sample_app_ug/l3_forward.rst
@@ -129,43 +129,33 @@ Where,
 
 * ``--parse-ptype:`` Optional, set to use software to analyze packet type. Without this option, hardware will check the packet type.
 
-For example, consider a dual processor socket platform where cores 0-7 and 16-23 appear on socket 0, while cores 8-15 and 24-31 appear on socket 1.
-Let's say that the programmer wants to use memory from both NUMA nodes, the platform has only two ports, one connected to each NUMA node,
-and the programmer wants to use two cores from each processor socket to do the packet processing.
+For example, consider a dual processor socket platform with 8 physical cores, where cores 0-7 and 16-23 appear on socket 0,
+while cores 8-15 and 24-31 appear on socket 1.
 
-To enable L3 forwarding between two ports, using two cores, cores 1 and 2, from each processor,
-while also taking advantage of local memory access by optimizing around NUMA, the programmer must enable two queues from each port,
-pin to the appropriate cores and allocate memory from the appropriate NUMA node. This is achieved using the following command:
+To enable L3 forwarding between two ports, assuming that both ports are in the same socket, using two cores, cores 1 and 2,
+(which are in the same socket too), use the following command:
 
 .. code-block:: console
 
-    ./build/l3fwd -c 606 -n 4 -- -p 0x3 --config="(0,0,1),(0,1,2),(1,0,9),(1,1,10)"
+    ./build/l3fwd -l 1,2 -n 4 -- -p 0x3 --config="(0,0,1),(1,0,2)"
 
 In this command:
 
-*   The -c option enables cores 0, 1, 2, 3
+*   The -l option enables cores 1, 2
 
 *   The -p option enables ports 0 and 1
 
-*   The --config option enables two queues on each port and maps each (port,queue) pair to a specific core.
-    Logic to enable multiple RX queues using RSS and to allocate memory from the correct NUMA nodes
-    is included in the application and is done transparently.
+*   The --config option enables one queue on each port and maps each (port,queue) pair to a specific core.
     The following table shows the mapping in this example:
 
 +----------+-----------+-----------+-------------------------------------+
 | **Port** | **Queue** | **lcore** | **Description**                     |
 |          |           |           |                                     |
 +----------+-----------+-----------+-------------------------------------+
-| 0        | 0         | 0         | Map queue 0 from port 0 to lcore 0. |
+| 0        | 0         | 1         | Map queue 0 from port 0 to lcore 1. |
 |          |           |           |                                     |
 +----------+-----------+-----------+-------------------------------------+
-| 0        | 1         | 2         | Map queue 1 from port 0 to lcore 2. |
-|          |           |           |                                     |
-+----------+-----------+-----------+-------------------------------------+
-| 1        | 0         | 1         | Map queue 0 from port 1 to lcore 1. |
-|          |           |           |                                     |
-+----------+-----------+-----------+-------------------------------------+
-| 1        | 1         | 3         | Map queue 1 from port 1 to lcore 3. |
+| 1        | 0         | 2         | Map queue 0 from port 1 to lcore 2. |
 |          |           |           |                                     |
 +----------+-----------+-----------+-------------------------------------+
 
diff --git a/doc/guides/sample_app_ug/l3_forward_access_ctrl.rst b/doc/guides/sample_app_ug/l3_forward_access_ctrl.rst
index 4049e01..3574a25 100644
--- a/doc/guides/sample_app_ug/l3_forward_access_ctrl.rst
+++ b/doc/guides/sample_app_ug/l3_forward_access_ctrl.rst
@@ -306,48 +306,35 @@ where,
 
 *   --no-numa: optional, disables numa awareness
 
-As an example, consider a dual processor socket platform where cores 0, 2, 4, 6, 8 and 10 appear on socket 0,
-while cores 1, 3, 5, 7, 9 and 11 appear on socket 1.
-Let's say that the user wants to use memory from both NUMA nodes,
-the platform has only two ports and the user wants to use two cores from each processor socket to do the packet processing.
+For example, consider a dual processor socket platform with 8 physical cores, where cores 0-7 and 16-23 appear on socket 0,
+while cores 8-15 and 24-31 appear on socket 1.
 
-To enable L3 forwarding between two ports, using two cores from each processor,
-while also taking advantage of local memory access by optimizing around NUMA,
-the user must enable two queues from each port,
-pin to the appropriate cores and allocate memory from the appropriate NUMA node.
-This is achieved using the following command:
+To enable L3 forwarding between two ports, assuming that both ports are in the same socket, using two cores, cores 1 and 2,
+(which are in the same socket too), use the following command:
 
 ..  code-block:: console
 
-    ./build/l3fwd-acl -c f -n 4 -- -p 0x3 --config="(0,0,0),(0,1,2),(1,0,1),(1,1,3)" --rule_ipv4="./rule_ipv4.db" -- rule_ipv6="./rule_ipv6.db" --scalar
+    ./build/l3fwd-acl -l 1,2 -n 4 -- -p 0x3 --config="(0,0,1),(1,0,2)" --rule_ipv4="./rule_ipv4.db" -- rule_ipv6="./rule_ipv6.db" --scalar
 
 In this command:
 
-*   The -c option enables cores 0, 1, 2, 3
+*   The -c option enables cores 1, 2
 
 *   The -p option enables ports 0 and 1
 
-*   The --config option enables two queues on each port and maps each (port,queue) pair to a specific core.
-    Logic to enable multiple RX queues using RSS and to allocate memory from the correct NUMA nodes is included in the application
-    and is done transparently.
+*   The --config option enables one queue on each port and maps each (port,queue) pair to a specific core.
     The following table shows the mapping in this example:
 
-    +----------+------------+-----------+------------------------------------------------+
-    | **Port** | **Queue**  | **lcore** |            **Description**                     |
-    |          |            |           |                                                |
-    +==========+============+===========+================================================+
-    | 0        | 0          | 0         | Map queue 0 from port 0 to lcore 0.            |
-    |          |            |           |                                                |
-    +----------+------------+-----------+------------------------------------------------+
-    | 0        | 1          | 2         | Map queue 1 from port 0 to lcore 2.            |
-    |          |            |           |                                                |
-    +----------+------------+-----------+------------------------------------------------+
-    | 1        | 0          | 1         | Map queue 0 from port 1 to lcore 1.            |
-    |          |            |           |                                                |
-    +----------+------------+-----------+------------------------------------------------+
-    | 1        | 1          | 3         | Map queue 1 from port 1 to lcore 3.            |
-    |          |            |           |                                                |
-    +----------+------------+-----------+------------------------------------------------+
+    +----------+------------+-----------+-------------------------------------+
+    | **Port** | **Queue**  | **lcore** |            **Description**          |
+    |          |            |           |                                     |
+    +==========+============+===========+=====================================+
+    | 0        | 0          | 1         | Map queue 0 from port 0 to lcore 1. |
+    |          |            |           |                                     |
+    +----------+------------+-----------+-------------------------------------+
+    | 1        | 0          | 2         | Map queue 0 from port 1 to lcore 2. |
+    |          |            |           |                                     |
+    +----------+------------+-----------+-------------------------------------+
 
 *   The --rule_ipv4 option specifies the reading of IPv4 rules sets from the ./ rule_ipv4.db file.
 
diff --git a/doc/guides/sample_app_ug/l3_forward_virtual.rst b/doc/guides/sample_app_ug/l3_forward_virtual.rst
index fa04722..5f9d894 100644
--- a/doc/guides/sample_app_ug/l3_forward_virtual.rst
+++ b/doc/guides/sample_app_ug/l3_forward_virtual.rst
@@ -110,40 +110,33 @@ where,
 
 *   --no-numa: optional, disables numa awareness
 
-For example, consider a dual processor socket platform where cores 0,2,4,6, 8, and 10 appear on socket 0,
-while cores 1,3,5,7,9, and 11 appear on socket 1.
-Let's say that the programmer wants to use memory from both NUMA nodes,
-the platform has only two ports and the programmer wants to use one core from each processor socket to do the packet processing
-since only one Rx/Tx queue pair can be used in virtualization mode.
+For example, consider a dual processor socket platform with 8 physical cores, where cores 0-7 and 16-23 appear on socket 0,
+while cores 8-15 and 24-31 appear on socket 1.
 
-To enable L3 forwarding between two ports, using one core from each processor,
-while also taking advantage of local memory accesses by optimizing around NUMA,
-the programmer can pin to the appropriate cores and allocate memory from the appropriate NUMA node.
-This is achieved using the following command:
+To enable L3 forwarding between two ports, assuming that both ports are in the same socket, using two cores, cores 1 and 2,
+(which are in the same socket too), use the following command:
 
 .. code-block:: console
 
-   ./build/l3fwd-vf -c 0x03 -n 3 -- -p 0x3 --config="(0,0,0),(1,0,1)"
+   ./build/l3fwd-vf -l 1,2 -n 4 -- -p 0x3 --config="(0,0,1),(1,0,2)"
 
 In this command:
 
-*   The -c option enables cores 0 and 1
+*   The -l option enables cores 1 and 2
 
 *   The -p option enables ports 0 and 1
 
 *   The --config option enables one queue on each port and maps each (port,queue) pair to a specific core.
-    Logic to enable multiple RX queues using RSS and to allocate memory from the correct NUMA nodes
-    is included in the application and is done transparently.
     The following table shows the mapping in this example:
 
     +----------+-----------+-----------+------------------------------------+
     | **Port** | **Queue** | **lcore** | **Description**                    |
     |          |           |           |                                    |
     +==========+===========+===========+====================================+
-    | 0        | 0         | 0         | Map queue 0 from port 0 to lcore 0 |
+    | 0        | 0         | 1         | Map queue 0 from port 0 to lcore 1 |
     |          |           |           |                                    |
     +----------+-----------+-----------+------------------------------------+
-    | 1        | 1         | 1         | Map queue 0 from port 1 to lcore 1 |
+    | 1        | 0         | 2         | Map queue 0 from port 1 to lcore 2 |
     |          |           |           |                                    |
     +----------+-----------+-----------+------------------------------------+
 
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH] nfp: extend speed capabilities advertised
From: Alejandro Lucero @ 2016-12-19 16:18 UTC (permalink / raw)
  To: Ferruh Yigit; +Cc: dev
In-Reply-To: <3dd84942-5c7b-5f3f-2760-a71f97dfcd6a@intel.com>

On Mon, Dec 19, 2016 at 3:05 PM, Ferruh Yigit <ferruh.yigit@intel.com>
wrote:

> On 12/19/2016 3:02 PM, Alejandro Lucero wrote:
> >
> >
> > On Mon, Dec 19, 2016 at 2:36 PM, Ferruh Yigit <ferruh.yigit@intel.com
> > <mailto:ferruh.yigit@intel.com>> wrote:
> >
> >     Hi Alejandro,
> >
> >
> > Hi,
> >
> >
> >     On 12/19/2016 12:05 PM, Alejandro Lucero wrote:
> >     > NFP supports more speeds than just 40 and 100GB, which were
> >     > what was advertised before.
> >     >
> >     > Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com
> <mailto:alejandro.lucero@netronome.com>>
> >     > ---
> >     >  drivers/net/nfp/nfp_net.c | 4 +++-
> >     >  1 file changed, 3 insertions(+), 1 deletion(-)
> >     >
> >     > diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
> >     > index 27afbfd..77015c4 100644
> >     > --- a/drivers/net/nfp/nfp_net.c
> >     > +++ b/drivers/net/nfp/nfp_net.c
> >     > @@ -1077,7 +1077,9 @@ static void nfp_net_read_mac(struct
> nfp_net_hw *hw)
> >     >       dev_info->reta_size = NFP_NET_CFG_RSS_ITBL_SZ;
> >     >       dev_info->hash_key_size = NFP_NET_CFG_RSS_KEY_SZ;
> >     >
> >     > -     dev_info->speed_capa = ETH_LINK_SPEED_40G |
> ETH_LINK_SPEED_100G;
> >     > +     dev_info->speed_capa = ETH_SPEED_NUM_1G | ETH_LINK_SPEED_10G
> |
> >     > +                            ETH_SPEED_NUM_25G | ETH_SPEED_NUM_40G
> |
> >     > +                            ETH_SPEED_NUM_50G |
> ETH_LINK_SPEED_100G;
> >
> >     Does all devices driver by this driver supports all these speeds?
> >
> >     I am aware of at least one exception to this, from previous patch
> [1],
> >     should we take that into account?
> >
> >
> > So we have different NFP devices and different firmwares.
> > NFP by design support all those speeds, but the PMD relies on the
> > firmware for being able to know which is the current configured speed
> > after link negotiation. PMD development was done with a specific
> > firmware, and I was told to just report such speed by default. Last
> > firmware versions give that speed info, but old firmware versions do not.
> >
> > So, all devices support such a speed range, indeed PMD works with any of
> > them, but reported speed is always 40G with old firmware. This is a
> > firmware limitation but we have to support old and new firmware.
>
> But this information to the application will be wrong for some (old) FW.
> What do you think checking the FW version here and report capability
> based on what FW supports?
>
>
The driver advertises the right speed range supported. The problem is with
the report about the current link speed configured.
Maybe, is the right thing to do here to not report the current link speed
because the driver really does not know about it?

If you agree with this, I'm afraid the just accepted patch about the link
report needs to be modified.



> >
> >
> >
> >     Also other than that exception, can you please confirm all other
> devices
> >     support all above speeds?
> >
> >     [1]
> >     +       if ((NFD_CFG_MAJOR_VERSION_of(hw->ver) < 4) ||
> >     +           ((NFD_CFG_MINOR_VERSION_of(hw->ver) == 4) &&
> >     +           (NFD_CFG_MINOR_VERSION_of(hw->ver) == 0)))
> >     +               link.link_speed = ETH_SPEED_NUM_40G;
> >
> >
> >     >  }
> >     >
> >     >  static const uint32_t *
> >     >
> >
> >
>
>

^ permalink raw reply

* Re: [PATCHv2 01/34] lib/ether: add rte_device in rte_eth_dev
From: Stephen Hemminger @ 2016-12-19 16:16 UTC (permalink / raw)
  To: Hemant Agrawal
  Cc: dev, thomas.monjalon, bruce.richardson, shreyansh.jain,
	john.mcnamara, ferruh.yigit, jerin.jacob
In-Reply-To: <1482180853-18823-2-git-send-email-hemant.agrawal@nxp.com>

On Tue, 20 Dec 2016 02:23:40 +0530
Hemant Agrawal <hemant.agrawal@nxp.com> wrote:

> Signed-off-by: Hemant Agrawal <hemant.agrawal@nxp.com>
> ---
>  lib/librte_ether/rte_ethdev.h | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index 9678179..0b601e9 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -1626,6 +1626,7 @@ struct rte_eth_dev {
>  	eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
>  	eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
>  	struct rte_eth_dev_data *data;  /**< Pointer to device data */
> +	struct rte_device *device;
>  	const struct eth_driver *driver;/**< Driver for this device */
>  	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
>  	struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */

NAK
I would rather that rte_pci_device be eliminated from rte_eth_dev_data and
replace by more generic rte_device. I am working on a patch set to do this,
it is not fundamentally hard.

^ permalink raw reply

* Re: [PATCH] gitignore: ignore top level build/ directory
From: Bruce Richardson @ 2016-12-19 16:14 UTC (permalink / raw)
  To: Thomas Monjalon; +Cc: Baruch Siach, Ferruh Yigit, dev
In-Reply-To: <1984848.mjZxypNVZ9@xps13>

On Mon, Dec 19, 2016 at 03:05:20PM +0100, Thomas Monjalon wrote:
> 2016-12-13 12:02, Ferruh Yigit:
> > On 12/13/2016 11:48 AM, Baruch Siach wrote:
> > > RTE_OUTPUT defaults to build/.
> > > 
> > > Signed-off-by: Baruch Siach <baruch@tkos.co.il>
> > 
> > There is a similar patch:
> > http://dpdk.org/dev/patchwork/patch/11637/
> > 
> > If you want you can review/comment that one too.
> 
> Yes, sorry I've never commented above patch.
> 
> I do not like filling .gitignore because I prefer
> seeing what is built or copied or whatever with "git status".
> What is really the benefit of .gitignore?

I take the opposite view. I only like to see files that I actually care
about in the git status. Any build artifacts should be ignored by git as
they are not files that it ever should track. That way doing a build
does not change the status of the repo as git sees it.

/Bruce

^ permalink raw reply

* [PATCH] nfp: avoid modulo operations for handling ring wrapping
From: Alejandro Lucero @ 2016-12-19 16:13 UTC (permalink / raw)
  To: dev

Having those modulo operations implies costly instructions execution,
what can be avoided with conditionals and unlikely clauses.

This change makes the software ring read and write indexes to be now
always within the ring size which has to be handled properly. The main
problem is when write pointer wraps and being less than the read pointer.
This happened before, but just with indexes type size (uint32_t) wrapping,
and in that case the processor does the right thing no requiring special
hanling by software.

This work has also led to discovering redundant pointers in the driver,
which have been removed.

Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
---
 drivers/net/nfp/nfp_net.c     | 66 +++++++++++++++++++++----------------------
 drivers/net/nfp/nfp_net_pmd.h |  5 +---
 2 files changed, 33 insertions(+), 38 deletions(-)

diff --git a/drivers/net/nfp/nfp_net.c b/drivers/net/nfp/nfp_net.c
index be0fefa..0e6bf4c 100644
--- a/drivers/net/nfp/nfp_net.c
+++ b/drivers/net/nfp/nfp_net.c
@@ -308,7 +308,6 @@ enum nfp_qcp_ptr {
 nfp_net_reset_rx_queue(struct nfp_net_rxq *rxq)
 {
 	nfp_net_rx_queue_release_mbufs(rxq);
-	rxq->wr_p = 0;
 	rxq->rd_p = 0;
 	rxq->nb_rx_hold = 0;
 }
@@ -347,8 +346,6 @@ enum nfp_qcp_ptr {
 	nfp_net_tx_queue_release_mbufs(txq);
 	txq->wr_p = 0;
 	txq->rd_p = 0;
-	txq->tail = 0;
-	txq->qcp_rd_p = 0;
 }
 
 static int
@@ -1114,8 +1111,7 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 		return 0;
 	}
 
-	idx = rxq->rd_p % rxq->rx_count;
-	rxds = &rxq->rxds[idx];
+	idx = rxq->rd_p;
 
 	count = 0;
 
@@ -1414,8 +1410,6 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 		rxd->fld.dma_addr_lo = dma_addr & 0xffffffff;
 		rxe[i].mbuf = mbuf;
 		PMD_RX_LOG(DEBUG, "[%d]: %" PRIx64 "\n", i, dma_addr);
-
-		rxq->wr_p++;
 	}
 
 	/* Make sure all writes are flushed before telling the hardware */
@@ -1499,7 +1493,6 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 	}
 
 	txq->tx_count = nb_desc;
-	txq->tail = 0;
 	txq->tx_free_thresh = tx_free_thresh;
 	txq->tx_pthresh = tx_conf->tx_thresh.pthresh;
 	txq->tx_hthresh = tx_conf->tx_thresh.hthresh;
@@ -1691,7 +1684,6 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 	struct nfp_net_hw *hw;
 	struct rte_mbuf *mb;
 	struct rte_mbuf *new_mb;
-	int idx;
 	uint16_t nb_hold;
 	uint64_t dma_addr;
 	int avail;
@@ -1711,9 +1703,7 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 	nb_hold = 0;
 
 	while (avail < nb_pkts) {
-		idx = rxq->rd_p % rxq->rx_count;
-
-		rxb = &rxq->rxbufs[idx];
+		rxb = &rxq->rxbufs[rxq->rd_p];
 		if (unlikely(rxb == NULL)) {
 			RTE_LOG_DP(ERR, PMD, "rxb does not exist!\n");
 			break;
@@ -1725,7 +1715,7 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 		 */
 		rte_rmb();
 
-		rxds = &rxq->rxds[idx];
+		rxds = &rxq->rxds[rxq->rd_p];
 		if ((rxds->rxd.meta_len_dd & PCIE_DESC_RX_DD) == 0)
 			break;
 
@@ -1813,6 +1803,8 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 		rxds->fld.dma_addr_lo = dma_addr & 0xffffffff;
 
 		rxq->rd_p++;
+		if (unlikely(rxq->rd_p == rxq->rx_count)) /* wrapping?*/
+			rxq->rd_p = 0;
 	}
 
 	if (nb_hold == 0)
@@ -1858,33 +1850,40 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
 	/* Work out how many packets have been sent */
 	qcp_rd_p = nfp_qcp_read(txq->qcp_q, NFP_QCP_READ_PTR);
 
-	if (qcp_rd_p == txq->qcp_rd_p) {
+	if (qcp_rd_p == txq->rd_p) {
 		PMD_TX_LOG(DEBUG, "queue %u: It seems harrier is not sending "
 			   "packets (%u, %u)\n", txq->qidx,
-			   qcp_rd_p, txq->qcp_rd_p);
+			   qcp_rd_p, txq->rd_p);
 		return 0;
 	}
 
-	if (qcp_rd_p > txq->qcp_rd_p)
-		todo = qcp_rd_p - txq->qcp_rd_p;
+	if (qcp_rd_p > txq->rd_p)
+		todo = qcp_rd_p - txq->rd_p;
 	else
-		todo = qcp_rd_p + txq->tx_count - txq->qcp_rd_p;
+		todo = qcp_rd_p + txq->tx_count - txq->rd_p;
 
-	PMD_TX_LOG(DEBUG, "qcp_rd_p %u, txq->qcp_rd_p: %u, qcp->rd_p: %u\n",
-		   qcp_rd_p, txq->qcp_rd_p, txq->rd_p);
+	PMD_TX_LOG(DEBUG, "qcp_rd_p %u, txq->rd_p: %u, qcp->rd_p: %u\n",
+		   qcp_rd_p, txq->rd_p, txq->rd_p);
 
 	if (todo == 0)
 		return todo;
 
-	txq->qcp_rd_p += todo;
-	txq->qcp_rd_p %= txq->tx_count;
 	txq->rd_p += todo;
+	if (unlikely(txq->rd_p >= txq->tx_count))
+		txq->rd_p -= txq->tx_count;
 
 	return todo;
 }
 
 /* Leaving always free descriptors for avoiding wrapping confusion */
-#define NFP_FREE_TX_DESC(t) (t->tx_count - (t->wr_p - t->rd_p) - 8)
+static inline
+uint32_t nfp_free_tx_desc(struct nfp_net_txq *txq)
+{
+	if (txq->wr_p >= txq->rd_p)
+		return txq->tx_count - (txq->wr_p - txq->rd_p) - 8;
+	else
+		return txq->rd_p - txq->wr_p - 8;
+}
 
 /*
  * nfp_net_txq_full - Check if the TX queue free descriptors
@@ -1895,9 +1894,9 @@ static void nfp_net_read_mac(struct nfp_net_hw *hw)
  * This function uses the host copy* of read/write pointers
  */
 static inline
-int nfp_net_txq_full(struct nfp_net_txq *txq)
+uint32_t nfp_net_txq_full(struct nfp_net_txq *txq)
 {
-	return NFP_FREE_TX_DESC(txq) < txq->tx_free_thresh;
+	return (nfp_free_tx_desc(txq) < txq->tx_free_thresh);
 }
 
 static uint16_t
@@ -1915,15 +1914,15 @@ int nfp_net_txq_full(struct nfp_net_txq *txq)
 
 	txq = tx_queue;
 	hw = txq->hw;
-	txds = &txq->txds[txq->tail];
+	txds = &txq->txds[txq->wr_p];
 
 	PMD_TX_LOG(DEBUG, "working for queue %u at pos %d and %u packets\n",
-		   txq->qidx, txq->tail, nb_pkts);
+		   txq->qidx, txq->wr_p, nb_pkts);
 
-	if ((NFP_FREE_TX_DESC(txq) < nb_pkts) || (nfp_net_txq_full(txq)))
+	if ((nfp_free_tx_desc(txq) < nb_pkts) || (nfp_net_txq_full(txq)))
 		nfp_net_tx_free_bufs(txq);
 
-	free_descs = (uint16_t)NFP_FREE_TX_DESC(txq);
+	free_descs = (uint16_t)nfp_free_tx_desc(txq);
 	if (unlikely(free_descs == 0))
 		return 0;
 
@@ -1936,7 +1935,7 @@ int nfp_net_txq_full(struct nfp_net_txq *txq)
 	/* Sending packets */
 	while ((i < nb_pkts) && free_descs) {
 		/* Grabbing the mbuf linked to the current descriptor */
-		lmbuf = &txq->txbufs[txq->tail].mbuf;
+		lmbuf = &txq->txbufs[txq->wr_p].mbuf;
 		/* Warming the cache for releasing the mbuf later on */
 		RTE_MBUF_PREFETCH_TO_FREE(*lmbuf);
 
@@ -1998,9 +1997,8 @@ int nfp_net_txq_full(struct nfp_net_txq *txq)
 			free_descs--;
 
 			txq->wr_p++;
-			txq->tail++;
-			if (unlikely(txq->tail == txq->tx_count)) /* wrapping?*/
-				txq->tail = 0;
+			if (unlikely(txq->wr_p == txq->tx_count)) /* wrapping?*/
+				txq->wr_p = 0;
 
 			pkt_size -= dma_size;
 			if (!pkt_size) {
@@ -2011,7 +2009,7 @@ int nfp_net_txq_full(struct nfp_net_txq *txq)
 				pkt = pkt->next;
 			}
 			/* Referencing next free TX descriptor */
-			txds = &txq->txds[txq->tail];
+			txds = &txq->txds[txq->wr_p];
 			issued_descs++;
 		}
 		i++;
diff --git a/drivers/net/nfp/nfp_net_pmd.h b/drivers/net/nfp/nfp_net_pmd.h
index dc70d57..4df2275 100644
--- a/drivers/net/nfp/nfp_net_pmd.h
+++ b/drivers/net/nfp/nfp_net_pmd.h
@@ -216,12 +216,10 @@ struct nfp_net_txq {
 
 	uint32_t wr_p;
 	uint32_t rd_p;
-	uint32_t qcp_rd_p;
 
 	uint32_t tx_count;
 
 	uint32_t tx_free_thresh;
-	uint32_t tail;
 
 	/*
 	 * For each descriptor keep a reference to the mbuff and
@@ -240,7 +238,7 @@ struct nfp_net_txq {
 	struct nfp_net_tx_desc *txds;
 
 	/*
-	 * At this point 56 bytes have been used for all the fields in the
+	 * At this point 48 bytes have been used for all the fields in the
 	 * TX critical path. We have room for 8 bytes and still all placed
 	 * in a cache line. We are not using the threshold values below nor
 	 * the txq_flags but if we need to, we can add the most used in the
@@ -326,7 +324,6 @@ struct nfp_net_rxq {
 	 * freelist descriptors and @rd_p is where the driver start
 	 * reading descriptors for newly arrive packets from.
 	 */
-	uint32_t wr_p;
 	uint32_t rd_p;
 
 	/*
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH v3 2/6] eventdev: define southbound driver interface
From: Bruce Richardson @ 2016-12-19 15:50 UTC (permalink / raw)
  To: Jerin Jacob
  Cc: dev, thomas.monjalon, hemant.agrawal, gage.eads, harry.van.haaren
In-Reply-To: <1482070895-32491-3-git-send-email-jerin.jacob@caviumnetworks.com>

On Sun, Dec 18, 2016 at 07:51:31PM +0530, Jerin Jacob wrote:
> Signed-off-by: Jerin Jacob <jerin.jacob@caviumnetworks.com>
> ---
>  lib/librte_eventdev/rte_eventdev.h     |  38 +++++
>  lib/librte_eventdev/rte_eventdev_pmd.h | 291 +++++++++++++++++++++++++++++++++
>  2 files changed, 329 insertions(+)
>  create mode 100644 lib/librte_eventdev/rte_eventdev_pmd.h
> 
<snip>
> +
> +/**
> + * Release resources allocated by given event queue.
> + *
> + * @param queue_id
> + *   Event queue index
> + *
> + */
> +typedef void (*eventdev_queue_release_t)(uint8_t queue_id);
> +

Missing an eventdev pointer here too, otherwise you can't use the queue
index to find the queue.

/Bruce

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox