All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stephen Hemminger <stephen@networkplumber.org>
To: dev@dpdk.org
Cc: Stephen Hemminger <stephen@networkplumber.org>
Subject: [dpdk-dev] [RFC 5/8] pdump: add classic BPF filtering
Date: Mon,  7 Oct 2019 09:52:29 -0700	[thread overview]
Message-ID: <20191007165232.14535-6-stephen@networkplumber.org> (raw)
In-Reply-To: <20191007165232.14535-1-stephen@networkplumber.org>

Simple classic BPF interpreter based off of libpcap.

This is a copy of the BPF interpreter from libpcap, modified
to handle mbuf metadata. The existing pcap_offline_filter
does not expose a way to match VLAN tags. Copying the BPF interpreter
also means that rte_pdump still does not have a hard dependency
on libpcap.

The API for pdump is versioned because the filter needs to
know both byte code and length of program to validate it.

This patch does cause a small checkpatch warning because
it keeps the original variable names from the pcap code.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 app/pdump/main.c                       |  16 +-
 app/test/test_pdump.c                  |   4 +-
 lib/librte_pdump/Makefile              |   2 +-
 lib/librte_pdump/pdump_bpf.h           | 168 +++++++++
 lib/librte_pdump/rte_pcap_filter.c     | 462 +++++++++++++++++++++++++
 lib/librte_pdump/rte_pdump.c           | 145 ++++++--
 lib/librte_pdump/rte_pdump.h           |  54 ++-
 lib/librte_pdump/rte_pdump_version.map |   7 +
 8 files changed, 806 insertions(+), 52 deletions(-)
 create mode 100644 lib/librte_pdump/pdump_bpf.h
 create mode 100644 lib/librte_pdump/rte_pcap_filter.c

diff --git a/app/pdump/main.c b/app/pdump/main.c
index c1b901279f4b..c3eb554ef28b 100644
--- a/app/pdump/main.c
+++ b/app/pdump/main.c
@@ -828,20 +828,20 @@ enable_pdump(void)
 						pt->queue,
 						RTE_PDUMP_FLAG_RX,
 						pt->rx_ring,
-						pt->mp, NULL);
+						pt->mp, NULL, 0);
 				ret1 = rte_pdump_enable_by_deviceid(
 						pt->device_id,
 						pt->queue,
 						RTE_PDUMP_FLAG_TX,
 						pt->tx_ring,
-						pt->mp, NULL);
+						pt->mp, NULL, 0);
 			} else if (pt->dump_by_type == PORT_ID) {
 				ret = rte_pdump_enable(pt->port, pt->queue,
 						RTE_PDUMP_FLAG_RX,
-						pt->rx_ring, pt->mp, NULL);
+						pt->rx_ring, pt->mp, NULL, 0);
 				ret1 = rte_pdump_enable(pt->port, pt->queue,
 						RTE_PDUMP_FLAG_TX,
-						pt->tx_ring, pt->mp, NULL);
+						pt->tx_ring, pt->mp, NULL, 0);
 			}
 		} else if (pt->dir == RTE_PDUMP_FLAG_RX) {
 			if (pt->dump_by_type == DEVICE_ID)
@@ -849,22 +849,22 @@ enable_pdump(void)
 						pt->device_id,
 						pt->queue,
 						pt->dir, pt->rx_ring,
-						pt->mp, NULL);
+						pt->mp, NULL, 0);
 			else if (pt->dump_by_type == PORT_ID)
 				ret = rte_pdump_enable(pt->port, pt->queue,
 						pt->dir,
-						pt->rx_ring, pt->mp, NULL);
+						pt->rx_ring, pt->mp, NULL, 0);
 		} else if (pt->dir == RTE_PDUMP_FLAG_TX) {
 			if (pt->dump_by_type == DEVICE_ID)
 				ret = rte_pdump_enable_by_deviceid(
 						pt->device_id,
 						pt->queue,
 						pt->dir,
-						pt->tx_ring, pt->mp, NULL);
+						pt->tx_ring, pt->mp, NULL, 0);
 			else if (pt->dump_by_type == PORT_ID)
 				ret = rte_pdump_enable(pt->port, pt->queue,
 						pt->dir,
-						pt->tx_ring, pt->mp, NULL);
+						pt->tx_ring, pt->mp, NULL, 0);
 		}
 		if (ret < 0 || ret1 < 0) {
 			cleanup_pdump_resources();
diff --git a/app/test/test_pdump.c b/app/test/test_pdump.c
index af206968b38d..f0187a4cd279 100644
--- a/app/test/test_pdump.c
+++ b/app/test/test_pdump.c
@@ -79,7 +79,7 @@ run_pdump_client_tests(void)
 
 	for (itr = 0; itr < NUM_ITR; itr++) {
 		ret = rte_pdump_enable(portid, QUEUE_ID, flags, ring_client,
-				       mp, NULL);
+				       mp, NULL, 0);
 		if (ret < 0) {
 			printf("rte_pdump_enable failed\n");
 			return -1;
@@ -94,7 +94,7 @@ run_pdump_client_tests(void)
 		printf("pdump_disable success\n");
 
 		ret = rte_pdump_enable_by_deviceid(deviceid, QUEUE_ID, flags,
-						   ring_client, mp, NULL);
+						   ring_client, mp, NULL, 0);
 		if (ret < 0) {
 			printf("rte_pdump_enable_by_deviceid failed\n");
 			return -1;
diff --git a/lib/librte_pdump/Makefile b/lib/librte_pdump/Makefile
index 89593689a7d5..4a631c06a0ec 100644
--- a/lib/librte_pdump/Makefile
+++ b/lib/librte_pdump/Makefile
@@ -15,7 +15,7 @@ EXPORT_MAP := rte_pdump_version.map
 LIBABIVER := 3
 
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_PDUMP) := rte_pdump.c
+SRCS-$(CONFIG_RTE_LIBRTE_PDUMP) := rte_pdump.c rte_pcap_filter.c
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_PDUMP)-include := rte_pdump.h
diff --git a/lib/librte_pdump/pdump_bpf.h b/lib/librte_pdump/pdump_bpf.h
new file mode 100644
index 000000000000..8f6d00f3cee2
--- /dev/null
+++ b/lib/librte_pdump/pdump_bpf.h
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the libpcap bpf_filter which
+ * in turn is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ */
+
+#ifndef _PDUMP_BPF_H_
+#define _PDUMP_BPF_H_
+
+/*
+ * This is based off of libpcap's cut-down version of bpf.h;
+ * it includes only  the stuff needed for the BPF interpreter.
+ *
+ * Note: this is the original classic BPF generated by libpcap
+ *  not the new eBPF used elsewhere.
+ */
+
+typedef	int bpf_int32;
+typedef	unsigned int bpf_u_int32;
+
+/*
+ * Alignment macros.  BPF_WORDALIGN rounds up to the next
+ * even multiple of BPF_ALIGNMENT.
+ */
+#define BPF_ALIGNMENT sizeof(bpf_int32)
+#define BPF_WORDALIGN(x) (((x)+(BPF_ALIGNMENT-1))&~(BPF_ALIGNMENT-1))
+
+/*
+ * Number of scratch memory words (for BPF_LD|BPF_MEM and BPF_ST).
+ */
+#define BPF_MEMWORDS 16
+
+/*
+ * The instruction encodings.
+ *
+ * Please inform tcpdump-workers@lists.tcpdump.org if you use any
+ * of the reserved values, so that we can note that they're used
+ * (and perhaps implement it in the reference BPF implementation
+ * and encourage its implementation elsewhere).
+ */
+
+/*
+ * The upper 8 bits of the opcode aren't used. BSD/OS used 0x8000.
+ */
+
+/* instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define		BPF_LD		0x00
+#define		BPF_LDX		0x01
+#define		BPF_ST		0x02
+#define		BPF_STX		0x03
+#define		BPF_ALU		0x04
+#define		BPF_JMP		0x05
+#define		BPF_RET		0x06
+#define		BPF_MISC	0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code)	((code) & 0x18)
+#define		BPF_W		0x00
+#define		BPF_H		0x08
+#define		BPF_B		0x10
+/*				0x18	reserved; used by BSD/OS */
+#define BPF_MODE(code)	((code) & 0xe0)
+#define		BPF_IMM		0x00
+#define		BPF_ABS		0x20
+#define		BPF_IND		0x40
+#define		BPF_MEM		0x60
+#define		BPF_LEN		0x80
+#define		BPF_MSH		0xa0
+/*				0xc0	reserved; used by BSD/OS */
+/*				0xe0	reserved; used by BSD/OS */
+
+/* alu/jmp fields */
+#define BPF_OP(code)	((code) & 0xf0)
+#define		BPF_ADD		0x00
+#define		BPF_SUB		0x10
+#define		BPF_MUL		0x20
+#define		BPF_DIV		0x30
+#define		BPF_OR		0x40
+#define		BPF_AND		0x50
+#define		BPF_LSH		0x60
+#define		BPF_RSH		0x70
+#define		BPF_NEG		0x80
+#define		BPF_MOD		0x90
+#define		BPF_XOR		0xa0
+/*				0xb0	reserved */
+/*				0xc0	reserved */
+/*				0xd0	reserved */
+/*				0xe0	reserved */
+/*				0xf0	reserved */
+
+#define		BPF_JA		0x00
+#define		BPF_JEQ		0x10
+#define		BPF_JGT		0x20
+#define		BPF_JGE		0x30
+#define		BPF_JSET	0x40
+/*				0x50	reserved; used on BSD/OS */
+/*				0x60	reserved */
+/*				0x70	reserved */
+/*				0x80	reserved */
+/*				0x90	reserved */
+/*				0xa0	reserved */
+/*				0xb0	reserved */
+/*				0xc0	reserved */
+/*				0xd0	reserved */
+/*				0xe0	reserved */
+/*				0xf0	reserved */
+#define BPF_SRC(code)	((code) & 0x08)
+#define		BPF_K		0x00
+#define		BPF_X		0x08
+
+/* ret - BPF_K and BPF_X also apply */
+#define BPF_RVAL(code)	((code) & 0x18)
+#define		BPF_A		0x10
+/*				0x18	reserved */
+
+/* misc */
+#define BPF_MISCOP(code) ((code) & 0xf8)
+#define		BPF_TAX		0x00
+/*				0x08	reserved */
+/*				0x10	reserved */
+/*				0x18	reserved */
+/* #define	BPF_COP		0x20	NetBSD "coprocessor" extensions */
+/*				0x28	reserved */
+/*				0x30	reserved */
+/*				0x38	reserved */
+/* #define	BPF_COPX	0x40	NetBSD "coprocessor" extensions */
+/*					also used on BSD/OS */
+/*				0x48	reserved */
+/*				0x50	reserved */
+/*				0x58	reserved */
+/*				0x60	reserved */
+/*				0x68	reserved */
+/*				0x70	reserved */
+/*				0x78	reserved */
+#define		BPF_TXA		0x80
+/*				0x88	reserved */
+/*				0x90	reserved */
+/*				0x98	reserved */
+/*				0xa0	reserved */
+/*				0xa8	reserved */
+/*				0xb0	reserved */
+/*				0xb8	reserved */
+/*				0xc0	reserved; used on BSD/OS */
+/*				0xc8	reserved */
+/*				0xd0	reserved */
+/*				0xd8	reserved */
+/*				0xe0	reserved */
+/*				0xe8	reserved */
+/*				0xf0	reserved */
+/*				0xf8	reserved */
+
+/*
+ * The instruction data structure.
+ */
+struct bpf_insn {
+	u_short	code;		/* opcode, decoded with the BPF_* macros above */
+	u_char	jt;		/* offset added to pc when a jump test is true */
+	u_char	jf;		/* offset added to pc when a jump test is false */
+	bpf_u_int32 k;		/* generic operand; meaning depends on code */
+};
+
+#endif /* _PDUMP_BPF_H_ */
diff --git a/lib/librte_pdump/rte_pcap_filter.c b/lib/librte_pdump/rte_pcap_filter.c
new file mode 100644
index 000000000000..1d8caeee6628
--- /dev/null
+++ b/lib/librte_pdump/rte_pcap_filter.c
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from the libpcap bpf_filter which
+ * in turn is derived from the Stanford/CMU enet packet filter,
+ * (net/enet.c) distributed as part of 4.3BSD, and code contributed
+ * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
+ * Berkeley Laboratory.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_pdump.h>
+
+#include "pdump_bpf.h"
+
+/* These magic values are used to do negative offset to find vlan */
+#define SKF_AD_OFF    (-0x1000)
+#define SKF_AD_VLAN_TAG	44
+#define SKF_AD_VLAN_TAG_PRESENT 48
+
+#define EXTRACT32(p) rte_be_to_cpu_32(*(const unaligned_uint32_t *)(p))
+#define EXTRACT16(p) rte_be_to_cpu_16(*(const unaligned_uint16_t *)(p))
+
+static inline u_short vlan_present(const struct rte_mbuf *m)
+{
+	return !!(m->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_TX_VLAN));
+}
+
+/*
+ * Run the classic BPF program 'filter' against the packet held in mbuf 'm'.
+ * Packet loads are bounds-checked against rte_pktmbuf_data_len(m)
+ * (wirelen is rte_pktmbuf_pkt_len(m)); a failed check rejects the packet.
+ * The negative SKF_AD_OFF offsets read VLAN information from the mbuf
+ * (vlan_tci and the VLAN offload flags) instead of packet bytes.
+ * Returns 0 to reject the packet and non-zero to accept it; a NULL filter
+ * accepts everything.  The program must have passed
+ * rte_pcap_validate_filter(): an unknown opcode panics.
+ *
+ * Thanks to Ani Sinha <ani@arista.com> for providing initial implementation
+ */
+int
+rte_pcap_filter(const void *filter, const struct rte_mbuf *m)
+{
+	const struct bpf_insn *pc = filter;
+	uint32_t buflen = rte_pktmbuf_data_len(m);
+	uint32_t wirelen = rte_pktmbuf_pkt_len(m);
+	const uint8_t *p = rte_pktmbuf_mtod(m, const uint8_t *);
+	uint32_t A, X;
+	bpf_u_int32 k;
+	uint32_t mem[BPF_MEMWORDS] = { 0 }; /* loads must not see stack junk */
+
+	/* No filter means accept all. */
+	if (pc == NULL)
+		return -1;
+
+	A = 0;
+	X = 0;
+	--pc;
+	for (;;) {
+		++pc;
+
+		switch (pc->code) {
+		default:
+			/* this must be caught by validation */
+			rte_panic("invalid BPF opcode\n");
+			return 0;
+
+		case BPF_RET|BPF_K:
+			return pc->k;
+
+		case BPF_RET|BPF_A:
+			return A;
+
+		case BPF_LD|BPF_W|BPF_ABS:
+			k = pc->k;
+			if (k > buflen || sizeof(int32_t) > buflen - k)
+				return 0;
+
+			A = EXTRACT32(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_H|BPF_ABS:
+			k = pc->k;
+			if (k > buflen || sizeof(int16_t) > buflen - k)
+				return 0;
+
+			A = EXTRACT16(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_B|BPF_ABS:
+			switch (pc->k) {
+			case SKF_AD_OFF + SKF_AD_VLAN_TAG:
+				A = m->vlan_tci;
+				break;
+			case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
+				A = vlan_present(m);
+				break;
+			default:
+				k = pc->k;
+				if (k >= buflen)
+					return 0;
+
+				A = p[k];
+				break;
+			}
+			continue;
+
+		case BPF_LD|BPF_W|BPF_LEN:
+			A = wirelen;
+			continue;
+
+		case BPF_LDX|BPF_W|BPF_LEN:
+			X = wirelen;
+			continue;
+
+		case BPF_LD|BPF_W|BPF_IND:
+			k = X + pc->k;
+			if (pc->k > buflen || X > buflen - pc->k ||
+			    sizeof(int32_t) > buflen - k) {
+				return 0;
+			}
+			A = EXTRACT32(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_H|BPF_IND:
+			k = X + pc->k;
+			if (X > buflen ||
+			    pc->k > buflen - X ||
+			    sizeof(int16_t) > buflen - k)
+				return 0;
+
+			A = EXTRACT16(&p[k]);
+			continue;
+
+		case BPF_LD|BPF_B|BPF_IND:
+			k = X + pc->k;
+			if (pc->k >= buflen || X >= buflen - pc->k)
+				return 0;
+
+			A = p[k];
+			continue;
+
+		case BPF_LDX|BPF_MSH|BPF_B:
+			k = pc->k;
+			if (k >= buflen)
+				return 0;
+
+			X = (p[k] & 0xf) << 2;
+			continue;
+
+		case BPF_LD|BPF_IMM:
+			A = pc->k;
+			continue;
+
+		case BPF_LDX|BPF_IMM:
+			X = pc->k;
+			continue;
+
+		case BPF_LD|BPF_MEM:
+			A = mem[pc->k];
+			continue;
+
+		case BPF_LDX|BPF_MEM:
+			X = mem[pc->k];
+			continue;
+
+		case BPF_ST:
+			mem[pc->k] = A;
+			continue;
+
+		case BPF_STX:
+			mem[pc->k] = X;
+			continue;
+
+		case BPF_JMP|BPF_JA:
+			/*
+			 * XXX - we currently implement "ip6 protochain"
+			 * with backward jumps, so sign-extend pc->k.
+			 */
+			pc += (bpf_int32)pc->k;
+			continue;
+
+		case BPF_JMP|BPF_JGT|BPF_K:
+			pc += (pc->k < A) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGE|BPF_K:
+			pc += (pc->k <= A) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JEQ|BPF_K:
+			pc += (pc->k == A) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JSET|BPF_K:
+			pc += (A & pc->k) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGT|BPF_X:
+			pc += (A > X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JGE|BPF_X:
+			pc += (A >= X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JEQ|BPF_X:
+			pc += (A == X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_JMP|BPF_JSET|BPF_X:
+			pc += (A & X) ? pc->jt : pc->jf;
+			continue;
+
+		case BPF_ALU|BPF_ADD|BPF_X:
+			A += X;
+			continue;
+
+		case BPF_ALU|BPF_SUB|BPF_X:
+			A -= X;
+			continue;
+
+		case BPF_ALU|BPF_MUL|BPF_X:
+			A *= X;
+			continue;
+
+		case BPF_ALU|BPF_DIV|BPF_X:
+			if (X == 0)
+				return 0;
+			A /= X;
+			continue;
+
+		case BPF_ALU|BPF_MOD|BPF_X:
+			if (X == 0)
+				return 0;
+			A %= X;
+			continue;
+
+		case BPF_ALU|BPF_AND|BPF_X:
+			A &= X;
+			continue;
+
+		case BPF_ALU|BPF_OR|BPF_X:
+			A |= X;
+			continue;
+
+		case BPF_ALU|BPF_XOR|BPF_X:
+			A ^= X;
+			continue;
+
+		case BPF_ALU|BPF_LSH|BPF_X:
+			if (X < 32)
+				A <<= X;
+			else
+				A = 0;
+			continue;
+
+		case BPF_ALU|BPF_RSH|BPF_X:
+			if (X < 32)
+				A >>= X;
+			else
+				A = 0;
+			continue;
+
+		case BPF_ALU|BPF_ADD|BPF_K:
+			A += pc->k;
+			continue;
+
+		case BPF_ALU|BPF_SUB|BPF_K:
+			A -= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_MUL|BPF_K:
+			A *= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_DIV|BPF_K:
+			A /= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_MOD|BPF_K:
+			A %= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_AND|BPF_K:
+			A &= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_OR|BPF_K:
+			A |= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_XOR|BPF_K:
+			A ^= pc->k;
+			continue;
+
+		case BPF_ALU|BPF_LSH|BPF_K:
+			A = (pc->k < 32) ? A << pc->k : 0; /* as for BPF_X */
+			continue;
+
+		case BPF_ALU|BPF_RSH|BPF_K:
+			A = (pc->k < 32) ? A >> pc->k : 0; /* as for BPF_X */
+			continue;
+
+		case BPF_ALU|BPF_NEG:
+			/*
+			 * Most BPF arithmetic is unsigned, but negation
+			 * can't be unsigned; respecify it as subtracting
+			 * the accumulator from 0U, so that 1) we don't
+			 * get compiler warnings about negating an unsigned
+			 * value and 2) don't get UBSan warnings about
+			 * the result of negating 0x80000000 being undefined.
+			 */
+			A = (0U - A);
+			continue;
+
+		case BPF_MISC|BPF_TAX:
+			X = A;
+			continue;
+
+		case BPF_MISC|BPF_TXA:
+			A = X;
+			continue;
+		}
+	}
+}
+
+/*
+ * Return true if 'filter' holds a valid program of 'len' instructions.
+ * The constraints are that each jump lands on an instruction inside
+ * the program, that scratch memory accesses are within valid ranges
+ * (loads of packet data can only be, and are, checked at run time),
+ * that constant divisors are non-zero and constant shift counts are
+ * below 32, and that the last instruction is a return.
+ */
+int
+rte_pcap_validate_filter(const void *filter, uint32_t len)
+{
+	const struct bpf_insn *f = filter;
+	unsigned int i, from;
+
+	if (f == NULL || len < 1)
+		return 0;
+
+	for (i = 0; i < len; ++i) {
+		const struct bpf_insn *p = &f[i];
+
+		switch (BPF_CLASS(p->code)) {
+		/*
+		 * Check that memory operations use valid addresses.
+		 */
+		case BPF_LD:
+		case BPF_LDX:
+			switch (BPF_MODE(p->code)) {
+			case BPF_IMM:
+				break;
+			case BPF_ABS:
+			case BPF_IND:
+			case BPF_MSH:
+				/*
+				 * There's no maximum packet data size
+				 * in userland.  The runtime packet length
+				 * check suffices.
+				 */
+				break;
+			case BPF_MEM:
+				if (p->k >= BPF_MEMWORDS)
+					return 0;
+				break;
+			case BPF_LEN:
+				break;
+			default:
+				return 0;
+			}
+			break;
+		case BPF_ST:
+		case BPF_STX:
+			if (p->k >= BPF_MEMWORDS)
+				return 0;
+			break;
+		case BPF_ALU:
+			switch (BPF_OP(p->code)) {
+			case BPF_ADD:
+			case BPF_SUB:
+			case BPF_MUL:
+			case BPF_OR:
+			case BPF_AND:
+			case BPF_XOR:
+			case BPF_NEG:
+				break;
+			case BPF_LSH:
+			case BPF_RSH:
+				/*
+				 * Shifting a 32-bit value by 32 or more is
+				 * undefined in C; reject such constants.
+				 */
+				if (BPF_SRC(p->code) == BPF_K && p->k > 31)
+					return 0;
+				break;
+			case BPF_DIV:
+			case BPF_MOD:
+				/*
+				 * Check for constant division or modulus
+				 * by 0.
+				 */
+				if (BPF_SRC(p->code) == BPF_K && p->k == 0)
+					return 0;
+				break;
+			default:
+				return 0;
+			}
+			break;
+		case BPF_JMP:
+			/*
+			 * Check that jumps land inside the program.
+			 *
+			 * Conditional branches have an 8-bit unsigned
+			 * offset, so they can only go forward; adding at
+			 * most 255 to 'from' is assumed not to overflow.
+			 *
+			 * Unconditional branches have a 32-bit offset that
+			 * the interpreter sign-extends.  Backward branches
+			 * are deliberately not rejected because libpcap
+			 * uses them for the protochain operation; a
+			 * negative offset wraps in the unsigned sum below
+			 * and is accepted when the wrapped target is
+			 * still below len.
+			 *
+			 * In userland len is not bounded by BPF_MAXINSNS;
+			 * it is assumed to be far enough below UINT_MAX
+			 * that i + 1 does not overflow.
+			 */
+			from = i + 1;
+			switch (BPF_OP(p->code)) {
+			case BPF_JA:
+				if (from + p->k >= (unsigned int)len)
+					return 0;
+				break;
+			case BPF_JEQ:
+			case BPF_JGT:
+			case BPF_JGE:
+			case BPF_JSET:
+				if (from + p->jt >= (unsigned int)len ||
+				    from + p->jf >= (unsigned int)len)
+					return 0;
+				break;
+			default:
+				return 0;
+			}
+			break;
+		case BPF_RET:
+			break;
+		case BPF_MISC:
+			break;
+		default:
+			return 0;
+		}
+	}
+
+	return BPF_CLASS(f[len - 1].code) == BPF_RET;
+}
diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index 41f2ec17a26b..1206671c6f60 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -8,11 +8,13 @@
 #include <rte_lcore.h>
 #include <rte_log.h>
 #include <rte_errno.h>
+#include <rte_malloc.h>
 #include <rte_string_fns.h>
 
 #include "rte_pdump.h"
 
 #define DEVICE_ID_SIZE 64
+#define BPF_INS_SIZE sizeof(uint64_t)
 
 /* Macro for printing using RTE_LOG */
 static int pdump_logtype;
@@ -32,6 +34,8 @@ enum pdump_version {
 	V1 = 1
 };
 
+#define PDUMP_FILTER_V1	0x7064756d7066696c
+
 struct pdump_request {
 	uint16_t ver;
 	uint16_t op;
@@ -42,14 +46,14 @@ struct pdump_request {
 			uint16_t queue;
 			struct rte_ring *ring;
 			struct rte_mempool *mp;
-			void *filter;
+			const void *filter;
 		} en_v1;
 		struct disable_v1 {
 			char device[DEVICE_ID_SIZE];
 			uint16_t queue;
 			struct rte_ring *ring;
 			struct rte_mempool *mp;
-			void *filter;
+			const void *filter;
 		} dis_v1;
 	} data;
 };
@@ -64,7 +68,7 @@ static struct pdump_rxtx_cbs {
 	struct rte_ring *ring;
 	struct rte_mempool *mp;
 	const struct rte_eth_rxtx_callback *cb;
-	void *filter;
+	const void *filter;
 } rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT],
 tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
 
@@ -86,6 +90,9 @@ pdump_copy(uint16_t port, struct rte_mbuf **pkts,
 	ring = cbs->ring;
 	mp = cbs->mp;
 	for (i = 0; i < nb_pkts; i++) {
+		if (rte_pcap_filter(cbs->filter, pkts[i]) == 0)
+			continue;
+
 		p = rte_pktmbuf_copy(pkts[i], mp, 0, UINT32_MAX);
 		if (p) {
 			p->port = port;
@@ -124,8 +131,8 @@ pdump_tx(uint16_t port, uint16_t qidx __rte_unused,
 
 static int
 pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
-				struct rte_ring *ring, struct rte_mempool *mp,
-				uint16_t operation)
+			    struct rte_ring *ring, struct rte_mempool *mp,
+			    uint16_t operation, const void *filter)
 {
 	uint16_t qid;
 	struct pdump_rxtx_cbs *cbs = NULL;
@@ -143,6 +150,7 @@ pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
 			}
 			cbs->ring = ring;
 			cbs->mp = mp;
+			cbs->filter = filter;
 			cbs->cb = rte_eth_add_first_rx_callback(port, qid,
 								pdump_rx, cbs);
 			if (cbs->cb == NULL) {
@@ -178,8 +186,8 @@ pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
 
 static int
 pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
-				struct rte_ring *ring, struct rte_mempool *mp,
-				uint16_t operation)
+			    struct rte_ring *ring, struct rte_mempool *mp,
+			    uint16_t operation, const void *filter)
 {
 
 	uint16_t qid;
@@ -198,6 +206,7 @@ pdump_register_tx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
 			}
 			cbs->ring = ring;
 			cbs->mp = mp;
+			cbs->filter = filter;
 			cbs->cb = rte_eth_add_tx_callback(port, qid, pdump_tx,
 								cbs);
 			if (cbs->cb == NULL) {
@@ -241,6 +250,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p)
 	uint16_t operation;
 	struct rte_ring *ring;
 	struct rte_mempool *mp;
+	const void *filter = NULL;
 
 	flags = p->flags;
 	operation = p->op;
@@ -256,6 +266,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p)
 		queue = p->data.en_v1.queue;
 		ring = p->data.en_v1.ring;
 		mp = p->data.en_v1.mp;
+		filter = p->data.en_v1.filter;
 	} else {
 		ret = rte_eth_dev_get_port_by_name(p->data.dis_v1.device,
 				&port);
@@ -299,7 +310,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p)
 	if (flags & RTE_PDUMP_FLAG_RX) {
 		end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_rx_q : queue + 1;
 		ret = pdump_register_rx_callbacks(end_q, port, queue, ring, mp,
-							operation);
+						  operation, filter);
 		if (ret < 0)
 			return ret;
 	}
@@ -308,7 +319,7 @@ set_pdump_rxtx_cbs(const struct pdump_request *p)
 	if (flags & RTE_PDUMP_FLAG_TX) {
 		end_q = (queue == RTE_PDUMP_ALL_QUEUES) ? nb_tx_q : queue + 1;
 		ret = pdump_register_tx_callbacks(end_q, port, queue, ring, mp,
-							operation);
+						  operation, filter);
 		if (ret < 0)
 			return ret;
 	}
@@ -424,12 +435,41 @@ pdump_validate_port(uint16_t port, char *name)
 }
 
 static int
-pdump_prepare_client_request(char *device, uint16_t queue,
-				uint32_t flags,
-				uint16_t operation,
-				struct rte_ring *ring,
-				struct rte_mempool *mp,
-				void *filter)
+pdump_validate_filter(const void *filter, unsigned int len)
+{
+	size_t alloc_len;
+
+	if (filter == NULL)
+		return 0;
+
+	/* must be in malloc memory to be accessible in primary */
+	if (rte_malloc_validate(filter, &alloc_len) != 0) {
+		PDUMP_LOG(ERR, "filter is not in rte_malloc memory\n");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	/* divide, not multiply: len * BPF_INS_SIZE can wrap a 32-bit size_t */
+	if (len > alloc_len / BPF_INS_SIZE) {
+		PDUMP_LOG(ERR, "filter length error\n");
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	if (!rte_pcap_validate_filter(filter, len)) {
+		PDUMP_LOG(ERR, "filter is not valid BPF code\n");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	return 0;
+}
+
+static int
+pdump_prepare_client_request(const char *device, uint16_t queue,
+			     uint32_t flags,
+			     uint16_t operation,
+			     struct rte_ring *ring,
+			     struct rte_mempool *mp,
+			     const void *filter)
 {
 	int ret = -1;
 	struct rte_mp_msg mp_req, *mp_rep;
@@ -476,14 +516,13 @@ pdump_prepare_client_request(char *device, uint16_t queue,
 }
 
 int
-rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags,
-			struct rte_ring *ring,
-			struct rte_mempool *mp,
-			void *filter)
+rte_pdump_enable_v1911(uint16_t port, uint16_t queue, uint32_t flags,
+		       struct rte_ring *ring, struct rte_mempool *mp,
+		       const void *filter, uint32_t filter_len)
 {
 
-	int ret = 0;
 	char name[DEVICE_ID_SIZE];
+	int ret;
 
 	ret = pdump_validate_port(port, name);
 	if (ret < 0)
@@ -492,36 +531,86 @@ rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags,
 	if (ret < 0)
 		return ret;
 	ret = pdump_validate_flags(flags);
+	if (ret < 0)
+		return ret;
+	ret = pdump_validate_filter(filter, filter_len);
 	if (ret < 0)
 		return ret;
 
 	ret = pdump_prepare_client_request(name, queue, flags,
-						ENABLE, ring, mp, filter);
+					   ENABLE, ring, mp, filter);
 
 	return ret;
 }
+BIND_DEFAULT_SYMBOL(rte_pdump_enable, _v1911, 19.11);
+MAP_STATIC_SYMBOL(int rte_pdump_enable(uint16_t port, uint16_t queue,
+				       uint32_t flags, struct rte_ring *ring,
+				       struct rte_mempool *mp,
+				       const void *filter, uint32_t len),
+		  rte_pdump_enable_v1911);
 
 int
-rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
-				uint32_t flags,
-				struct rte_ring *ring,
-				struct rte_mempool *mp,
-				void *filter)
+rte_pdump_enable_v1607(uint16_t port, uint16_t queue, uint32_t flags,
+		       struct rte_ring *ring,
+		       struct rte_mempool *mp,
+		       void *filter)
 {
-	int ret = 0;
+	if (filter != NULL)
+		PDUMP_LOG(WARNING, "filter not supported in this version\n");
+
+	return rte_pdump_enable_v1911(port, queue, flags, ring, mp,
+				      NULL, 0);
+}
+VERSION_SYMBOL(rte_pdump_enable, _v1607, 16.07);
+
+int
+rte_pdump_enable_by_deviceid_v1911(const char *device_id, uint16_t queue,
+				   uint32_t flags,
+				   struct rte_ring *ring,
+				   struct rte_mempool *mp,
+				   const void *filter, uint32_t filter_len)
+{
+	int ret;
 
 	ret = pdump_validate_ring_mp(ring, mp);
 	if (ret < 0)
 		return ret;
 	ret = pdump_validate_flags(flags);
+	if (ret < 0)
+		return ret;
+	ret = pdump_validate_filter(filter, filter_len);
 	if (ret < 0)
 		return ret;
 
 	ret = pdump_prepare_client_request(device_id, queue, flags,
-						ENABLE, ring, mp, filter);
+					   ENABLE, ring, mp, filter);
 
 	return ret;
 }
+BIND_DEFAULT_SYMBOL(rte_pdump_enable_by_deviceid, _v1911, 19.11);
+MAP_STATIC_SYMBOL(int rte_pdump_enable_by_deviceid(const char *device_id,
+						   uint16_t queue,
+						   uint32_t flags,
+						   struct rte_ring *ring,
+						   struct rte_mempool *mp,
+						   const void *filter,
+						   uint32_t len),
+		  rte_pdump_enable_by_deviceid_v1911);
+
+int
+rte_pdump_enable_by_deviceid_v1607(char *device_id, uint16_t queue,
+				   uint32_t flags,
+				   struct rte_ring *ring,
+				   struct rte_mempool *mp,
+				   void *filter)
+{
+	if (filter != NULL)
+		PDUMP_LOG(WARNING, "filter not supported in this version\n");
+
+	return rte_pdump_enable_by_deviceid_v1911(device_id, queue, flags,
+						  ring, mp, NULL, 0);
+}
+VERSION_SYMBOL(rte_pdump_enable_by_deviceid, _v1607, 16.07);
 
 int
 rte_pdump_disable(uint16_t port, uint16_t queue, uint32_t flags)
diff --git a/lib/librte_pdump/rte_pdump.h b/lib/librte_pdump/rte_pdump.h
index 6b00fc17aeb2..12cb46f8b0e9 100644
--- a/lib/librte_pdump/rte_pdump.h
+++ b/lib/librte_pdump/rte_pdump.h
@@ -68,17 +68,25 @@ rte_pdump_uninit(void);
  * @param mp
  *  mempool on to which original packets will be mirrored or duplicated.
  * @param filter
- *  place holder for packet filtering.
+ *  classic BPF filter program; must be in rte_malloc memory, NULL for none
+ * @param len
+ *  length of filter (in BPF instructions)
  *
  * @return
  *    0 on success, -1 on error, rte_errno is set accordingly.
  */
-
 int
 rte_pdump_enable(uint16_t port, uint16_t queue, uint32_t flags,
-		struct rte_ring *ring,
-		struct rte_mempool *mp,
-		void *filter);
+		 struct rte_ring *ring, struct rte_mempool *mp,
+		 const void *filter, uint32_t len);
+int
+rte_pdump_enable_v1607(uint16_t port, uint16_t queue, uint32_t flags,
+		       struct rte_ring *ring, struct rte_mempool *mp,
+		       void *filter);
+int
+rte_pdump_enable_v1911(uint16_t port, uint16_t queue, uint32_t flags,
+		       struct rte_ring *ring, struct rte_mempool *mp,
+		       const void *filter, uint32_t len);
 
 /**
  * Disables packet capturing on given port and queue.
@@ -118,18 +126,29 @@ rte_pdump_disable(uint16_t port, uint16_t queue, uint32_t flags);
  * @param mp
  *  mempool on to which original packets will be mirrored or duplicated.
  * @param filter
- *  place holder for packet filtering.
+ *  classic BPF filter program; must be in rte_malloc memory, NULL for none
+ * @param len
+ *  length of filter (in BPF instructions)
  *
  * @return
  *    0 on success, -1 on error, rte_errno is set accordingly.
  */
-
 int
-rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
-				uint32_t flags,
-				struct rte_ring *ring,
-				struct rte_mempool *mp,
-				void *filter);
+rte_pdump_enable_by_deviceid(const char *device_id, uint16_t queue,
+			     uint32_t flags,
+			     struct rte_ring *ring,
+			     struct rte_mempool *mp,
+			     const void *filter, uint32_t len);
+int
+rte_pdump_enable_by_deviceid_v1607(char *device_id, uint16_t queue,
+				   uint32_t flags, struct rte_ring *ring,
+				   struct rte_mempool *mp,
+				   void *filter);
+int
+rte_pdump_enable_by_deviceid_v1911(const char *device_id, uint16_t queue,
+				   uint32_t flags, struct rte_ring *ring,
+				   struct rte_mempool *mp,
+				   const void *filter, uint32_t len);
 
 /**
  * Disables packet capturing on given device_id and queue.
@@ -151,7 +170,16 @@ rte_pdump_enable_by_deviceid(char *device_id, uint16_t queue,
  */
 int
 rte_pdump_disable_by_deviceid(char *device_id, uint16_t queue,
-				uint32_t flags);
+			      uint32_t flags);
+
+
+/* internal */
+int
+rte_pcap_filter(const void *filter, const struct rte_mbuf *m);
+
+/* internal */
+int
+rte_pcap_validate_filter(const void *filter, uint32_t len);
 
 #ifdef __cplusplus
 }
diff --git a/lib/librte_pdump/rte_pdump_version.map b/lib/librte_pdump/rte_pdump_version.map
index 3e744f30123c..e78ba5a8350a 100644
--- a/lib/librte_pdump/rte_pdump_version.map
+++ b/lib/librte_pdump/rte_pdump_version.map
@@ -10,3 +10,10 @@ DPDK_16.07 {
 
 	local: *;
 };
+
+DPDK_19.11 {
+	global:
+
+	rte_pdump_enable;
+	rte_pdump_enable_by_deviceid;
+} DPDK_16.07;
-- 
2.20.1


  parent reply	other threads:[~2019-10-07 16:53 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-07 16:52 [dpdk-dev] [RFC 0/8] Packet Capture enhancements Stephen Hemminger
2019-10-07 16:52 ` [dpdk-dev] [RFC 1/8] pdump: use new pktmbuf copy function Stephen Hemminger
2019-10-07 16:52 ` [dpdk-dev] [RFC 2/8] pdump: use dynamic logtype Stephen Hemminger
2019-10-07 16:52 ` [dpdk-dev] [RFC 3/8] pdump: tag copied mbuf with port Stephen Hemminger
2019-10-07 16:52 ` [dpdk-dev] [RFC 4/8] pdump: stamp packets with current timestamp Stephen Hemminger
2019-10-07 16:52 ` Stephen Hemminger [this message]
2019-10-07 17:07   ` [dpdk-dev] [RFC 5/8] pdump: add classic BPF filtering Jerin Jacob
2019-10-07 17:33     ` Stephen Hemminger
2019-10-07 19:33       ` Jerin Jacob
2019-10-07 21:45         ` Stephen Hemminger
2019-10-08  3:47           ` Jerin Jacob
2019-10-08  4:01             ` Stephen Hemminger
2019-10-08  4:15               ` Jerin Jacob
2019-10-08  4:22                 ` Stephen Hemminger
2019-10-08 21:08                   ` Morten Brørup
2019-10-09  8:21                     ` Ananyev, Konstantin
2019-10-09 14:59                       ` Stephen Hemminger
2019-10-07 16:52 ` [dpdk-dev] [RFC 6/8] pdump: add packet header truncation Stephen Hemminger
2019-10-07 16:52 ` [dpdk-dev] [RFC 7/8] pcapng: add new library for writing pcapng files Stephen Hemminger
2019-10-07 16:52 ` [dpdk-dev] [RFC 8/8] app/capture: add packet capture using pcapng Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191007165232.14535-6-stephen@networkplumber.org \
    --to=stephen@networkplumber.org \
    --cc=dev@dpdk.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.