Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH next v3 2/2] sctp: make use of SCTP_TRUNC4 macro
From: Marcelo Ricardo Leitner @ 2016-09-21 11:45 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Neil Horman, Vlad Yasevich, David Miller,
	David Laight
In-Reply-To: <cover.1474457954.git.marcelo.leitner@gmail.com>

And avoid the usage of '&~3'. This is the last place still not using
the macro.
Also break the line to make it easier to read.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
When I checked it the other day I thought I had this patch applied by
the moment but I hadn't.

v2: updated patch summary

 net/sctp/chunk.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 76eae828ec891076bc2360bf0ee89c3b650ef986..8afe2e90d003b9e1bbf233fd4e2fde0538ac65f6 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -195,9 +195,10 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 	/* This is the biggest possible DATA chunk that can fit into
 	 * the packet
 	 */
-	max_data = (asoc->pathmtu -
-		sctp_sk(asoc->base.sk)->pf->af->net_header_len -
-		sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk)) & ~3;
+	max_data = asoc->pathmtu -
+		   sctp_sk(asoc->base.sk)->pf->af->net_header_len -
+		   sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk);
+	max_data = SCTP_TRUNC4(max_data);
 
 	max = asoc->frag_point;
 	/* If the the peer requested that we authenticate DATA chunks
-- 
2.7.4

^ permalink raw reply related

* [PATCH next v3 1/2] sctp: rename WORD_TRUNC/ROUND macros
From: Marcelo Ricardo Leitner @ 2016-09-21 11:45 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Neil Horman, Vlad Yasevich, David Miller,
	David Laight
In-Reply-To: <cover.1474457954.git.marcelo.leitner@gmail.com>

To something more meaningful these days, specially because this is
working on packet headers or lengths and which are not tied to any CPU
arch but to the protocol itself.

So, WORD_TRUNC becomes SCTP_TRUNC4 and WORD_ROUND becomes SCTP_PAD4.

Reported-by: David Laight <David.Laight@ACULAB.COM>
Reported-by: David Miller <davem@davemloft.net>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
v3:
- Name it SCTP_PAD4 instead of SCTP_ALIGN4, as suggested by David Laight

 include/net/sctp/sctp.h  | 10 +++++-----
 net/netfilter/xt_sctp.c  |  2 +-
 net/sctp/associola.c     |  2 +-
 net/sctp/chunk.c         |  6 +++---
 net/sctp/input.c         |  8 ++++----
 net/sctp/inqueue.c       |  2 +-
 net/sctp/output.c        | 12 ++++++------
 net/sctp/sm_make_chunk.c | 28 ++++++++++++++--------------
 net/sctp/sm_statefuns.c  |  6 +++---
 net/sctp/transport.c     |  4 ++--
 net/sctp/ulpevent.c      |  4 ++--
 11 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 632e205ca54bfe85124753e09445251056e19aa7..87a7f42e763963255afc32deaa774570bf03551a 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -83,9 +83,9 @@
 #endif
 
 /* Round an int up to the next multiple of 4.  */
-#define WORD_ROUND(s) (((s)+3)&~3)
+#define SCTP_PAD4(s) (((s)+3)&~3)
 /* Truncate to the previous multiple of 4.  */
-#define WORD_TRUNC(s) ((s)&~3)
+#define SCTP_TRUNC4(s) ((s)&~3)
 
 /*
  * Function declarations.
@@ -433,7 +433,7 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu)
 	if (asoc->user_frag)
 		frag = min_t(int, frag, asoc->user_frag);
 
-	frag = WORD_TRUNC(min_t(int, frag, SCTP_MAX_CHUNK_LEN));
+	frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN));
 
 	return frag;
 }
@@ -462,7 +462,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member)
 for (pos.v = chunk->member;\
      pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
      ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
-     pos.v += WORD_ROUND(ntohs(pos.p->length)))
+     pos.v += SCTP_PAD4(ntohs(pos.p->length)))
 
 #define sctp_walk_errors(err, chunk_hdr)\
 _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
@@ -472,7 +472,7 @@ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
 	    sizeof(sctp_chunkhdr_t));\
      (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
      ntohs(err->length) >= sizeof(sctp_errhdr_t); \
-     err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length))))
+     err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length))))
 
 #define sctp_walk_fwdtsn(pos, chunk)\
 _sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk))
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index ef36a56a02c6881c58296b2bf45c4b99d3836456..4dedb96d1a067de6c9766017642e16f9b148d616 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -68,7 +68,7 @@ match_packet(const struct sk_buff *skb,
 			 ++i, offset, sch->type, htons(sch->length),
 			 sch->flags);
 #endif
-		offset += WORD_ROUND(ntohs(sch->length));
+		offset += SCTP_PAD4(ntohs(sch->length));
 
 		pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset);
 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 1c23060c41a669f38f4e2d244cfb61c85a522be6..f10d3397f917986d25240fc42f6a33ae8049e7b7 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1408,7 +1408,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc)
 				transports) {
 		if (t->pmtu_pending && t->dst) {
 			sctp_transport_update_pmtu(sk, t,
-						   WORD_TRUNC(dst_mtu(t->dst)));
+						   SCTP_TRUNC4(dst_mtu(t->dst)));
 			t->pmtu_pending = 0;
 		}
 		if (!pmtu || (t->pathmtu < pmtu))
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index af9cc8055465b18e9754a5542fc7bd43f9dad240..76eae828ec891076bc2360bf0ee89c3b650ef986 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -208,8 +208,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 		struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc);
 
 		if (hmac_desc)
-			max_data -= WORD_ROUND(sizeof(sctp_auth_chunk_t) +
-					    hmac_desc->hmac_len);
+			max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) +
+					      hmac_desc->hmac_len);
 	}
 
 	/* Now, check if we need to reduce our max */
@@ -229,7 +229,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 	    asoc->outqueue.out_qlen == 0 &&
 	    list_empty(&asoc->outqueue.retransmit) &&
 	    msg_len > max)
-		max_data -= WORD_ROUND(sizeof(sctp_sack_chunk_t));
+		max_data -= SCTP_PAD4(sizeof(sctp_sack_chunk_t));
 
 	/* Encourage Cookie-ECHO bundling. */
 	if (asoc->state < SCTP_STATE_COOKIE_ECHOED)
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 69444d32ecda6cd1a4924911172feba89c5ae976..a1d85065bfc0bb38cab03c925fe230103c13d593 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -605,7 +605,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
 		/* PMTU discovery (RFC1191) */
 		if (ICMP_FRAG_NEEDED == code) {
 			sctp_icmp_frag_needed(sk, asoc, transport,
-					      WORD_TRUNC(info));
+					      SCTP_TRUNC4(info));
 			goto out_unlock;
 		} else {
 			if (ICMP_PROT_UNREACH == code) {
@@ -673,7 +673,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb)
 		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
 			break;
 
-		ch_end = offset + WORD_ROUND(ntohs(ch->length));
+		ch_end = offset + SCTP_PAD4(ntohs(ch->length));
 		if (ch_end > skb->len)
 			break;
 
@@ -1121,7 +1121,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
 		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
 			break;
 
-		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+		ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
 		if (ch_end > skb_tail_pointer(skb))
 			break;
 
@@ -1190,7 +1190,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net,
 	 * that the chunk length doesn't cause overflow.  Otherwise, we'll
 	 * walk off the end.
 	 */
-	if (WORD_ROUND(ntohs(ch->length)) > skb->len)
+	if (SCTP_PAD4(ntohs(ch->length)) > skb->len)
 		return NULL;
 
 	/* If this is INIT/INIT-ACK look inside the chunk too. */
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 6437aa97cfd79f14c633499c2b131389204c435b..f731de3e8428c951583c3217586da0609bb25d3a 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -213,7 +213,7 @@ new_skb:
 	}
 
 	chunk->chunk_hdr = ch;
-	chunk->chunk_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+	chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
 	skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
 	chunk->subh.v = NULL; /* Subheader is no longer valid.  */
 
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 0c605ec74dc4ac2b300d7e52107597bb1be5fefd..2a5c1896d18fa674f0e0e84ff6787d04914b2b73 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -297,7 +297,7 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
 					      struct sctp_chunk *chunk)
 {
 	sctp_xmit_t retval = SCTP_XMIT_OK;
-	__u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length));
+	__u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length));
 
 	/* Check to see if this chunk will fit into the packet */
 	retval = sctp_packet_will_fit(packet, chunk, chunk_len);
@@ -508,7 +508,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
 		if (gso) {
 			pkt_size = packet->overhead;
 			list_for_each_entry(chunk, &packet->chunk_list, list) {
-				int padded = WORD_ROUND(chunk->skb->len);
+				int padded = SCTP_PAD4(chunk->skb->len);
 
 				if (pkt_size + padded > tp->pathmtu)
 					break;
@@ -538,7 +538,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
 		 * included in the chunk length field.  The sender should
 		 * never pad with more than 3 bytes.
 		 *
-		 * [This whole comment explains WORD_ROUND() below.]
+		 * [This whole comment explains SCTP_PAD4() below.]
 		 */
 
 		pkt_size -= packet->overhead;
@@ -560,7 +560,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
 				has_data = 1;
 			}
 
-			padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
+			padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len;
 			if (padding)
 				memset(skb_put(chunk->skb, padding), 0, padding);
 
@@ -587,7 +587,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
 			 * acknowledged or have failed.
 			 * Re-queue auth chunks if needed.
 			 */
-			pkt_size -= WORD_ROUND(chunk->skb->len);
+			pkt_size -= SCTP_PAD4(chunk->skb->len);
 
 			if (!sctp_chunk_is_data(chunk) && chunk != packet->auth)
 				sctp_chunk_free(chunk);
@@ -911,7 +911,7 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
 		 */
 		maxsize = pmtu - packet->overhead;
 		if (packet->auth)
-			maxsize -= WORD_ROUND(packet->auth->skb->len);
+			maxsize -= SCTP_PAD4(packet->auth->skb->len);
 		if (chunk_len > maxsize)
 			retval = SCTP_XMIT_PMTU_FULL;
 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 8c77b87a8565cb4f82c09cea65557dc9c8d1138f..79dd66079dd7f0f503c2493d66885b9939589ee4 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -253,7 +253,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	num_types = sp->pf->supported_addrs(sp, types);
 
 	chunksize = sizeof(init) + addrs_len;
-	chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types));
+	chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types));
 	chunksize += sizeof(ecap_param);
 
 	if (asoc->prsctp_enable)
@@ -283,14 +283,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 		/* Add HMACS parameter length if any were defined */
 		auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
 		if (auth_hmacs->length)
-			chunksize += WORD_ROUND(ntohs(auth_hmacs->length));
+			chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
 		else
 			auth_hmacs = NULL;
 
 		/* Add CHUNKS parameter length */
 		auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
 		if (auth_chunks->length)
-			chunksize += WORD_ROUND(ntohs(auth_chunks->length));
+			chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
 		else
 			auth_chunks = NULL;
 
@@ -300,8 +300,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 
 	/* If we have any extensions to report, account for that */
 	if (num_ext)
-		chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) +
-					num_ext);
+		chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
+				       num_ext);
 
 	/* RFC 2960 3.3.2 Initiation (INIT) (1)
 	 *
@@ -443,13 +443,13 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 
 		auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
 		if (auth_hmacs->length)
-			chunksize += WORD_ROUND(ntohs(auth_hmacs->length));
+			chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
 		else
 			auth_hmacs = NULL;
 
 		auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
 		if (auth_chunks->length)
-			chunksize += WORD_ROUND(ntohs(auth_chunks->length));
+			chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
 		else
 			auth_chunks = NULL;
 
@@ -458,8 +458,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 	}
 
 	if (num_ext)
-		chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) +
-					num_ext);
+		chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
+				       num_ext);
 
 	/* Now allocate and fill out the chunk.  */
 	retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp);
@@ -1390,7 +1390,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc,
 	struct sock *sk;
 
 	/* No need to allocate LL here, as this is only a chunk. */
-	skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), gfp);
+	skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp);
 	if (!skb)
 		goto nodata;
 
@@ -1482,7 +1482,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
 	void *target;
 	void *padding;
 	int chunklen = ntohs(chunk->chunk_hdr->length);
-	int padlen = WORD_ROUND(chunklen) - chunklen;
+	int padlen = SCTP_PAD4(chunklen) - chunklen;
 
 	padding = skb_put(chunk->skb, padlen);
 	target = skb_put(chunk->skb, len);
@@ -1900,7 +1900,7 @@ static int sctp_process_missing_param(const struct sctp_association *asoc,
 	struct __sctp_missing report;
 	__u16 len;
 
-	len = WORD_ROUND(sizeof(report));
+	len = SCTP_PAD4(sizeof(report));
 
 	/* Make an ERROR chunk, preparing enough room for
 	 * returning multiple unknown parameters.
@@ -2098,9 +2098,9 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc,
 
 		if (*errp) {
 			if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
-					WORD_ROUND(ntohs(param.p->length))))
+					SCTP_PAD4(ntohs(param.p->length))))
 				sctp_addto_chunk_fixed(*errp,
-						WORD_ROUND(ntohs(param.p->length)),
+						SCTP_PAD4(ntohs(param.p->length)),
 						param.v);
 		} else {
 			/* If there is no memory for generating the ERROR
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d88bb2b0b69913ad5962f9a5655d413f2c210ed0..026e3bca4a94bd34b418d5e6947f7182c1512358 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3454,7 +3454,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
 		}
 
 		/* Report violation if chunk len overflows */
-		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+		ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
 		if (ch_end > skb_tail_pointer(skb))
 			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
@@ -4185,7 +4185,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
 		hdr = unk_chunk->chunk_hdr;
 		err_chunk = sctp_make_op_error(asoc, unk_chunk,
 					       SCTP_ERROR_UNKNOWN_CHUNK, hdr,
-					       WORD_ROUND(ntohs(hdr->length)),
+					       SCTP_PAD4(ntohs(hdr->length)),
 					       0);
 		if (err_chunk) {
 			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -4203,7 +4203,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net,
 		hdr = unk_chunk->chunk_hdr;
 		err_chunk = sctp_make_op_error(asoc, unk_chunk,
 					       SCTP_ERROR_UNKNOWN_CHUNK, hdr,
-					       WORD_ROUND(ntohs(hdr->length)),
+					       SCTP_PAD4(ntohs(hdr->length)),
 					       0);
 		if (err_chunk) {
 			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 81b86678be4d6fccc527d3c3e509c12576b2194c..ce54dce13ddb2cba959bca080b06f9285f2fe16d 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -233,7 +233,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
 	}
 
 	if (transport->dst) {
-		transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
+		transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst));
 	} else
 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 }
@@ -287,7 +287,7 @@ void sctp_transport_route(struct sctp_transport *transport,
 		return;
 	}
 	if (transport->dst) {
-		transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
+		transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst));
 
 		/* Initialize sk->sk_rcv_saddr, if the transport is the
 		 * association's active path for getsockname().
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index d85b803da11d21202d876c95811bcab4e1fb507e..bea00058ce354b7c910cab96d3f2ec4747763a10 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -383,7 +383,7 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc,
 
 	ch = (sctp_errhdr_t *)(chunk->skb->data);
 	cause = ch->cause;
-	elen = WORD_ROUND(ntohs(ch->length)) - sizeof(sctp_errhdr_t);
+	elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t);
 
 	/* Pull off the ERROR header.  */
 	skb_pull(chunk->skb, sizeof(sctp_errhdr_t));
@@ -688,7 +688,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
 	 * MUST ignore the padding bytes.
 	 */
 	len = ntohs(chunk->chunk_hdr->length);
-	padding = WORD_ROUND(len) - len;
+	padding = SCTP_PAD4(len) - len;
 
 	/* Fixup cloned skb with just this chunks data.  */
 	skb_trim(skb, chunk->chunk_end - padding - skb->data);
-- 
2.7.4

^ permalink raw reply related

* [PATCH next v3 0/2] Rename WORD_TRUNC/ROUND macros and use them
From: Marcelo Ricardo Leitner @ 2016-09-21 11:45 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Neil Horman, Vlad Yasevich, David Miller,
	David Laight

This patchset aims to rename these macros to a non-confusing name, as
reported by David Laight and David Miller, and to update all remaining
places to make use of it, which was 1 last remaining spot.

v3:
- Name it SCTP_PAD4 instead of SCTP_ALIGN4, as suggested by David Laight
v2:
- fixed 2nd patch summary

Details on the specific changelogs.

Thanks!

Marcelo Ricardo Leitner (2):
  sctp: rename WORD_TRUNC/ROUND macros
  sctp: make use of WORD_TRUNC macro

 include/net/sctp/sctp.h  | 10 +++++-----
 net/netfilter/xt_sctp.c  |  2 +-
 net/sctp/associola.c     |  2 +-
 net/sctp/chunk.c         | 13 +++++++------
 net/sctp/input.c         |  8 ++++----
 net/sctp/inqueue.c       |  2 +-
 net/sctp/output.c        | 12 ++++++------
 net/sctp/sm_make_chunk.c | 28 ++++++++++++++--------------
 net/sctp/sm_statefuns.c  |  6 +++---
 net/sctp/transport.c     |  4 ++--
 net/sctp/ulpevent.c      |  4 ++--
 11 files changed, 46 insertions(+), 45 deletions(-)

-- 
2.7.4

^ permalink raw reply

* [ANNOUNCE] ndiv: line-rate network traffic processing
From: Willy Tarreau @ 2016-09-21 11:28 UTC (permalink / raw)
  To: netdev

Hi,

Over the last 3 years I've been working a bit on high traffic processing
for various reasons. It started with the wish to capture line-rate GigE
traffic on very small fanless ARM machines and the framework has evolved
to be used at my company as a basis for our anti-DDoS engine capable of
dealing with multiple 10G links saturated with floods.

I know it comes a bit late now that there is XDP, but it's my first
vacation since then and I needed to have a bit of calm time to collect
the patches from the various branches and put them together. Anyway I'm
sending this here in case it can be of interest to anyone, for use or
just to study it.

I presented it in 2014 at kernel recipes :
  http://kernel-recipes.org/en/2014/ndiv-a-low-overhead-network-traffic-diverter/

It now supports drivers mvneta, ixgbe, e1000e, e1000 and igb. It is
very light, and retrieves the packets in the NIC's driver before they
are converted to an skb, then submits them to a registered RX handler
running in softirq context so we have the best of all worlds by
benefitting from CPU scalability, delayed processing, and not paying
the cost of switching to userland. Also an rx_done() function allows
handlers to batch their processing. The RX handler returns an action
among accepting the packet as-is, accepting it modified (eg: vlan or
tunnel decapsulation), dropping it, postponing the processing
(equivalent to EAGAIN), or building a new packet to send back.

This last function is the one requiring the most changes in existing
drivers, but offers the widest range of possibilities. We use it to
send SYN cookies, but I have also implemented a stateless HTTP server
supporting keep-alive using it, achieving line-rate traffic processing
on a single CPU core when the NIC supports it. It's very convenient to
test various stateful TCP components as it's easy to sustain millions
of connections per second on it.

It does not support forwarding between NICs. It was my first goal
because I wanted to implement a TAP with it, bridging the traffic
between two ports, but figured it was adding some complexity to the
system back then. However since then we've implemented traffic
capture in our product, exploiting this framework to capture without
losses at 14 Mpps. I may find some time to try to extract it later.
It uses the /sys API so that you can simply plug tcpdump -r on a
file there, though there's also an mmap version which uses less CPU
(that's important at 10G).

In its current form since the initial code's intent was to limit
core changes, it happens not to modify anything in the kernel by
default and to reuse the net_device's ax25_ptr to attach devices
(idea borrowed from netmap), so it can be used on an existing
kernel just by loading the patched network drivers (yes, I know
it's not a valid solution for the long term).

The current code is available here :

  http://git.kernel.org/cgit/linux/kernel/git/wtarreau/ndiv.git/

Please let me know if there could be some interest in rebasing it
on more recent versions (currently 3.10, 3.14 and 4.4 are supported).
I don't have much time to assign to it since it works fine as-is,
but will be glad to do so if that can be useful.

Also the stateless HTTP server provided in it definitely is a nice
use case for testing such a framework.

Regards,
Willy

^ permalink raw reply

* [PATCH 4/4] vti6: fix input path
From: Steffen Klassert @ 2016-09-21 11:05 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev
In-Reply-To: <1474455946-674-1-git-send-email-steffen.klassert@secunet.com>

From: Nicolas Dichtel <nicolas.dichtel@6wind.com>

Since commit 1625f4529957, vti6 is broken, all input packets are dropped
(LINUX_MIB_XFRMINNOSTATES is incremented).

XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 is set by vti6_rcv() before calling
xfrm6_rcv()/xfrm6_rcv_spi(), thus we cannot set to NULL that value in
xfrm6_rcv_spi().

A new function xfrm6_rcv_tnl() that enables to pass a value to
xfrm6_rcv_spi() is added, so that xfrm6_rcv() is not touched (this function
is used in several handlers).

CC: Alexey Kodanev <alexey.kodanev@oracle.com>
Fixes: 1625f4529957 ("net/xfrm_input: fix possible NULL deref of tunnel.ip6->parms.i_key")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h      |  4 +++-
 net/ipv6/ip6_vti.c      |  4 +---
 net/ipv6/xfrm6_input.c  | 16 +++++++++++-----
 net/ipv6/xfrm6_tunnel.c |  2 +-
 4 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index adfebd6..1793431 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1540,8 +1540,10 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
 void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
 int xfrm6_extract_header(struct sk_buff *skb);
 int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
-int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi);
+int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
+		  struct ip6_tnl *t);
 int xfrm6_transport_finish(struct sk_buff *skb, int async);
+int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t);
 int xfrm6_rcv(struct sk_buff *skb);
 int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 		     xfrm_address_t *saddr, u8 proto);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 52a2f73..5bd3afd 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -321,11 +321,9 @@ static int vti6_rcv(struct sk_buff *skb)
 			goto discard;
 		}
 
-		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
-
 		rcu_read_unlock();
 
-		return xfrm6_rcv(skb);
+		return xfrm6_rcv_tnl(skb, t);
 	}
 	rcu_read_unlock();
 	return -EINVAL;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 00a2d40..b578956 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -21,9 +21,10 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
 	return xfrm6_extract_header(skb);
 }
 
-int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
+int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
+		  struct ip6_tnl *t)
 {
-	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
 	XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
 	return xfrm_input(skb, nexthdr, spi, 0);
@@ -49,13 +50,18 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 	return -1;
 }
 
-int xfrm6_rcv(struct sk_buff *skb)
+int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
 {
 	return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
-			     0);
+			     0, t);
 }
-EXPORT_SYMBOL(xfrm6_rcv);
+EXPORT_SYMBOL(xfrm6_rcv_tnl);
 
+int xfrm6_rcv(struct sk_buff *skb)
+{
+	return xfrm6_rcv_tnl(skb, NULL);
+}
+EXPORT_SYMBOL(xfrm6_rcv);
 int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 		     xfrm_address_t *saddr, u8 proto)
 {
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 5743044..e1c0bbe 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -236,7 +236,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb)
 	__be32 spi;
 
 	spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr);
-	return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi);
+	return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL);
 }
 
 static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-- 
1.9.1

^ permalink raw reply related

* [PATCH 2/4] vti: use right inner_mode for inbound inter address family policy checks
From: Steffen Klassert @ 2016-09-21 11:05 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev
In-Reply-To: <1474455946-674-1-git-send-email-steffen.klassert@secunet.com>

From: "thomas.zeitlhofer+lkml@ze-it.at" <thomas.zeitlhofer+lkml@ze-it.at>

In case of inter address family tunneling (IPv6 over vti4 or IPv4 over
vti6), the inbound policy checks in vti_rcv_cb() and vti6_rcv_cb() are
using the wrong address family. As a result, all inbound inter address
family traffic is dropped.

Use the xfrm_ip2inner_mode() helper, as done in xfrm_input() (i.e., also
increment LINUX_MIB_XFRMINSTATEMODEERROR in case of error), to select the
inner_mode that contains the right address family for the inbound policy
checks.

Signed-off-by: Thomas Zeitlhofer <thomas.zeitlhofer+lkml@ze-it.at>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/ip_vti.c  | 15 ++++++++++++++-
 net/ipv6/ip6_vti.c | 15 ++++++++++++++-
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index cc701fa..5d7944f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -88,6 +88,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
 	struct net_device *dev;
 	struct pcpu_sw_netstats *tstats;
 	struct xfrm_state *x;
+	struct xfrm_mode *inner_mode;
 	struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
 	u32 orig_mark = skb->mark;
 	int ret;
@@ -105,7 +106,19 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
 	}
 
 	x = xfrm_input_state(skb);
-	family = x->inner_mode->afinfo->family;
+
+	inner_mode = x->inner_mode;
+
+	if (x->sel.family == AF_UNSPEC) {
+		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+		if (inner_mode == NULL) {
+			XFRM_INC_STATS(dev_net(skb->dev),
+				       LINUX_MIB_XFRMINSTATEMODEERROR);
+			return -EINVAL;
+		}
+	}
+
+	family = inner_mode->afinfo->family;
 
 	skb->mark = be32_to_cpu(tunnel->parms.i_key);
 	ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index d90a11f..52a2f73 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -340,6 +340,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
 	struct net_device *dev;
 	struct pcpu_sw_netstats *tstats;
 	struct xfrm_state *x;
+	struct xfrm_mode *inner_mode;
 	struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
 	u32 orig_mark = skb->mark;
 	int ret;
@@ -357,7 +358,19 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
 	}
 
 	x = xfrm_input_state(skb);
-	family = x->inner_mode->afinfo->family;
+
+	inner_mode = x->inner_mode;
+
+	if (x->sel.family == AF_UNSPEC) {
+		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+		if (inner_mode == NULL) {
+			XFRM_INC_STATS(dev_net(skb->dev),
+				       LINUX_MIB_XFRMINSTATEMODEERROR);
+			return -EINVAL;
+		}
+	}
+
+	family = inner_mode->afinfo->family;
 
 	skb->mark = be32_to_cpu(t->parms.i_key);
 	ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
-- 
1.9.1

^ permalink raw reply related

* [PATCH 3/4] xfrm: Fix memory leak of aead algorithm name
From: Steffen Klassert @ 2016-09-21 11:05 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev
In-Reply-To: <1474455946-674-1-git-send-email-steffen.klassert@secunet.com>

From: Ilan Tayari <ilant@mellanox.com>

commit 1a6509d99122 ("[IPSEC]: Add support for combined mode algorithms")
introduced aead. The function attach_aead kmemdup()s the algorithm
name during xfrm_state_construct().
However this memory is never freed.
Implementation has since been slightly modified in
commit ee5c23176fcc ("xfrm: Clone states properly on migration")
without resolving this leak.
This patch adds a kfree() call for the aead algorithm name.

Fixes: 1a6509d99122 ("[IPSEC]: Add support for combined mode algorithms")
Signed-off-by: Ilan Tayari <ilant@mellanox.com>
Acked-by: Rami Rosen <roszenrami@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_state.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9895a8c..a30f898d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -332,6 +332,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 {
 	tasklet_hrtimer_cancel(&x->mtimer);
 	del_timer_sync(&x->rtimer);
+	kfree(x->aead);
 	kfree(x->aalg);
 	kfree(x->ealg);
 	kfree(x->calg);
-- 
1.9.1

^ permalink raw reply related

* [PATCH 1/4] xfrm_user: propagate sec ctx allocation errors
From: Steffen Klassert @ 2016-09-21 11:05 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev
In-Reply-To: <1474455946-674-1-git-send-email-steffen.klassert@secunet.com>

From: Mathias Krause <minipli@googlemail.com>

When we fail to attach the security context in xfrm_state_construct()
we'll return 0 as error value which, in turn, will wrongly claim success
to userland when, in fact, we won't be adding / updating the XFRM state.

This is a regression introduced by commit fd21150a0fe1 ("[XFRM] netlink:
Inline attach_encap_tmpl(), attach_sec_ctx(), and attach_one_addr()").

Fix it by propagating the error returned by security_xfrm_state_alloc()
in this case.

Fixes: fd21150a0fe1 ("[XFRM] netlink: Inline attach_encap_tmpl()...")
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_user.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index cb65d91..0889209 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -581,9 +581,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 	if (err)
 		goto error;
 
-	if (attrs[XFRMA_SEC_CTX] &&
-	    security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX])))
-		goto error;
+	if (attrs[XFRMA_SEC_CTX]) {
+		err = security_xfrm_state_alloc(x,
+						nla_data(attrs[XFRMA_SEC_CTX]));
+		if (err)
+			goto error;
+	}
 
 	if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
 					       attrs[XFRMA_REPLAY_ESN_VAL])))
-- 
1.9.1

^ permalink raw reply related

* pull request (net): ipsec 2016-09-21
From: Steffen Klassert @ 2016-09-21 11:05 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Steffen Klassert, netdev

1) Propagate errors on security context allocation.
   From Mathias Krause.

2) Fix inbound policy checks for inter address family tunnels.
   From Thomas Zeitlhofer.

3) Fix an old memory leak on aead algorithm usage.
   From Ilan Tayari.

4) A recent patch fixed a possible NULL pointer dereference
   but broke the vti6 input path.
   Fix from Nicolas Dichtel.

Please pull or let me know if there are problems.

Thanks!

The following changes since commit 2c2c8e33e4aa6e46f19ef7bba8e559759a74a4db:

  Merge branch 'nfp-fixes' (2016-09-08 17:18:42 -0700)

are available in the git repository at:


  git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git master

for you to fetch changes up to 63c43787d35e45562a6b5927e2edc8f4783d95b8:

  vti6: fix input path (2016-09-21 10:09:14 +0200)

----------------------------------------------------------------
Ilan Tayari (1):
      xfrm: Fix memory leak of aead algorithm name

Mathias Krause (1):
      xfrm_user: propagate sec ctx allocation errors

Nicolas Dichtel (1):
      vti6: fix input path

thomas.zeitlhofer+lkml@ze-it.at (1):
      vti: use right inner_mode for inbound inter address family policy checks

 include/net/xfrm.h      |  4 +++-
 net/ipv4/ip_vti.c       | 15 ++++++++++++++-
 net/ipv6/ip6_vti.c      | 19 +++++++++++++++----
 net/ipv6/xfrm6_input.c  | 16 +++++++++++-----
 net/ipv6/xfrm6_tunnel.c |  2 +-
 net/xfrm/xfrm_state.c   |  1 +
 net/xfrm/xfrm_user.c    |  9 ++++++---
 7 files changed, 51 insertions(+), 15 deletions(-)

^ permalink raw reply

* Re: [PATCH next 1/2] sctp: rename WORD_TRUNC/ROUND macros
From: Marcelo Ricardo Leitner @ 2016-09-21 11:04 UTC (permalink / raw)
  To: David Laight
  Cc: network dev, linux-sctp, Neil Horman, vyasevich, David Miller
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6DB0105BA5@AcuExch.aculab.com>

Em 21-09-2016 07:18, David Laight escreveu:
> From: Marcelo Ricardo Leitner
>> Sent: 20 September 2016 21:24
>> To something more meaningful these days, specially because this is
>> working on packet headers or lengths and which are not tied to any CPU
>> arch but to the protocol itself.
>>
>> So, WORD_TRUNC becomes SCTP_TRUNC4 and WORD_ROUND becomes SCTP_ALIGN4.
> ...
>>  /* Round an int up to the next multiple of 4.  */
>> -#define WORD_ROUND(s) (((s)+3)&~3)
>> +#define SCTP_ALIGN4(s) (((s)+3)&~3)
>
> SCTP_PAD4() might be a better name.

Right. Will post a v3 with it. Thanks

   Marcelo

^ permalink raw reply

* Re: [PATCH next 1/2] sctp: fix the handling of SACK Gap Ack blocks
From: 'Marcelo Ricardo Leitner' @ 2016-09-21 11:01 UTC (permalink / raw)
  To: David Laight
  Cc: netdev@vger.kernel.org, linux-sctp@vger.kernel.org, Neil Horman,
	Vlad Yasevich
In-Reply-To: <063D6719AE5E284EB5DD2968C1650D6DB0105BBB@AcuExch.aculab.com>

On Wed, Sep 21, 2016 at 10:21:43AM +0000, David Laight wrote:
> From: Marcelo Ricardo Leitner
> > Sent: 20 September 2016 22:19
> > sctp_acked() is using 32bit arithmetics on 16bits vars, via TSN_lte()
> > macros, which is weird and confusing.
> > 
> > Once the offset to ctsn is calculated, all wrapping is already handled
> > and thus to verify the Gap Ack blocks we can just use pure
> > less/big-or-equal than checks.
> > 
> > Also, rename gap variable to tsn_offset, so it's more meaningful, as
> > it doesn't point to any gap at all.
> > 
> > Even so, I don't think this discrepancy resulted in any practical bug.
> 
> I think it might if gab.start/end are greater than 32767

Why? Mind sharing a case that it goes wrong?

  Marcelo

> 
> ...
> > -	gap = tsn - ctsn;
> > -	for (i = 0; i < ntohs(sack->num_gap_ack_blocks); ++i) {
> > -		if (TSN_lte(ntohs(frags[i].gab.start), gap) &&
> > -		    TSN_lte(gap, ntohs(frags[i].gab.end)))
> > +	blocks = ntohs(sack->num_gap_ack_blocks);
> > +	tsn_offset = tsn - ctsn;
> > +	for (i = 0; i < blocks; ++i) {
> > +		if (tsn_offset >= ntohs(frags[i].gab.start) &&
> > +		    tsn_offset <= ntohs(frags[i].gab.end))
> ...
> 
> 	David
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* Re: [RFC PATCH kernel] PCI: Enable access to custom VPD for Chelsio devices (cxgb3)
From: Alexey Kardashevskiy @ 2016-09-21 10:53 UTC (permalink / raw)
  To: Alexander Duyck, Bjorn Helgaas
  Cc: Netdev, Bjorn Helgaas, Santosh Raspatur, Alex Williamson,
	Paul Mackerras, Hannes Reinecke, linux-kernel@vger.kernel.org,
	linux-pci@vger.kernel.org
In-Reply-To: <CAKgT0UfOn=eU4DaA7j01w7JMQS3ea4YKV7Ewavk1Mm96Szs60A@mail.gmail.com>

On 07/09/16 04:30, Alexander Duyck wrote:
> On Tue, Sep 6, 2016 at 8:48 AM, Bjorn Helgaas <helgaas@kernel.org> wrote:
>> Hi Alexey,
>>
>> On Thu, Aug 11, 2016 at 08:03:29PM +1000, Alexey Kardashevskiy wrote:
>>> There is at least one Chelsio 10Gb card which uses VPD area to store
>>> some custom blocks (example below). However pci_vpd_size() returns
>>> the length of the first block only assuming that there can be only
>>> one VPD "End Tag" and VFIO blocks access beyond that offset
>>> (since 4e1a63555) which leads to the situation when the guest "cxgb3"
>>> driver fails to probe the device. The host system does not have this
>>> problem as the drives accesses the config space directly without
>>> pci_read_vpd()/...
>>>
>>> This adds a quirk to override the VPD size to a bigger value.
>>>
>>> This is the controller:
>>> Ethernet controller [0200]: Chelsio Communications Inc T310 10GbE Single Port Adapter [1425:0030]
>>>
>>> This is its VPD:
>>> #0000 Large item 42 bytes; name 0x2 Identifier String
>>>       b'10 Gigabit Ethernet-SR PCI Express Adapter'
>>> #002d Large item 74 bytes; name 0x10
>>>       #00 [EC] len=7: b'D76809 '
>>>       #0a [FN] len=7: b'46K7897'
>>>       #14 [PN] len=7: b'46K7897'
>>>       #1e [MN] len=4: b'1037'
>>>       #25 [FC] len=4: b'5769'
>>>       #2c [SN] len=12: b'YL102035603V'
>>>       #3b [NA] len=12: b'00145E992ED1'
>>> #007a Small item 1 bytes; name 0xf End Tag
>>> ---
>>> #0c00 Large item 16 bytes; name 0x2 Identifier String
>>>       b'S310E-SR-X      '
>>> #0c13 Large item 234 bytes; name 0x10
>>>       #00 [PN] len=16: b'TBD             '
>>>       #13 [EC] len=16: b'110107730D2     '
>>>       #26 [SN] len=16: b'97YL102035603V  '
>>>       #39 [NA] len=12: b'00145E992ED1'
>>>       #48 [V0] len=6: b'175000'
>>>       #51 [V1] len=6: b'266666'
>>>       #5a [V2] len=6: b'266666'
>>>       #63 [V3] len=6: b'2000  '
>>>       #6c [V4] len=2: b'1 '
>>>       #71 [V5] len=6: b'c2    '
>>>       #7a [V6] len=6: b'0     '
>>>       #83 [V7] len=2: b'1 '
>>>       #88 [V8] len=2: b'0 '
>>>       #8d [V9] len=2: b'0 '
>>>       #92 [VA] len=2: b'0 '
>>>       #97 [RV] len=80: b's\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'...
>>> #0d00 Large item 252 bytes; name 0x11
>>>       #00 [VC] len=16: b'122310_1222 dp  '
>>>       #13 [VD] len=16: b'610-0001-00 H1\x00\x00'
>>>       #26 [VE] len=16: b'122310_1353 fp  '
>>>       #39 [VF] len=16: b'610-0001-00 H1\x00\x00'
>>>       #4c [RW] len=173: b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'...
>>> #0dff Small item 0 bytes; name 0xf End Tag
>>>
>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>> ---
>>>  drivers/pci/quirks.c | 12 ++++++++++++
>>>  1 file changed, 12 insertions(+)
>>>
>>> diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
>>> index ee72ebe1..94d3fb5 100644
>>> --- a/drivers/pci/quirks.c
>>> +++ b/drivers/pci/quirks.c
>>> @@ -3241,6 +3241,18 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C
>>>  DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PORT_RIDGE,
>>>                       quirk_thunderbolt_hotplug_msi);
>>>
>>> +static void quirk_chelsio_extend_vpd(struct pci_dev *dev)
>>> +{
>>> +     if (!dev->vpd || !dev->vpd->ops || !dev->vpd->ops->set_size)
>>> +             return;
>>> +
>>> +     dev->vpd->ops->set_size(dev, max_t(unsigned int, dev->vpd->len, 0xe00));
>>> +}
>>> +
> 
> So one thing you might want to look at doing is actually validating
> there is something there before increasing the size.  If you look at
> the get_vpd_params function from the cxgb3 driver you will see what
> they do is verify the first tag located at 0xC00 is 0x82 before they
> do any further reads.  You might do something similar just to verify
> there is something there before you open it up to access by anyone.
> 
> One option would be to modify pci_vpd_size so that you can use it
> outside of access.c and can pass it an offset.  Then you could update
> your quirk so that you call pci_vpd_size and pass it the offset of
> 0xC00.  It should then be able to walk from that starting point and
> reach the end of the list.  If you do then pci_vpd_size will return
> the total size, else it returns 0.  So if size comes back as a
> non-zero value then you could pass that into pci_set_vpd_size,
> otherwise we can assume the starting offset is 0 and let the existing
> code run its course.


I could do this but I can predict endless argument whether a generic
pci_vpd_size() should scan for something which looks like VPD but it is
not, etc. I'd read at 0xc00, if it is 0x82, then pci_set_vpd_size(0xe00)
and that's it.


> 
>>> +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO,
>>> +                     PCI_ANY_ID,
>>> +                     quirk_chelsio_extend_vpd);
>>
>> Do you really want this for *all* Chelsio devices?  

Well, no. When I did this, I just wanted opinions :) I'll have a list here
in next revision.


> If you only need it for certain devices, the quirk could probably go in the driver.

It cannot, the cxgb3 driver is not running on the host, vfio-pci controls it
.

>
>>
>> Can you use pci_set_vpd_size() instead?  There's already a use of that
>> in cxgb4.
>>
> 
> I would assume this quirk needs to support the same device IDs as
> supported by the cxgb3 driver.  If so you might just clone the ID list
> from cxgb3_pci_tbl for this quirk.
> 
> Also from the looks of it the cxgb3 driver probably needs to be
> updated to use the VPD accessor functions instead of just open coding
> it themselves.  Otherwise we run the risk of the driver having issues
> if it attempts to access the EEPROM at the same time as other
> applications attempting to access the VPD for the device.


In this particular case cxgb3 driver is not running, vfio-pci controls the
device on the host side and does all the filtering so I have to add a quirk
to  drivers/pci/quirks.c with all 13 PCI IDs of cxgb3 hardware OOOOOR
implement quirks in vfio-pci (it has one already but I seriously doubt we
should extend it).

Fixing cxgb3 in the guest won't make any difference.

So where should I put the quirks? Thanks.


-- 
Alexey

^ permalink raw reply

* Re: [PATCHv7 net-next 00/15] BPF hardware offload (cls_bpf for now)
From: Jakub Kicinski @ 2016-09-21 10:48 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel
In-Reply-To: <1474454647-20137-1-git-send-email-jakub.kicinski@netronome.com>

On Wed, 21 Sep 2016 11:43:52 +0100, Jakub Kicinski wrote:
> Hi!
> 
> Rebased and improved.
> 
> v7:
>  - fix patch 6.

s/6/4/

^ permalink raw reply

* [PATCHv7 net-next 05/15] bpf: expose internal verfier structures
From: Jakub Kicinski @ 2016-09-21 10:43 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, Jakub Kicinski
In-Reply-To: <1474454647-20137-1-git-send-email-jakub.kicinski@netronome.com>

Move verifier's internal structures to a header file and
prefix their names with bpf_ to avoid potential namespace
conflicts.  Those structures will soon be used by external
analyzers.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
v5:
 - fix name of guard defines.
v4:
 - separate from adding the analyzer;
 - squash with the prefixing patch.
---
 include/linux/bpf_verifier.h |  79 +++++++++++++
 kernel/bpf/verifier.c        | 266 +++++++++++++++++--------------------------
 2 files changed, 182 insertions(+), 163 deletions(-)
 create mode 100644 include/linux/bpf_verifier.h

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
new file mode 100644
index 000000000000..9457a22fc6e0
--- /dev/null
+++ b/include/linux/bpf_verifier.h
@@ -0,0 +1,79 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _LINUX_BPF_VERIFIER_H
+#define _LINUX_BPF_VERIFIER_H 1
+
+#include <linux/bpf.h> /* for enum bpf_reg_type */
+#include <linux/filter.h> /* for MAX_BPF_STACK */
+
+struct bpf_reg_state {
+	enum bpf_reg_type type;
+	union {
+		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
+		s64 imm;
+
+		/* valid when type == PTR_TO_PACKET* */
+		struct {
+			u32 id;
+			u16 off;
+			u16 range;
+		};
+
+		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
+		 *   PTR_TO_MAP_VALUE_OR_NULL
+		 */
+		struct bpf_map *map_ptr;
+	};
+};
+
+enum bpf_stack_slot_type {
+	STACK_INVALID,    /* nothing was stored in this stack slot */
+	STACK_SPILL,      /* register spilled into stack */
+	STACK_MISC	  /* BPF program wrote some data into this slot */
+};
+
+#define BPF_REG_SIZE 8	/* size of eBPF register in bytes */
+
+/* state of the program:
+ * type of all registers and stack info
+ */
+struct bpf_verifier_state {
+	struct bpf_reg_state regs[MAX_BPF_REG];
+	u8 stack_slot_type[MAX_BPF_STACK];
+	struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
+};
+
+/* linked list of verifier states used to prune search */
+struct bpf_verifier_state_list {
+	struct bpf_verifier_state state;
+	struct bpf_verifier_state_list *next;
+};
+
+struct bpf_insn_aux_data {
+	enum bpf_reg_type ptr_type;	/* pointer type for load/store insns */
+};
+
+#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
+
+/* single container for all structs
+ * one verifier_env per bpf_check() call
+ */
+struct bpf_verifier_env {
+	struct bpf_prog *prog;		/* eBPF program being verified */
+	struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
+	int stack_size;			/* number of states to be processed */
+	struct bpf_verifier_state cur_state; /* current verifier state */
+	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
+	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
+	u32 used_map_cnt;		/* number of used maps */
+	u32 id_gen;			/* used to generate unique reg IDs */
+	bool allow_ptr_leaks;
+	bool seen_direct_write;
+	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
+};
+
+#endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a9542d89f293..dca2b9b1d02e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
 #include <linux/filter.h>
 #include <net/netlink.h>
 #include <linux/file.h>
@@ -126,82 +127,16 @@
  * are set to NOT_INIT to indicate that they are no longer readable.
  */
 
-struct reg_state {
-	enum bpf_reg_type type;
-	union {
-		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
-		s64 imm;
-
-		/* valid when type == PTR_TO_PACKET* */
-		struct {
-			u32 id;
-			u16 off;
-			u16 range;
-		};
-
-		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
-		 *   PTR_TO_MAP_VALUE_OR_NULL
-		 */
-		struct bpf_map *map_ptr;
-	};
-};
-
-enum bpf_stack_slot_type {
-	STACK_INVALID,    /* nothing was stored in this stack slot */
-	STACK_SPILL,      /* register spilled into stack */
-	STACK_MISC	  /* BPF program wrote some data into this slot */
-};
-
-#define BPF_REG_SIZE 8	/* size of eBPF register in bytes */
-
-/* state of the program:
- * type of all registers and stack info
- */
-struct verifier_state {
-	struct reg_state regs[MAX_BPF_REG];
-	u8 stack_slot_type[MAX_BPF_STACK];
-	struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
-};
-
-/* linked list of verifier states used to prune search */
-struct verifier_state_list {
-	struct verifier_state state;
-	struct verifier_state_list *next;
-};
-
 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
-struct verifier_stack_elem {
+struct bpf_verifier_stack_elem {
 	/* verifer state is 'st'
 	 * before processing instruction 'insn_idx'
 	 * and after processing instruction 'prev_insn_idx'
 	 */
-	struct verifier_state st;
+	struct bpf_verifier_state st;
 	int insn_idx;
 	int prev_insn_idx;
-	struct verifier_stack_elem *next;
-};
-
-struct bpf_insn_aux_data {
-	enum bpf_reg_type ptr_type;	/* pointer type for load/store insns */
-};
-
-#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
-
-/* single container for all structs
- * one verifier_env per bpf_check() call
- */
-struct verifier_env {
-	struct bpf_prog *prog;		/* eBPF program being verified */
-	struct verifier_stack_elem *head; /* stack of verifier states to be processed */
-	int stack_size;			/* number of states to be processed */
-	struct verifier_state cur_state; /* current verifier state */
-	struct verifier_state_list **explored_states; /* search pruning optimization */
-	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
-	u32 used_map_cnt;		/* number of used maps */
-	u32 id_gen;			/* used to generate unique reg IDs */
-	bool allow_ptr_leaks;
-	bool seen_direct_write;
-	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
+	struct bpf_verifier_stack_elem *next;
 };
 
 #define BPF_COMPLEXITY_LIMIT_INSNS	65536
@@ -254,9 +189,9 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_PACKET_END]	= "pkt_end",
 };
 
-static void print_verifier_state(struct verifier_state *state)
+static void print_verifier_state(struct bpf_verifier_state *state)
 {
-	struct reg_state *reg;
+	struct bpf_reg_state *reg;
 	enum bpf_reg_type t;
 	int i;
 
@@ -432,9 +367,9 @@ static void print_bpf_insn(struct bpf_insn *insn)
 	}
 }
 
-static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
+static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx)
 {
-	struct verifier_stack_elem *elem;
+	struct bpf_verifier_stack_elem *elem;
 	int insn_idx;
 
 	if (env->head == NULL)
@@ -451,12 +386,12 @@ static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
 	return insn_idx;
 }
 
-static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx,
-					 int prev_insn_idx)
+static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
+					     int insn_idx, int prev_insn_idx)
 {
-	struct verifier_stack_elem *elem;
+	struct bpf_verifier_stack_elem *elem;
 
-	elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL);
+	elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
 	if (!elem)
 		goto err;
 
@@ -482,7 +417,7 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
 
-static void init_reg_state(struct reg_state *regs)
+static void init_reg_state(struct bpf_reg_state *regs)
 {
 	int i;
 
@@ -498,7 +433,7 @@ static void init_reg_state(struct reg_state *regs)
 	regs[BPF_REG_1].type = PTR_TO_CTX;
 }
 
-static void mark_reg_unknown_value(struct reg_state *regs, u32 regno)
+static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
 {
 	BUG_ON(regno >= MAX_BPF_REG);
 	regs[regno].type = UNKNOWN_VALUE;
@@ -511,7 +446,7 @@ enum reg_arg_type {
 	DST_OP_NO_MARK	/* same as above, check only, don't mark */
 };
 
-static int check_reg_arg(struct reg_state *regs, u32 regno,
+static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
 			 enum reg_arg_type t)
 {
 	if (regno >= MAX_BPF_REG) {
@@ -571,8 +506,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 /* check_stack_read/write functions track spill/fill of registers,
  * stack boundary and alignment are checked in check_mem_access()
  */
-static int check_stack_write(struct verifier_state *state, int off, int size,
-			     int value_regno)
+static int check_stack_write(struct bpf_verifier_state *state, int off,
+			     int size, int value_regno)
 {
 	int i;
 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@ -597,7 +532,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
 	} else {
 		/* regular write of data into stack */
 		state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
-			(struct reg_state) {};
+			(struct bpf_reg_state) {};
 
 		for (i = 0; i < size; i++)
 			state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
@@ -605,7 +540,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
 	return 0;
 }
 
-static int check_stack_read(struct verifier_state *state, int off, int size,
+static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
 			    int value_regno)
 {
 	u8 *slot_type;
@@ -646,7 +581,7 @@ static int check_stack_read(struct verifier_state *state, int off, int size,
 }
 
 /* check read/write into map element returned by bpf_map_lookup_elem() */
-static int check_map_access(struct verifier_env *env, u32 regno, int off,
+static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
 			    int size)
 {
 	struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
@@ -661,7 +596,7 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off,
 
 #define MAX_PACKET_OFF 0xffff
 
-static bool may_access_direct_pkt_data(struct verifier_env *env,
+static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 				       const struct bpf_call_arg_meta *meta)
 {
 	switch (env->prog->type) {
@@ -678,11 +613,11 @@ static bool may_access_direct_pkt_data(struct verifier_env *env,
 	}
 }
 
-static int check_packet_access(struct verifier_env *env, u32 regno, int off,
+static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 			       int size)
 {
-	struct reg_state *regs = env->cur_state.regs;
-	struct reg_state *reg = &regs[regno];
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *reg = &regs[regno];
 
 	off += reg->off;
 	if (off < 0 || size <= 0 || off + size > reg->range) {
@@ -694,7 +629,7 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off,
 }
 
 /* check access to 'struct bpf_context' fields */
-static int check_ctx_access(struct verifier_env *env, int off, int size,
+static int check_ctx_access(struct bpf_verifier_env *env, int off, int size,
 			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
 {
 	if (env->prog->aux->ops->is_valid_access &&
@@ -709,7 +644,7 @@ static int check_ctx_access(struct verifier_env *env, int off, int size,
 	return -EACCES;
 }
 
-static bool is_pointer_value(struct verifier_env *env, int regno)
+static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 {
 	if (env->allow_ptr_leaks)
 		return false;
@@ -723,12 +658,13 @@ static bool is_pointer_value(struct verifier_env *env, int regno)
 	}
 }
 
-static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
-			       int off, int size)
+static int check_ptr_alignment(struct bpf_verifier_env *env,
+			       struct bpf_reg_state *reg, int off, int size)
 {
 	if (reg->type != PTR_TO_PACKET) {
 		if (off % size != 0) {
-			verbose("misaligned access off %d size %d\n", off, size);
+			verbose("misaligned access off %d size %d\n",
+				off, size);
 			return -EACCES;
 		} else {
 			return 0;
@@ -769,12 +705,12 @@ static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
  * if t==write && value_regno==-1, some unknown value is stored into memory
  * if t==read && value_regno==-1, don't care what we read from memory
  */
-static int check_mem_access(struct verifier_env *env, u32 regno, int off,
+static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 			    int bpf_size, enum bpf_access_type t,
 			    int value_regno)
 {
-	struct verifier_state *state = &env->cur_state;
-	struct reg_state *reg = &state->regs[regno];
+	struct bpf_verifier_state *state = &env->cur_state;
+	struct bpf_reg_state *reg = &state->regs[regno];
 	int size, err = 0;
 
 	if (reg->type == PTR_TO_STACK)
@@ -860,9 +796,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
 	return err;
 }
 
-static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
+static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *regs = env->cur_state.regs;
 	int err;
 
 	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
@@ -896,12 +832,12 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
  * bytes from that pointer, make sure that it's within stack boundary
  * and all elements of stack are initialized
  */
-static int check_stack_boundary(struct verifier_env *env, int regno,
+static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 				int access_size, bool zero_size_allowed,
 				struct bpf_call_arg_meta *meta)
 {
-	struct verifier_state *state = &env->cur_state;
-	struct reg_state *regs = state->regs;
+	struct bpf_verifier_state *state = &env->cur_state;
+	struct bpf_reg_state *regs = state->regs;
 	int off, i;
 
 	if (regs[regno].type != PTR_TO_STACK) {
@@ -940,11 +876,11 @@ static int check_stack_boundary(struct verifier_env *env, int regno,
 	return 0;
 }
 
-static int check_func_arg(struct verifier_env *env, u32 regno,
+static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			  enum bpf_arg_type arg_type,
 			  struct bpf_call_arg_meta *meta)
 {
-	struct reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
+	struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
 	enum bpf_reg_type expected_type, type = reg->type;
 	int err = 0;
 
@@ -1149,10 +1085,10 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
 	return count > 1 ? -EINVAL : 0;
 }
 
-static void clear_all_pkt_pointers(struct verifier_env *env)
+static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 {
-	struct verifier_state *state = &env->cur_state;
-	struct reg_state *regs = state->regs, *reg;
+	struct bpf_verifier_state *state = &env->cur_state;
+	struct bpf_reg_state *regs = state->regs, *reg;
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++)
@@ -1172,12 +1108,12 @@ static void clear_all_pkt_pointers(struct verifier_env *env)
 	}
 }
 
-static int check_call(struct verifier_env *env, int func_id)
+static int check_call(struct bpf_verifier_env *env, int func_id)
 {
-	struct verifier_state *state = &env->cur_state;
+	struct bpf_verifier_state *state = &env->cur_state;
 	const struct bpf_func_proto *fn = NULL;
-	struct reg_state *regs = state->regs;
-	struct reg_state *reg;
+	struct bpf_reg_state *regs = state->regs;
+	struct bpf_reg_state *reg;
 	struct bpf_call_arg_meta meta;
 	bool changes_data;
 	int i, err;
@@ -1280,12 +1216,13 @@ static int check_call(struct verifier_env *env, int func_id)
 	return 0;
 }
 
-static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn)
+static int check_packet_ptr_add(struct bpf_verifier_env *env,
+				struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
-	struct reg_state *dst_reg = &regs[insn->dst_reg];
-	struct reg_state *src_reg = &regs[insn->src_reg];
-	struct reg_state tmp_reg;
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+	struct bpf_reg_state *src_reg = &regs[insn->src_reg];
+	struct bpf_reg_state tmp_reg;
 	s32 imm;
 
 	if (BPF_SRC(insn->code) == BPF_K) {
@@ -1353,10 +1290,10 @@ static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn)
 	return 0;
 }
 
-static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
+static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
-	struct reg_state *dst_reg = &regs[insn->dst_reg];
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
 	u8 opcode = BPF_OP(insn->code);
 	s64 imm_log2;
 
@@ -1366,7 +1303,7 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
 	 */
 
 	if (BPF_SRC(insn->code) == BPF_X) {
-		struct reg_state *src_reg = &regs[insn->src_reg];
+		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
 
 		if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 &&
 		    dst_reg->imm && opcode == BPF_ADD) {
@@ -1455,11 +1392,12 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
 	return 0;
 }
 
-static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn)
+static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
+				struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
-	struct reg_state *dst_reg = &regs[insn->dst_reg];
-	struct reg_state *src_reg = &regs[insn->src_reg];
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+	struct bpf_reg_state *src_reg = &regs[insn->src_reg];
 	u8 opcode = BPF_OP(insn->code);
 
 	/* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn.
@@ -1476,9 +1414,9 @@ static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn)
 }
 
 /* check validity of 32-bit and 64-bit arithmetic operations */
-static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
+static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs, *dst_reg;
+	struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
 	u8 opcode = BPF_OP(insn->code);
 	int err;
 
@@ -1652,10 +1590,10 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
 	return 0;
 }
 
-static void find_good_pkt_pointers(struct verifier_state *state,
-				   const struct reg_state *dst_reg)
+static void find_good_pkt_pointers(struct bpf_verifier_state *state,
+				   struct bpf_reg_state *dst_reg)
 {
-	struct reg_state *regs = state->regs, *reg;
+	struct bpf_reg_state *regs = state->regs, *reg;
 	int i;
 
 	/* LLVM can generate two kind of checks:
@@ -1701,11 +1639,11 @@ static void find_good_pkt_pointers(struct verifier_state *state,
 	}
 }
 
-static int check_cond_jmp_op(struct verifier_env *env,
+static int check_cond_jmp_op(struct bpf_verifier_env *env,
 			     struct bpf_insn *insn, int *insn_idx)
 {
-	struct verifier_state *other_branch, *this_branch = &env->cur_state;
-	struct reg_state *regs = this_branch->regs, *dst_reg;
+	struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state;
+	struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
 	u8 opcode = BPF_OP(insn->code);
 	int err;
 
@@ -1767,7 +1705,7 @@ static int check_cond_jmp_op(struct verifier_env *env,
 	if (!other_branch)
 		return -EFAULT;
 
-	/* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */
+	/* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
 	if (BPF_SRC(insn->code) == BPF_K &&
 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
 	    dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
@@ -1809,9 +1747,9 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
 }
 
 /* verify BPF_LD_IMM64 instruction */
-static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
+static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *regs = env->cur_state.regs;
 	int err;
 
 	if (BPF_SIZE(insn->code) != BPF_DW) {
@@ -1866,11 +1804,11 @@ static bool may_access_skb(enum bpf_prog_type type)
  * Output:
  *   R0 - 8/16/32-bit skb data converted to cpu endianness
  */
-static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
+static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *regs = env->cur_state.regs;
 	u8 mode = BPF_MODE(insn->code);
-	struct reg_state *reg;
+	struct bpf_reg_state *reg;
 	int i, err;
 
 	if (!may_access_skb(env->prog->type)) {
@@ -1956,7 +1894,7 @@ enum {
 	BRANCH = 2,
 };
 
-#define STATE_LIST_MARK ((struct verifier_state_list *) -1L)
+#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
 
 static int *insn_stack;	/* stack of insns to process */
 static int cur_stack;	/* current stack index */
@@ -1967,7 +1905,7 @@ static int *insn_state;
  * w - next instruction
  * e - edge
  */
-static int push_insn(int t, int w, int e, struct verifier_env *env)
+static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
 {
 	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
 		return 0;
@@ -2008,7 +1946,7 @@ static int push_insn(int t, int w, int e, struct verifier_env *env)
 /* non-recursive depth-first-search to detect loops in BPF program
  * loop == back-edge in directed graph
  */
-static int check_cfg(struct verifier_env *env)
+static int check_cfg(struct bpf_verifier_env *env)
 {
 	struct bpf_insn *insns = env->prog->insnsi;
 	int insn_cnt = env->prog->len;
@@ -2117,7 +2055,8 @@ static int check_cfg(struct verifier_env *env)
 /* the following conditions reduce the number of explored insns
  * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
  */
-static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur)
+static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
+				   struct bpf_reg_state *cur)
 {
 	if (old->id != cur->id)
 		return false;
@@ -2192,9 +2131,10 @@ static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur)
  * whereas register type in current state is meaningful, it means that
  * the current state will reach 'bpf_exit' instruction safely
  */
-static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
+static bool states_equal(struct bpf_verifier_state *old,
+			 struct bpf_verifier_state *cur)
 {
-	struct reg_state *rold, *rcur;
+	struct bpf_reg_state *rold, *rcur;
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++) {
@@ -2234,9 +2174,9 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
 			 * the same, check that stored pointers types
 			 * are the same as well.
 			 * Ex: explored safe path could have stored
-			 * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8}
+			 * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8}
 			 * but current path has stored:
-			 * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16}
+			 * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16}
 			 * such verifier states are not equivalent.
 			 * return false to continue verification of this path
 			 */
@@ -2247,10 +2187,10 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
 	return true;
 }
 
-static int is_state_visited(struct verifier_env *env, int insn_idx)
+static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 {
-	struct verifier_state_list *new_sl;
-	struct verifier_state_list *sl;
+	struct bpf_verifier_state_list *new_sl;
+	struct bpf_verifier_state_list *sl;
 
 	sl = env->explored_states[insn_idx];
 	if (!sl)
@@ -2274,7 +2214,7 @@ static int is_state_visited(struct verifier_env *env, int insn_idx)
 	 * it will be rejected. Since there are no loops, we won't be
 	 * seeing this 'insn_idx' instruction again on the way to bpf_exit
 	 */
-	new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER);
+	new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER);
 	if (!new_sl)
 		return -ENOMEM;
 
@@ -2285,11 +2225,11 @@ static int is_state_visited(struct verifier_env *env, int insn_idx)
 	return 0;
 }
 
-static int do_check(struct verifier_env *env)
+static int do_check(struct bpf_verifier_env *env)
 {
-	struct verifier_state *state = &env->cur_state;
+	struct bpf_verifier_state *state = &env->cur_state;
 	struct bpf_insn *insns = env->prog->insnsi;
-	struct reg_state *regs = state->regs;
+	struct bpf_reg_state *regs = state->regs;
 	int insn_cnt = env->prog->len;
 	int insn_idx, prev_insn_idx = 0;
 	int insn_processed = 0;
@@ -2572,7 +2512,7 @@ static int check_map_prog_compatibility(struct bpf_map *map,
 /* look for pseudo eBPF instructions that access map FDs and
  * replace them with actual map pointers
  */
-static int replace_map_fd_with_map_ptr(struct verifier_env *env)
+static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 {
 	struct bpf_insn *insn = env->prog->insnsi;
 	int insn_cnt = env->prog->len;
@@ -2669,7 +2609,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 }
 
 /* drop refcnt of maps used by the rejected program */
-static void release_maps(struct verifier_env *env)
+static void release_maps(struct bpf_verifier_env *env)
 {
 	int i;
 
@@ -2678,7 +2618,7 @@ static void release_maps(struct verifier_env *env)
 }
 
 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
-static void convert_pseudo_ld_imm64(struct verifier_env *env)
+static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
 {
 	struct bpf_insn *insn = env->prog->insnsi;
 	int insn_cnt = env->prog->len;
@@ -2692,7 +2632,7 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
 /* convert load instructions that access fields of 'struct __sk_buff'
  * into sequence of instructions that access fields of 'struct sk_buff'
  */
-static int convert_ctx_accesses(struct verifier_env *env)
+static int convert_ctx_accesses(struct bpf_verifier_env *env)
 {
 	const struct bpf_verifier_ops *ops = env->prog->aux->ops;
 	const int insn_cnt = env->prog->len;
@@ -2757,9 +2697,9 @@ static int convert_ctx_accesses(struct verifier_env *env)
 	return 0;
 }
 
-static void free_states(struct verifier_env *env)
+static void free_states(struct bpf_verifier_env *env)
 {
-	struct verifier_state_list *sl, *sln;
+	struct bpf_verifier_state_list *sl, *sln;
 	int i;
 
 	if (!env->explored_states)
@@ -2782,16 +2722,16 @@ static void free_states(struct verifier_env *env)
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 {
 	char __user *log_ubuf = NULL;
-	struct verifier_env *env;
+	struct bpf_verifier_env *env;
 	int ret = -EINVAL;
 
 	if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
 		return -E2BIG;
 
-	/* 'struct verifier_env' can be global, but since it's not small,
+	/* 'struct bpf_verifier_env' can be global, but since it's not small,
 	 * allocate/free it every time bpf_check() is called
 	 */
-	env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL);
+	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
 	if (!env)
 		return -ENOMEM;
 
@@ -2833,7 +2773,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 		goto skip_full_check;
 
 	env->explored_states = kcalloc(env->prog->len,
-				       sizeof(struct verifier_state_list *),
+				       sizeof(struct bpf_verifier_state_list *),
 				       GFP_USER);
 	ret = -ENOMEM;
 	if (!env->explored_states)
-- 
1.9.1

^ permalink raw reply related

* [PATCHv7 net-next 15/15] nfp: bpf: add offload of TC direct action mode
From: Jakub Kicinski @ 2016-09-21 10:44 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, Jakub Kicinski
In-Reply-To: <1474454647-20137-1-git-send-email-jakub.kicinski@netronome.com>

Add offload of TC in direct action mode.  We just need
to provide appropriate checks in the verifier and
a new outro block to translate the exit codes to what
data path expects

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/nfp_bpf.h       |  1 +
 drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c   | 66 ++++++++++++++++++++++
 .../net/ethernet/netronome/nfp/nfp_bpf_verifier.c  | 11 +++-
 .../net/ethernet/netronome/nfp/nfp_net_offload.c   |  6 +-
 4 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
index adbe0235d98e..fc220cd04115 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
@@ -61,6 +61,7 @@ enum static_regs {
 enum nfp_bpf_action_type {
 	NN_ACT_TC_DROP,
 	NN_ACT_TC_REDIR,
+	NN_ACT_DIRECT,
 };
 
 /* Software register representation, hardware encoding in asm.h */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
index 434bef975c58..3de819acd68c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
@@ -321,6 +321,16 @@ __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
 	nfp_prog_push(nfp_prog, insn);
 }
 
+static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer)
+{
+	if (defer > 2) {
+		pr_err("BUG: branch defer out of bounds %d\n", defer);
+		nfp_prog->error = -EFAULT;
+		return;
+	}
+	__emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer);
+}
+
 static void
 emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
 {
@@ -1465,9 +1475,65 @@ static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog)
 		      SHF_SC_L_SHF, 16);
 }
 
+static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
+{
+	/* TC direct-action mode:
+	 *   0,1   ok        NOT SUPPORTED[1]
+	 *   2   drop  0x22 -> drop,  count as stat1
+	 *   4,5 nuke  0x02 -> drop
+	 *   7  redir  0x44 -> redir, count as stat2
+	 *   * unspec  0x11 -> pass,  count as stat0
+	 *
+	 * [1] We can't support OK and RECLASSIFY because we can't tell TC
+	 *     the exact decision made.  We are forced to support UNSPEC
+	 *     to handle aborts so that's the only one we handle for passing
+	 *     packets up the stack.
+	 */
+	/* Target for aborts */
+	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
+
+	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
+
+	emit_alu(nfp_prog, reg_a(0),
+		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
+
+	/* Target for normal exits */
+	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
+
+	/* if R0 > 7 jump to abort */
+	emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
+	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
+	emit_alu(nfp_prog, reg_a(0),
+		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+
+	wrp_immed(nfp_prog, reg_b(2), 0x41221211);
+	wrp_immed(nfp_prog, reg_b(3), 0x41001211);
+
+	emit_shf(nfp_prog, reg_a(1),
+		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
+
+	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
+	emit_shf(nfp_prog, reg_a(2),
+		 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
+
+	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
+	emit_shf(nfp_prog, reg_b(2),
+		 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
+
+	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
+
+	emit_shf(nfp_prog, reg_b(2),
+		 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
+	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
+}
+
 static void nfp_outro(struct nfp_prog *nfp_prog)
 {
 	switch (nfp_prog->act) {
+	case NN_ACT_DIRECT:
+		nfp_outro_tc_da(nfp_prog);
+		break;
 	case NN_ACT_TC_DROP:
 	case NN_ACT_TC_REDIR:
 		nfp_outro_tc_legacy(nfp_prog);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
index ef6775b54168..144cae87f63a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
@@ -86,7 +86,16 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
 		return -EINVAL;
 	}
 
-	if (reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) {
+	if (nfp_prog->act != NN_ACT_DIRECT &&
+	    reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) {
+		pr_info("unsupported exit state: %d, imm: %llx\n",
+			reg0->type, reg0->imm);
+		return -EINVAL;
+	}
+
+	if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT &&
+	    reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN &&
+	    reg0->imm != TC_ACT_QUEUED) {
 		pr_info("unsupported exit state: %d, imm: %llx\n",
 			reg0->type, reg0->imm);
 		return -EINVAL;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
index 1ec8e5b74651..43f42f842eda 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
@@ -112,8 +112,12 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
 	LIST_HEAD(actions);
 
 	/* TC direct action */
-	if (cls_bpf->exts_integrated)
+	if (cls_bpf->exts_integrated) {
+		if (tc_no_actions(cls_bpf->exts))
+			return NN_ACT_DIRECT;
+
 		return -ENOTSUPP;
+	}
 
 	/* TC legacy mode */
 	if (!tc_single_action(cls_bpf->exts))
-- 
1.9.1

^ permalink raw reply related

* [PATCHv7 net-next 08/15] nfp: add BPF to NFP code translator
From: Jakub Kicinski @ 2016-09-21 10:44 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, Jakub Kicinski
In-Reply-To: <1474454647-20137-1-git-send-email-jakub.kicinski@netronome.com>

Add translator for JITing eBPF to operations which
can be executed on NFP's programmable engines.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
v6:
 - explicitly check for registers >= MAX_BPF_REG;
 - fix mem leak.
v4:
 - use bitfield.h directly.
v3:
 - don't clone the program for the verifier (no longer needed);
 - temporarily add a local copy of macros from bitfield.h.

NOTE: this one will probably trigger buildbot failures because
      it depends on pull request from wireless-drivers-next.
---
 drivers/net/ethernet/netronome/nfp/Makefile        |    6 +
 drivers/net/ethernet/netronome/nfp/nfp_asm.h       |  233 +++
 drivers/net/ethernet/netronome/nfp/nfp_bpf.h       |  208 +++
 drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c   | 1724 ++++++++++++++++++++
 .../net/ethernet/netronome/nfp/nfp_bpf_verifier.c  |  162 ++
 5 files changed, 2333 insertions(+)
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_asm.h
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf.h
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c

diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 68178819ff12..5f12689bf523 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -5,4 +5,10 @@ nfp_netvf-objs := \
 	    nfp_net_ethtool.o \
 	    nfp_netvf_main.o
 
+ifeq ($(CONFIG_BPF_SYSCALL),y)
+nfp_netvf-objs += \
+	    nfp_bpf_verifier.o \
+	    nfp_bpf_jit.o
+endif
+
 nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
new file mode 100644
index 000000000000..22484b6fd3e8
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_ASM_H__
+#define __NFP_ASM_H__ 1
+
+#include "nfp_bpf.h"
+
+#define REG_NONE	0
+
+#define RE_REG_NO_DST	0x020
+#define RE_REG_IMM	0x020
+#define RE_REG_IMM_encode(x)					\
+	(RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1))
+#define RE_REG_IMM_MAX	 0x07fULL
+#define RE_REG_XFR	0x080
+
+#define UR_REG_XFR	0x180
+#define UR_REG_NN	0x280
+#define UR_REG_NO_DST	0x300
+#define UR_REG_IMM	UR_REG_NO_DST
+#define UR_REG_IMM_encode(x) (UR_REG_IMM | (x))
+#define UR_REG_IMM_MAX	 0x0ffULL
+
+#define OP_BR_BASE	0x0d800000020ULL
+#define OP_BR_BASE_MASK	0x0f8000c3ce0ULL
+#define OP_BR_MASK	0x0000000001fULL
+#define OP_BR_EV_PIP	0x00000000300ULL
+#define OP_BR_CSS	0x0000003c000ULL
+#define OP_BR_DEFBR	0x00000300000ULL
+#define OP_BR_ADDR_LO	0x007ffc00000ULL
+#define OP_BR_ADDR_HI	0x10000000000ULL
+
+#define nfp_is_br(_insn)				\
+	(((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE)
+
+enum br_mask {
+	BR_BEQ = 0x00,
+	BR_BNE = 0x01,
+	BR_BHS = 0x04,
+	BR_BLO = 0x05,
+	BR_BGE = 0x08,
+	BR_UNC = 0x18,
+};
+
+enum br_ev_pip {
+	BR_EV_PIP_UNCOND = 0,
+	BR_EV_PIP_COND = 1,
+};
+
+enum br_ctx_signal_state {
+	BR_CSS_NONE = 2,
+};
+
+#define OP_BBYTE_BASE	0x0c800000000ULL
+#define OP_BB_A_SRC	0x000000000ffULL
+#define OP_BB_BYTE	0x00000000300ULL
+#define OP_BB_B_SRC	0x0000003fc00ULL
+#define OP_BB_I8	0x00000040000ULL
+#define OP_BB_EQ	0x00000080000ULL
+#define OP_BB_DEFBR	0x00000300000ULL
+#define OP_BB_ADDR_LO	0x007ffc00000ULL
+#define OP_BB_ADDR_HI	0x10000000000ULL
+
+#define OP_BALU_BASE	0x0e800000000ULL
+#define OP_BA_A_SRC	0x000000003ffULL
+#define OP_BA_B_SRC	0x000000ffc00ULL
+#define OP_BA_DEFBR	0x00000300000ULL
+#define OP_BA_ADDR_HI	0x0007fc00000ULL
+
+#define OP_IMMED_A_SRC	0x000000003ffULL
+#define OP_IMMED_B_SRC	0x000000ffc00ULL
+#define OP_IMMED_IMM	0x0000ff00000ULL
+#define OP_IMMED_WIDTH	0x00060000000ULL
+#define OP_IMMED_INV	0x00080000000ULL
+#define OP_IMMED_SHIFT	0x00600000000ULL
+#define OP_IMMED_BASE	0x0f000000000ULL
+#define OP_IMMED_WR_AB	0x20000000000ULL
+
+enum immed_width {
+	IMMED_WIDTH_ALL = 0,
+	IMMED_WIDTH_BYTE = 1,
+	IMMED_WIDTH_WORD = 2,
+};
+
+enum immed_shift {
+	IMMED_SHIFT_0B = 0,
+	IMMED_SHIFT_1B = 1,
+	IMMED_SHIFT_2B = 2,
+};
+
+#define OP_SHF_BASE	0x08000000000ULL
+#define OP_SHF_A_SRC	0x000000000ffULL
+#define OP_SHF_SC	0x00000000300ULL
+#define OP_SHF_B_SRC	0x0000003fc00ULL
+#define OP_SHF_I8	0x00000040000ULL
+#define OP_SHF_SW	0x00000080000ULL
+#define OP_SHF_DST	0x0000ff00000ULL
+#define OP_SHF_SHIFT	0x001f0000000ULL
+#define OP_SHF_OP	0x00e00000000ULL
+#define OP_SHF_DST_AB	0x01000000000ULL
+#define OP_SHF_WR_AB	0x20000000000ULL
+
+enum shf_op {
+	SHF_OP_NONE = 0,
+	SHF_OP_AND = 2,
+	SHF_OP_OR = 5,
+};
+
+enum shf_sc {
+	SHF_SC_R_ROT = 0,
+	SHF_SC_R_SHF = 1,
+	SHF_SC_L_SHF = 2,
+	SHF_SC_R_DSHF = 3,
+};
+
+#define OP_ALU_A_SRC	0x000000003ffULL
+#define OP_ALU_B_SRC	0x000000ffc00ULL
+#define OP_ALU_DST	0x0003ff00000ULL
+#define OP_ALU_SW	0x00040000000ULL
+#define OP_ALU_OP	0x00f80000000ULL
+#define OP_ALU_DST_AB	0x01000000000ULL
+#define OP_ALU_BASE	0x0a000000000ULL
+#define OP_ALU_WR_AB	0x20000000000ULL
+
+enum alu_op {
+	ALU_OP_NONE	= 0x00,
+	ALU_OP_ADD	= 0x01,
+	ALU_OP_NEG	= 0x04,
+	ALU_OP_AND	= 0x08,
+	ALU_OP_SUB_C	= 0x0d,
+	ALU_OP_ADD_C	= 0x11,
+	ALU_OP_OR	= 0x14,
+	ALU_OP_SUB	= 0x15,
+	ALU_OP_XOR	= 0x18,
+};
+
+enum alu_dst_ab {
+	ALU_DST_A = 0,
+	ALU_DST_B = 1,
+};
+
+#define OP_LDF_BASE	0x0c000000000ULL
+#define OP_LDF_A_SRC	0x000000000ffULL
+#define OP_LDF_SC	0x00000000300ULL
+#define OP_LDF_B_SRC	0x0000003fc00ULL
+#define OP_LDF_I8	0x00000040000ULL
+#define OP_LDF_SW	0x00000080000ULL
+#define OP_LDF_ZF	0x00000100000ULL
+#define OP_LDF_BMASK	0x0000f000000ULL
+#define OP_LDF_SHF	0x001f0000000ULL
+#define OP_LDF_WR_AB	0x20000000000ULL
+
+#define OP_CMD_A_SRC	 0x000000000ffULL
+#define OP_CMD_CTX	 0x00000000300ULL
+#define OP_CMD_B_SRC	 0x0000003fc00ULL
+#define OP_CMD_TOKEN	 0x000000c0000ULL
+#define OP_CMD_XFER	 0x00001f00000ULL
+#define OP_CMD_CNT	 0x0000e000000ULL
+#define OP_CMD_SIG	 0x000f0000000ULL
+#define OP_CMD_TGT_CMD	 0x07f00000000ULL
+#define OP_CMD_MODE	0x1c0000000000ULL
+
+struct cmd_tgt_act {
+	u8 token;
+	u8 tgt_cmd;
+};
+
+enum cmd_tgt_map {
+	CMD_TGT_READ8,
+	CMD_TGT_WRITE8,
+	CMD_TGT_READ_LE,
+	CMD_TGT_READ_SWAP_LE,
+	__CMD_TGT_MAP_SIZE,
+};
+
+enum cmd_mode {
+	CMD_MODE_40b_AB	= 0,
+	CMD_MODE_40b_BA	= 1,
+	CMD_MODE_32b	= 4,
+};
+
+enum cmd_ctx_swap {
+	CMD_CTX_SWAP = 0,
+	CMD_CTX_NO_SWAP = 3,
+};
+
+#define OP_LCSR_BASE	0x0fc00000000ULL
+#define OP_LCSR_A_SRC	0x000000003ffULL
+#define OP_LCSR_B_SRC	0x000000ffc00ULL
+#define OP_LCSR_WRITE	0x00000200000ULL
+#define OP_LCSR_ADDR	0x001ffc00000ULL
+
+enum lcsr_wr_src {
+	LCSR_WR_AREG,
+	LCSR_WR_BREG,
+	LCSR_WR_IMM,
+};
+
+#define OP_CARB_BASE	0x0e000000000ULL
+#define OP_CARB_OR	0x00000010000ULL
+
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
new file mode 100644
index 000000000000..3726421e353f
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_BPF_H__
+#define __NFP_BPF_H__ 1
+
+#include <linux/bitfield.h>
+#include <linux/bpf.h>
+#include <linux/list.h>
+#include <linux/types.h>
+
+#define FIELD_FIT(mask, val)  (!((((u64)val) << __bf_shf(mask)) & ~(mask)))
+
+/* For branch fixup logic use up-most byte of branch instruction as scratch
+ * area.  Remember to clear this before sending instructions to HW!
+ */
+#define OP_BR_SPECIAL	0xff00000000000000ULL
+
+enum br_special {
+	OP_BR_NORMAL = 0,
+	OP_BR_GO_OUT,
+	OP_BR_GO_ABORT,
+};
+
+enum static_regs {
+	STATIC_REG_PKT		= 1,
+#define REG_PKT_BANK	ALU_DST_A
+	STATIC_REG_IMM		= 2, /* Bank AB */
+};
+
+enum nfp_bpf_action_type {
+	NN_ACT_TC_DROP,
+};
+
+/* Software register representation, hardware encoding in asm.h */
+#define NN_REG_TYPE	GENMASK(31, 24)
+#define NN_REG_VAL	GENMASK(7, 0)
+
+enum nfp_bpf_reg_type {
+	NN_REG_GPR_A =	BIT(0),
+	NN_REG_GPR_B =	BIT(1),
+	NN_REG_NNR =	BIT(2),
+	NN_REG_XFER =	BIT(3),
+	NN_REG_IMM =	BIT(4),
+	NN_REG_NONE =	BIT(5),
+};
+
+#define NN_REG_GPR_BOTH	(NN_REG_GPR_A | NN_REG_GPR_B)
+
+#define reg_both(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH))
+#define reg_a(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A))
+#define reg_b(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B))
+#define reg_nnr(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR))
+#define reg_xfer(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER))
+#define reg_imm(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM))
+#define reg_none()	(FIELD_PREP(NN_REG_TYPE, NN_REG_NONE))
+
+#define pkt_reg(np)	reg_a((np)->regs_per_thread - STATIC_REG_PKT)
+#define imm_a(np)	reg_a((np)->regs_per_thread - STATIC_REG_IMM)
+#define imm_b(np)	reg_b((np)->regs_per_thread - STATIC_REG_IMM)
+#define imm_both(np)	reg_both((np)->regs_per_thread - STATIC_REG_IMM)
+
+#define NFP_BPF_ABI_FLAGS	reg_nnr(0)
+#define NFP_BPF_ABI_PKT		reg_nnr(2)
+#define NFP_BPF_ABI_LEN		reg_nnr(3)
+
+struct nfp_prog;
+struct nfp_insn_meta;
+typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
+
+#define nfp_prog_first_meta(nfp_prog)					\
+	list_first_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l)
+#define nfp_prog_last_meta(nfp_prog)					\
+	list_last_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l)
+#define nfp_meta_next(meta)	list_next_entry(meta, l)
+#define nfp_meta_prev(meta)	list_prev_entry(meta, l)
+
+/**
+ * struct nfp_insn_meta - BPF instruction wrapper
+ * @insn: BPF instruction
+ * @off: index of first generated machine instruction (in nfp_prog.prog)
+ * @n: eBPF instruction number
+ * @skip: skip this instruction (optimized out)
+ * @double_cb: callback for second part of the instruction
+ * @l: link on nfp_prog->insns list
+ */
+struct nfp_insn_meta {
+	struct bpf_insn insn;
+	unsigned int off;
+	unsigned short n;
+	bool skip;
+	instr_cb_t double_cb;
+
+	struct list_head l;
+};
+
+#define BPF_SIZE_MASK	0x18
+
+static inline u8 mbpf_class(const struct nfp_insn_meta *meta)
+{
+	return BPF_CLASS(meta->insn.code);
+}
+
+static inline u8 mbpf_src(const struct nfp_insn_meta *meta)
+{
+	return BPF_SRC(meta->insn.code);
+}
+
+static inline u8 mbpf_op(const struct nfp_insn_meta *meta)
+{
+	return BPF_OP(meta->insn.code);
+}
+
+static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
+{
+	return BPF_MODE(meta->insn.code);
+}
+
+/**
+ * struct nfp_prog - nfp BPF program
+ * @prog: machine code
+ * @prog_len: number of valid instructions in @prog array
+ * @__prog_alloc_len: alloc size of @prog array
+ * @act: BPF program/action type (TC DA, TC with action, XDP etc.)
+ * @num_regs: number of registers used by this program
+ * @regs_per_thread: number of basic registers allocated per thread
+ * @start_off: address of the first instruction in the memory
+ * @tgt_out: jump target for normal exit
+ * @tgt_abort: jump target for abort (e.g. access outside of packet buffer)
+ * @tgt_done: jump target to get the next packet
+ * @n_translated: number of successfully translated instructions (for errors)
+ * @error: error code if something went wrong
+ * @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
+ */
+struct nfp_prog {
+	u64 *prog;
+	unsigned int prog_len;
+	unsigned int __prog_alloc_len;
+
+	enum nfp_bpf_action_type act;
+
+	unsigned int num_regs;
+	unsigned int regs_per_thread;
+
+	unsigned int start_off;
+	unsigned int tgt_out;
+	unsigned int tgt_abort;
+	unsigned int tgt_done;
+
+	unsigned int n_translated;
+	int error;
+
+	struct list_head insns;
+};
+
+struct nfp_bpf_result {
+	unsigned int n_instr;
+	bool dense_mode;
+};
+
+#ifdef CONFIG_BPF_SYSCALL
+int
+nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act,
+	    unsigned int prog_start, unsigned int prog_done,
+	    unsigned int prog_sz, struct nfp_bpf_result *res);
+#else
+int
+nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act,
+	    unsigned int prog_start, unsigned int prog_done,
+	    unsigned int prog_sz, struct nfp_bpf_result *res)
+{
+	return -ENOTSUPP;
+}
+#endif
+
+int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog);
+
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
new file mode 100644
index 000000000000..cfbf53607fc9
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
@@ -0,0 +1,1724 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt)	"NFP net bpf: " fmt
+
+#include <linux/kernel.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/pkt_cls.h>
+#include <linux/unistd.h>
+
+#include "nfp_asm.h"
+#include "nfp_bpf.h"
+
+/* --- NFP prog --- */
+/* Foreach "multiple" entries macros provide pos and next<n> pointers.
+ * It's safe to modify the next pointers (but not pos).
+ */
+#define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
+	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
+	     next = list_next_entry(pos, l);			\
+	     &(nfp_prog)->insns != &pos->l &&			\
+	     &(nfp_prog)->insns != &next->l;			\
+	     pos = nfp_meta_next(pos),				\
+	     next = nfp_meta_next(pos))
+
+#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
+	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
+	     next = list_next_entry(pos, l),			\
+	     next2 = list_next_entry(next, l);			\
+	     &(nfp_prog)->insns != &pos->l &&			\
+	     &(nfp_prog)->insns != &next->l &&			\
+	     &(nfp_prog)->insns != &next2->l;			\
+	     pos = nfp_meta_next(pos),				\
+	     next = nfp_meta_next(pos),				\
+	     next2 = nfp_meta_next(next))
+
+static bool
+nfp_meta_has_next(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return meta->l.next != &nfp_prog->insns;
+}
+
+static bool
+nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return meta->l.prev != &nfp_prog->insns;
+}
+
+static void nfp_prog_free(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta, *tmp;
+
+	list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) {
+		list_del(&meta->l);
+		kfree(meta);
+	}
+	kfree(nfp_prog);
+}
+
+static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
+{
+	if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) {
+		nfp_prog->error = -ENOSPC;
+		return;
+	}
+
+	nfp_prog->prog[nfp_prog->prog_len] = insn;
+	nfp_prog->prog_len++;
+}
+
+static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
+{
+	return nfp_prog->start_off + nfp_prog->prog_len;
+}
+
+static unsigned int
+nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset)
+{
+	return offset - nfp_prog->start_off;
+}
+
+/* --- SW reg --- */
+struct nfp_insn_ur_regs {
+	enum alu_dst_ab dst_ab;
+	u16 dst;
+	u16 areg, breg;
+	bool swap;
+	bool wr_both;
+};
+
+struct nfp_insn_re_regs {
+	enum alu_dst_ab dst_ab;
+	u8 dst;
+	u8 areg, breg;
+	bool swap;
+	bool wr_both;
+	bool i8;
+};
+
+static u16 nfp_swreg_to_unreg(u32 swreg, bool is_dst)
+{
+	u16 val = FIELD_GET(NN_REG_VAL, swreg);
+
+	switch (FIELD_GET(NN_REG_TYPE, swreg)) {
+	case NN_REG_GPR_A:
+	case NN_REG_GPR_B:
+	case NN_REG_GPR_BOTH:
+		return val;
+	case NN_REG_NNR:
+		return UR_REG_NN | val;
+	case NN_REG_XFER:
+		return UR_REG_XFR | val;
+	case NN_REG_IMM:
+		if (val & ~0xff) {
+			pr_err("immediate too large\n");
+			return 0;
+		}
+		return UR_REG_IMM_encode(val);
+	case NN_REG_NONE:
+		return is_dst ? UR_REG_NO_DST : REG_NONE;
+	default:
+		pr_err("unrecognized reg encoding %08x\n", swreg);
+		return 0;
+	}
+}
+
+static int
+swreg_to_unrestricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_ur_regs *reg)
+{
+	memset(reg, 0, sizeof(*reg));
+
+	/* Decode destination */
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM)
+		return -EFAULT;
+
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B)
+		reg->dst_ab = ALU_DST_B;
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH)
+		reg->wr_both = true;
+	reg->dst = nfp_swreg_to_unreg(dst, true);
+
+	/* Decode source operands */
+	if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg))
+		return -EFAULT;
+
+	if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B ||
+	    FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) {
+		reg->areg = nfp_swreg_to_unreg(rreg, false);
+		reg->breg = nfp_swreg_to_unreg(lreg, false);
+		reg->swap = true;
+	} else {
+		reg->areg = nfp_swreg_to_unreg(lreg, false);
+		reg->breg = nfp_swreg_to_unreg(rreg, false);
+	}
+
+	return 0;
+}
+
+static u16 nfp_swreg_to_rereg(u32 swreg, bool is_dst, bool has_imm8, bool *i8)
+{
+	u16 val = FIELD_GET(NN_REG_VAL, swreg);
+
+	switch (FIELD_GET(NN_REG_TYPE, swreg)) {
+	case NN_REG_GPR_A:
+	case NN_REG_GPR_B:
+	case NN_REG_GPR_BOTH:
+		return val;
+	case NN_REG_XFER:
+		return RE_REG_XFR | val;
+	case NN_REG_IMM:
+		if (val & ~(0x7f | has_imm8 << 7)) {
+			pr_err("immediate too large\n");
+			return 0;
+		}
+		*i8 = val & 0x80;
+		return RE_REG_IMM_encode(val & 0x7f);
+	case NN_REG_NONE:
+		return is_dst ? RE_REG_NO_DST : REG_NONE;
+	default:
+		pr_err("unrecognized reg encoding\n");
+		return 0;
+	}
+}
+
+static int
+swreg_to_restricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_re_regs *reg,
+		    bool has_imm8)
+{
+	memset(reg, 0, sizeof(*reg));
+
+	/* Decode destination */
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM)
+		return -EFAULT;
+
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B)
+		reg->dst_ab = ALU_DST_B;
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH)
+		reg->wr_both = true;
+	reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL);
+
+	/* Decode source operands */
+	if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg))
+		return -EFAULT;
+
+	if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B ||
+	    FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) {
+		reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
+		reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
+		reg->swap = true;
+	} else {
+		reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
+		reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
+	}
+
+	return 0;
+}
+
+/* --- Emitters --- */
+static const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
+	[CMD_TGT_WRITE8] =		{ 0x00, 0x42 },
+	[CMD_TGT_READ8] =		{ 0x01, 0x43 },
+	[CMD_TGT_READ_LE] =		{ 0x01, 0x40 },
+	[CMD_TGT_READ_SWAP_LE] =	{ 0x03, 0x40 },
+};
+
+static void
+__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
+	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync)
+{
+	enum cmd_ctx_swap ctx;
+	u64 insn;
+
+	if (sync)
+		ctx = CMD_CTX_SWAP;
+	else
+		ctx = CMD_CTX_NO_SWAP;
+
+	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
+		FIELD_PREP(OP_CMD_CTX, ctx) |
+		FIELD_PREP(OP_CMD_B_SRC, breg) |
+		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
+		FIELD_PREP(OP_CMD_XFER, xfer) |
+		FIELD_PREP(OP_CMD_CNT, size) |
+		FIELD_PREP(OP_CMD_SIG, sync) |
+		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
+		FIELD_PREP(OP_CMD_MODE, mode);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
+	 u8 mode, u8 xfer, u32 lreg, u32 rreg, u8 size, bool sync)
+{
+	struct nfp_insn_re_regs reg;
+	int err;
+
+	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+	if (reg.swap) {
+		pr_err("cmd can't swap arguments\n");
+		nfp_prog->error = -EFAULT;
+		return;
+	}
+
+	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync);
+}
+
+static void
+__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
+	  enum br_ctx_signal_state css, u16 addr, u8 defer)
+{
+	u16 addr_lo, addr_hi;
+	u64 insn;
+
+	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
+	addr_hi = addr != addr_lo;
+
+	insn = OP_BR_BASE |
+		FIELD_PREP(OP_BR_MASK, mask) |
+		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
+		FIELD_PREP(OP_BR_CSS, css) |
+		FIELD_PREP(OP_BR_DEFBR, defer) |
+		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
+		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
+{
+	__emit_br(nfp_prog, mask,
+		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
+		  BR_CSS_NONE, addr, defer);
+}
+
+static void
+__emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8,
+	       u8 byte, bool equal, u16 addr, u8 defer)
+{
+	u16 addr_lo, addr_hi;
+	u64 insn;
+
+	addr_lo = addr & (OP_BB_ADDR_LO >> __bf_shf(OP_BB_ADDR_LO));
+	addr_hi = addr != addr_lo;
+
+	insn = OP_BBYTE_BASE |
+		FIELD_PREP(OP_BB_A_SRC, areg) |
+		FIELD_PREP(OP_BB_BYTE, byte) |
+		FIELD_PREP(OP_BB_B_SRC, breg) |
+		FIELD_PREP(OP_BB_I8, imm8) |
+		FIELD_PREP(OP_BB_EQ, equal) |
+		FIELD_PREP(OP_BB_DEFBR, defer) |
+		FIELD_PREP(OP_BB_ADDR_LO, addr_lo) |
+		FIELD_PREP(OP_BB_ADDR_HI, addr_hi);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_br_byte_neq(struct nfp_prog *nfp_prog,
+		 u32 dst, u8 imm, u8 byte, u16 addr, u8 defer)
+{
+	struct nfp_insn_re_regs reg;
+	int err;
+
+	err = swreg_to_restricted(reg_none(), dst, reg_imm(imm), &reg, true);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr,
+		       defer);
+}
+
+static void
+__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
+	     enum immed_width width, bool invert,
+	     enum immed_shift shift, bool wr_both)
+{
+	u64 insn;
+
+	insn = OP_IMMED_BASE |
+		FIELD_PREP(OP_IMMED_A_SRC, areg) |
+		FIELD_PREP(OP_IMMED_B_SRC, breg) |
+		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
+		FIELD_PREP(OP_IMMED_WIDTH, width) |
+		FIELD_PREP(OP_IMMED_INV, invert) |
+		FIELD_PREP(OP_IMMED_SHIFT, shift) |
+		FIELD_PREP(OP_IMMED_WR_AB, wr_both);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm,
+	   enum immed_width width, bool invert, enum immed_shift shift)
+{
+	struct nfp_insn_ur_regs reg;
+	int err;
+
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) {
+		nfp_prog->error = -EFAULT;
+		return;
+	}
+
+	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width,
+		     invert, shift, reg.wr_both);
+}
+
+static void
+__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
+	   enum shf_sc sc, u8 shift,
+	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both)
+{
+	u64 insn;
+
+	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
+		nfp_prog->error = -EFAULT;
+		return;
+	}
+
+	if (sc == SHF_SC_L_SHF)
+		shift = 32 - shift;
+
+	insn = OP_SHF_BASE |
+		FIELD_PREP(OP_SHF_A_SRC, areg) |
+		FIELD_PREP(OP_SHF_SC, sc) |
+		FIELD_PREP(OP_SHF_B_SRC, breg) |
+		FIELD_PREP(OP_SHF_I8, i8) |
+		FIELD_PREP(OP_SHF_SW, sw) |
+		FIELD_PREP(OP_SHF_DST, dst) |
+		FIELD_PREP(OP_SHF_SHIFT, shift) |
+		FIELD_PREP(OP_SHF_OP, op) |
+		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
+		FIELD_PREP(OP_SHF_WR_AB, wr_both);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg,
+	 enum shf_sc sc, u8 shift)
+{
+	struct nfp_insn_re_regs reg;
+	int err;
+
+	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
+		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both);
+}
+
+static void
+__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
+	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both)
+{
+	u64 insn;
+
+	insn = OP_ALU_BASE |
+		FIELD_PREP(OP_ALU_A_SRC, areg) |
+		FIELD_PREP(OP_ALU_B_SRC, breg) |
+		FIELD_PREP(OP_ALU_DST, dst) |
+		FIELD_PREP(OP_ALU_SW, swap) |
+		FIELD_PREP(OP_ALU_OP, op) |
+		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
+		FIELD_PREP(OP_ALU_WR_AB, wr_both);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg)
+{
+	struct nfp_insn_ur_regs reg;
+	int err;
+
+	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
+		   reg.areg, op, reg.breg, reg.swap, reg.wr_both);
+}
+
+static void
+__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
+		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
+		bool zero, bool swap, bool wr_both)
+{
+	u64 insn;
+
+	insn = OP_LDF_BASE |
+		FIELD_PREP(OP_LDF_A_SRC, areg) |
+		FIELD_PREP(OP_LDF_SC, sc) |
+		FIELD_PREP(OP_LDF_B_SRC, breg) |
+		FIELD_PREP(OP_LDF_I8, imm8) |
+		FIELD_PREP(OP_LDF_SW, swap) |
+		FIELD_PREP(OP_LDF_ZF, zero) |
+		FIELD_PREP(OP_LDF_BMASK, bmask) |
+		FIELD_PREP(OP_LDF_SHF, shift) |
+		FIELD_PREP(OP_LDF_WR_AB, wr_both);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift,
+		  u32 dst, u8 bmask, u32 src, bool zero)
+{
+	struct nfp_insn_re_regs reg;
+	int err;
+
+	err = swreg_to_restricted(reg_none(), dst, src, &reg, true);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
+			reg.i8, zero, reg.swap, reg.wr_both);
+}
+
+static void
+emit_ld_field(struct nfp_prog *nfp_prog, u32 dst, u8 bmask, u32 src,
+	      enum shf_sc sc, u8 shift)
+{
+	emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false);
+}
+
+/* --- Wrappers --- */
+static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
+{
+	if (!(imm & 0xffff0000)) {
+		*val = imm;
+		*shift = IMMED_SHIFT_0B;
+	} else if (!(imm & 0xff0000ff)) {
+		*val = imm >> 8;
+		*shift = IMMED_SHIFT_1B;
+	} else if (!(imm & 0x0000ffff)) {
+		*val = imm >> 16;
+		*shift = IMMED_SHIFT_2B;
+	} else {
+		return false;
+	}
+
+	return true;
+}
+
+static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm)
+{
+	enum immed_shift shift;
+	u16 val;
+
+	if (pack_immed(imm, &val, &shift)) {
+		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
+	} else if (pack_immed(~imm, &val, &shift)) {
+		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
+	} else {
+		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
+			   false, IMMED_SHIFT_0B);
+		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
+			   false, IMMED_SHIFT_2B);
+	}
+}
+
+/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
+ * If the @imm is small enough encode it directly in operand and return
+ * otherwise load @imm to a spare register and return its encoding.
+ */
+static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
+{
+	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
+		return reg_imm(imm);
+
+	wrp_immed(nfp_prog, tmp_reg, imm);
+	return tmp_reg;
+}
+
+/* re_load_imm_any() - encode immediate or use tmp register (restricted)
+ * If the @imm is small enough encode it directly in operand and return
+ * otherwise load @imm to a spare register and return its encoding.
+ */
+static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
+{
+	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
+		return reg_imm(imm);
+
+	wrp_immed(nfp_prog, tmp_reg, imm);
+	return tmp_reg;
+}
+
+static void
+wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask,
+	       enum br_special special)
+{
+	emit_br(nfp_prog, mask, 0, 0);
+
+	nfp_prog->prog[nfp_prog->prog_len - 1] |=
+		FIELD_PREP(OP_BR_SPECIAL, special);
+}
+
+static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
+{
+	emit_alu(nfp_prog, reg_both(dst), reg_none(), ALU_OP_NONE, reg_b(src));
+}
+
+static int
+construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset,
+		      u16 src, bool src_valid, u8 size)
+{
+	unsigned int i;
+	u16 shift, sz;
+	u32 tmp_reg;
+
+	/* We load the value from the address indicated in @offset and then
+	 * shift out the data we don't need.  Note: this is big endian!
+	 */
+	sz = size < 4 ? 4 : size;
+	shift = size < 4 ? 4 - size : 0;
+
+	if (src_valid) {
+		/* Calculate the true offset (src_reg + imm) */
+		tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
+		emit_alu(nfp_prog, imm_both(nfp_prog),
+			 reg_a(src), ALU_OP_ADD, tmp_reg);
+		/* Check packet length (size guaranteed to fit b/c it's u8) */
+		emit_alu(nfp_prog, imm_a(nfp_prog),
+			 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
+		emit_alu(nfp_prog, reg_none(),
+			 NFP_BPF_ABI_LEN, ALU_OP_SUB, imm_a(nfp_prog));
+		wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
+		/* Load data */
+		emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
+			 pkt_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true);
+	} else {
+		/* Check packet length */
+		tmp_reg = ur_load_imm_any(nfp_prog, offset + size,
+					  imm_a(nfp_prog));
+		emit_alu(nfp_prog, reg_none(),
+			 NFP_BPF_ABI_LEN, ALU_OP_SUB, tmp_reg);
+		wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
+		/* Load data */
+		tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
+		emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
+			 pkt_reg(nfp_prog), tmp_reg, sz - 1, true);
+	}
+
+	i = 0;
+	if (shift)
+		emit_shf(nfp_prog, reg_both(0), reg_none(), SHF_OP_NONE,
+			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
+	else
+		for (; i * 4 < size; i++)
+			emit_alu(nfp_prog, reg_both(i),
+				 reg_none(), ALU_OP_NONE, reg_xfer(i));
+
+	if (i < 2)
+		wrp_immed(nfp_prog, reg_both(1), 0);
+
+	return 0;
+}
+
+static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
+{
+	return construct_data_ind_ld(nfp_prog, offset, 0, false, size);
+}
+
+static void
+wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
+{
+	u32 tmp_reg;
+
+	if (alu_op == ALU_OP_AND) {
+		if (!imm)
+			wrp_immed(nfp_prog, reg_both(dst), 0);
+		if (!imm || !~imm)
+			return;
+	}
+	if (alu_op == ALU_OP_OR) {
+		if (!~imm)
+			wrp_immed(nfp_prog, reg_both(dst), ~0U);
+		if (!imm || !~imm)
+			return;
+	}
+	if (alu_op == ALU_OP_XOR) {
+		if (!~imm)
+			emit_alu(nfp_prog, reg_both(dst), reg_none(),
+				 ALU_OP_NEG, reg_b(dst));
+		if (!imm || !~imm)
+			return;
+	}
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
+	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
+}
+
+static int
+wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	      enum alu_op alu_op, bool skip)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+
+	if (skip) {
+		meta->skip = true;
+		return 0;
+	}
+
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);
+
+	return 0;
+}
+
+static int
+wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	      enum alu_op alu_op)
+{
+	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
+
+	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
+	emit_alu(nfp_prog, reg_both(dst + 1),
+		 reg_a(dst + 1), alu_op, reg_b(src + 1));
+
+	return 0;
+}
+
+static int
+wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	      enum alu_op alu_op, bool skip)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (skip) {
+		meta->skip = true;
+		return 0;
+	}
+
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int
+wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	      enum alu_op alu_op)
+{
+	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
+
+	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
+	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static void
+wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
+		 enum br_mask br_mask, u16 off)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
+	emit_br(nfp_prog, br_mask, off, 0);
+}
+
+static int
+wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	     enum alu_op alu_op, enum br_mask br_mask)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
+			 insn->src_reg * 2, br_mask, insn->off);
+	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
+			 insn->src_reg * 2 + 1, br_mask, insn->off);
+
+	return 0;
+}
+
+static int
+wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	    enum br_mask br_mask, bool swap)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+	u8 reg = insn->dst_reg * 2;
+	u32 tmp_reg;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+	if (!swap)
+		emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
+	else
+		emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+	if (!swap)
+		emit_alu(nfp_prog, reg_none(),
+			 reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
+	else
+		emit_alu(nfp_prog, reg_none(),
+			 tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));
+
+	emit_br(nfp_prog, br_mask, insn->off, 0);
+
+	return 0;
+}
+
+static int
+wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	    enum br_mask br_mask, bool swap)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u8 areg = insn->src_reg * 2, breg = insn->dst_reg * 2;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	if (swap) {
+		areg ^= breg;
+		breg ^= areg;
+		areg ^= breg;
+	}
+
+	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
+	emit_alu(nfp_prog, reg_none(),
+		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
+	emit_br(nfp_prog, br_mask, insn->off, 0);
+
+	return 0;
+}
+
+/* --- Callbacks --- */
+static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2);
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->src_reg * 2 + 1);
+
+	return 0;
+}
+
+static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	u64 imm = meta->insn.imm; /* sign extend */
+
+	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
+	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);
+
+	return 0;
+}
+
+static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
+}
+
+static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
+}
+
+static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
+}
+
+static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
+}
+
+static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
+}
+
+static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
+}
+
+static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
+		 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
+		 reg_b(insn->src_reg * 2));
+	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
+		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
+		 reg_b(insn->src_reg * 2 + 1));
+
+	return 0;
+}
+
+static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);
+
+	return 0;
+}
+
+static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
+		 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
+		 reg_b(insn->src_reg * 2));
+	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
+		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
+		 reg_b(insn->src_reg * 2 + 1));
+
+	return 0;
+}
+
+static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);
+
+	return 0;
+}
+
+static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (insn->imm != 32)
+		return 1; /* TODO */
+
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->dst_reg * 2);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), 0);
+
+	return 0;
+}
+
+static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (insn->imm != 32)
+		return 1; /* TODO */
+
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->dst_reg * 2 + 1);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2,  insn->src_reg * 2);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
+}
+
+static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm);
+}
+
+static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
+}
+
+static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
+}
+
+static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
+}
+
+static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
+}
+
+static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
+}
+
+static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm);
+}
+
+static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
+}
+
+static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
+}
+
+static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (!insn->imm)
+		return 1; /* TODO: zero shift means indirect */
+
+	emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
+		 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
+		 SHF_SC_L_SHF, insn->imm);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1),
+		  meta->insn.imm);
+
+	return 0;
+}
+
+static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	meta->double_cb = imm_ld8_part2;
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
+
+	return 0;
+}
+
+static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ld(nfp_prog, meta->insn.imm, 1);
+}
+
+static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ld(nfp_prog, meta->insn.imm, 2);
+}
+
+static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ld(nfp_prog, meta->insn.imm, 4);
+}
+
+static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+				     meta->insn.src_reg * 2, true, 1);
+}
+
+static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+				     meta->insn.src_reg * 2, true, 2);
+}
+
+static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+				     meta->insn.src_reg * 2, true, 4);
+}
+
+static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	if (meta->insn.off == offsetof(struct sk_buff, len))
+		emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2),
+			 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_LEN);
+	else
+		return -ENOTSUPP;
+
+	return 0;
+}
+
+static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	if (meta->insn.off < 0) /* TODO */
+		return -ENOTSUPP;
+	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
+
+	return 0;
+}
+
+static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+	u32 or1 = reg_a(insn->dst_reg * 2), or2 = reg_b(insn->dst_reg * 2 + 1);
+	u32 tmp_reg;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	if (imm & ~0U) {
+		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+		emit_alu(nfp_prog, imm_a(nfp_prog),
+			 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
+		or1 = imm_a(nfp_prog);
+	}
+
+	if (imm >> 32) {
+		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+		emit_alu(nfp_prog, imm_b(nfp_prog),
+			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
+		or2 = imm_b(nfp_prog);
+	}
+
+	emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
+	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
+
+	return 0;
+}
+
+static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
+}
+
+static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
+}
+
+static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+	u32 tmp_reg;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	if (!imm) {
+		meta->skip = true;
+		return 0;
+	}
+
+	if (imm & ~0U) {
+		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+		emit_alu(nfp_prog, reg_none(),
+			 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg);
+		emit_br(nfp_prog, BR_BNE, insn->off, 0);
+	}
+
+	if (imm >> 32) {
+		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+		emit_alu(nfp_prog, reg_none(),
+			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg);
+		emit_br(nfp_prog, BR_BNE, insn->off, 0);
+	}
+
+	return 0;
+}
+
+static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+	u32 tmp_reg;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	if (!imm) {
+		emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
+			 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
+		emit_br(nfp_prog, BR_BNE, insn->off, 0);
+	}
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+	emit_alu(nfp_prog, reg_none(),
+		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
+	emit_br(nfp_prog, BR_BNE, insn->off, 0);
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+	emit_alu(nfp_prog, reg_none(),
+		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
+	emit_br(nfp_prog, BR_BNE, insn->off, 0);
+
+	return 0;
+}
+
+static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
+		 ALU_OP_XOR, reg_b(insn->src_reg * 2));
+	emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
+		 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
+	emit_alu(nfp_prog, reg_none(),
+		 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
+	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
+
+	return 0;
+}
+
+static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
+}
+
+static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
+}
+
+static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
+}
+
+static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
+}
+
+static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT);
+
+	return 0;
+}
+
+static const instr_cb_t instr_cb[256] = {
+	[BPF_ALU64 | BPF_MOV | BPF_X] =	mov_reg64,
+	[BPF_ALU64 | BPF_MOV | BPF_K] =	mov_imm64,
+	[BPF_ALU64 | BPF_XOR | BPF_X] =	xor_reg64,
+	[BPF_ALU64 | BPF_XOR | BPF_K] =	xor_imm64,
+	[BPF_ALU64 | BPF_AND | BPF_X] =	and_reg64,
+	[BPF_ALU64 | BPF_AND | BPF_K] =	and_imm64,
+	[BPF_ALU64 | BPF_OR | BPF_X] =	or_reg64,
+	[BPF_ALU64 | BPF_OR | BPF_K] =	or_imm64,
+	[BPF_ALU64 | BPF_ADD | BPF_X] =	add_reg64,
+	[BPF_ALU64 | BPF_ADD | BPF_K] =	add_imm64,
+	[BPF_ALU64 | BPF_SUB | BPF_X] =	sub_reg64,
+	[BPF_ALU64 | BPF_SUB | BPF_K] =	sub_imm64,
+	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
+	[BPF_ALU64 | BPF_RSH | BPF_K] =	shr_imm64,
+	[BPF_ALU | BPF_MOV | BPF_X] =	mov_reg,
+	[BPF_ALU | BPF_MOV | BPF_K] =	mov_imm,
+	[BPF_ALU | BPF_XOR | BPF_X] =	xor_reg,
+	[BPF_ALU | BPF_XOR | BPF_K] =	xor_imm,
+	[BPF_ALU | BPF_AND | BPF_X] =	and_reg,
+	[BPF_ALU | BPF_AND | BPF_K] =	and_imm,
+	[BPF_ALU | BPF_OR | BPF_X] =	or_reg,
+	[BPF_ALU | BPF_OR | BPF_K] =	or_imm,
+	[BPF_ALU | BPF_ADD | BPF_X] =	add_reg,
+	[BPF_ALU | BPF_ADD | BPF_K] =	add_imm,
+	[BPF_ALU | BPF_SUB | BPF_X] =	sub_reg,
+	[BPF_ALU | BPF_SUB | BPF_K] =	sub_imm,
+	[BPF_ALU | BPF_LSH | BPF_K] =	shl_imm,
+	[BPF_LD | BPF_IMM | BPF_DW] =	imm_ld8,
+	[BPF_LD | BPF_ABS | BPF_B] =	data_ld1,
+	[BPF_LD | BPF_ABS | BPF_H] =	data_ld2,
+	[BPF_LD | BPF_ABS | BPF_W] =	data_ld4,
+	[BPF_LD | BPF_IND | BPF_B] =	data_ind_ld1,
+	[BPF_LD | BPF_IND | BPF_H] =	data_ind_ld2,
+	[BPF_LD | BPF_IND | BPF_W] =	data_ind_ld4,
+	[BPF_LDX | BPF_MEM | BPF_W] =	mem_ldx4,
+	[BPF_JMP | BPF_JA | BPF_K] =	jump,
+	[BPF_JMP | BPF_JEQ | BPF_K] =	jeq_imm,
+	[BPF_JMP | BPF_JGT | BPF_K] =	jgt_imm,
+	[BPF_JMP | BPF_JGE | BPF_K] =	jge_imm,
+	[BPF_JMP | BPF_JSET | BPF_K] =	jset_imm,
+	[BPF_JMP | BPF_JNE | BPF_K] =	jne_imm,
+	[BPF_JMP | BPF_JEQ | BPF_X] =	jeq_reg,
+	[BPF_JMP | BPF_JGT | BPF_X] =	jgt_reg,
+	[BPF_JMP | BPF_JGE | BPF_X] =	jge_reg,
+	[BPF_JMP | BPF_JSET | BPF_X] =	jset_reg,
+	[BPF_JMP | BPF_JNE | BPF_X] =	jne_reg,
+	[BPF_JMP | BPF_EXIT] =		goto_out,
+};
+
+/* --- Misc code --- */
+static void br_set_offset(u64 *instr, u16 offset)
+{
+	u16 addr_lo, addr_hi;
+
+	addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
+	addr_hi = offset != addr_lo;
+	*instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO);
+	*instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
+	*instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo);
+}
+
+/* --- Assembler logic --- */
+static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta, *next;
+	u32 off, br_idx;
+	u32 idx;
+
+	nfp_for_each_insn_walk2(nfp_prog, meta, next) {
+		if (meta->skip)
+			continue;
+		if (BPF_CLASS(meta->insn.code) != BPF_JMP)
+			continue;
+
+		br_idx = nfp_prog_offset_to_index(nfp_prog, next->off) - 1;
+		if (!nfp_is_br(nfp_prog->prog[br_idx])) {
+			pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
+			       br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
+			return -ELOOP;
+		}
+		/* Leave special branches for later */
+		if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]))
+			continue;
+
+		/* Find the target offset in assembler realm */
+		off = meta->insn.off;
+		if (!off) {
+			pr_err("Fixup found zero offset!!\n");
+			return -ELOOP;
+		}
+
+		while (off && nfp_meta_has_next(nfp_prog, next)) {
+			next = nfp_meta_next(next);
+			off--;
+		}
+		if (off) {
+			pr_err("Fixup found too large jump!! %d\n", off);
+			return -ELOOP;
+		}
+
+		if (next->skip) {
+			pr_err("Branch landing on removed instruction!!\n");
+			return -ELOOP;
+		}
+
+		for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off);
+		     idx <= br_idx; idx++) {
+			if (!nfp_is_br(nfp_prog->prog[idx]))
+				continue;
+			br_set_offset(&nfp_prog->prog[idx], next->off);
+		}
+	}
+
+	/* Fixup 'goto out's separately, they can be scattered around */
+	for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) {
+		enum br_special special;
+
+		if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE)
+			continue;
+
+		special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]);
+		switch (special) {
+		case OP_BR_NORMAL:
+			break;
+		case OP_BR_GO_OUT:
+			br_set_offset(&nfp_prog->prog[br_idx],
+				      nfp_prog->tgt_out);
+			break;
+		case OP_BR_GO_ABORT:
+			br_set_offset(&nfp_prog->prog[br_idx],
+				      nfp_prog->tgt_abort);
+			break;
+		}
+
+		nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL;
+	}
+
+	return 0;
+}
+
+static void nfp_intro(struct nfp_prog *nfp_prog)
+{
+	emit_alu(nfp_prog, pkt_reg(nfp_prog),
+		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT);
+}
+
+static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog)
+{
+	const u8 act2code[] = {
+		[NN_ACT_TC_DROP]  = 0x22,
+	};
+	/* Target for aborts */
+	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
+	wrp_immed(nfp_prog, reg_both(0), 0);
+
+	/* Target for normal exits */
+	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
+	/* Legacy TC mode:
+	 *   0        0x11 -> pass,  count as stat0
+	 *  -1  drop  0x22 -> drop,  count as stat1
+	 *     redir  0x24 -> redir, count as stat1
+	 *  ife mark  0x21 -> pass,  count as stat1
+	 *  ife + tx  0x24 -> redir, count as stat1
+	 */
+	emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2);
+	emit_alu(nfp_prog, reg_a(0),
+		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
+
+	emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1);
+	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(act2code[nfp_prog->act]),
+		      SHF_SC_L_SHF, 16);
+}
+
+static void nfp_outro(struct nfp_prog *nfp_prog)
+{
+	switch (nfp_prog->act) {
+	case NN_ACT_TC_DROP:
+		nfp_outro_tc_legacy(nfp_prog);
+		break;
+	}
+}
+
+static int nfp_translate(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta;
+	int err;
+
+	nfp_intro(nfp_prog);
+	if (nfp_prog->error)
+		return nfp_prog->error;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		instr_cb_t cb = instr_cb[meta->insn.code];
+
+		meta->off = nfp_prog_current_offset(nfp_prog);
+
+		if (meta->skip) {
+			nfp_prog->n_translated++;
+			continue;
+		}
+
+		if (nfp_meta_has_prev(nfp_prog, meta) &&
+		    nfp_meta_prev(meta)->double_cb)
+			cb = nfp_meta_prev(meta)->double_cb;
+		if (!cb)
+			return -ENOENT;
+		err = cb(nfp_prog, meta);
+		if (err)
+			return err;
+
+		nfp_prog->n_translated++;
+	}
+
+	nfp_outro(nfp_prog);
+	if (nfp_prog->error)
+		return nfp_prog->error;
+
+	return nfp_fixup_branches(nfp_prog);
+}
+
+static int
+nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
+		 unsigned int cnt)
+{
+	unsigned int i;
+
+	for (i = 0; i < cnt; i++) {
+		struct nfp_insn_meta *meta;
+
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			return -ENOMEM;
+
+		meta->insn = prog[i];
+		meta->n = i;
+
+		list_add_tail(&meta->l, &nfp_prog->insns);
+	}
+
+	return 0;
+}
+
+/* --- Optimizations --- */
+static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		struct bpf_insn insn = meta->insn;
+
+		/* Programs converted from cBPF start with register xoring */
+		if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
+		    insn.src_reg == insn.dst_reg)
+			continue;
+
+		/* Programs start with R6 = R1 but we ignore the skb pointer */
+		if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
+		    insn.src_reg == 1 && insn.dst_reg == 6)
+			meta->skip = true;
+
+		/* Return as soon as something doesn't match */
+		if (!meta->skip)
+			return;
+	}
+}
+
+/* Try to rename registers so that program uses only low ones */
+static int nfp_bpf_opt_reg_rename(struct nfp_prog *nfp_prog)
+{
+	bool reg_used[MAX_BPF_REG] = {};
+	u8 tgt_reg[MAX_BPF_REG] = {};
+	struct nfp_insn_meta *meta;
+	unsigned int i, j;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		if (meta->skip)
+			continue;
+
+		reg_used[meta->insn.src_reg] = true;
+		reg_used[meta->insn.dst_reg] = true;
+	}
+
+	for (i = 0, j = 0; i < ARRAY_SIZE(tgt_reg); i++) {
+		if (!reg_used[i])
+			continue;
+
+		tgt_reg[i] = j++;
+	}
+	nfp_prog->num_regs = j;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		meta->insn.src_reg = tgt_reg[meta->insn.src_reg];
+		meta->insn.dst_reg = tgt_reg[meta->insn.dst_reg];
+	}
+
+	return 0;
+}
+
+/* Remove masking after load since our load guarantees this is not needed */
+static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta1, *meta2;
+	const s32 exp_mask[] = {
+		[BPF_B] = 0x000000ffU,
+		[BPF_H] = 0x0000ffffU,
+		[BPF_W] = 0xffffffffU,
+	};
+
+	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
+		struct bpf_insn insn, next;
+
+		insn = meta1->insn;
+		next = meta2->insn;
+
+		if (BPF_CLASS(insn.code) != BPF_LD)
+			continue;
+		if (BPF_MODE(insn.code) != BPF_ABS &&
+		    BPF_MODE(insn.code) != BPF_IND)
+			continue;
+
+		if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
+			continue;
+
+		if (!exp_mask[BPF_SIZE(insn.code)])
+			continue;
+		if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
+			continue;
+
+		if (next.src_reg || next.dst_reg)
+			continue;
+
+		meta2->skip = true;
+	}
+}
+
+static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta1, *meta2, *meta3;
+
+	nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
+		struct bpf_insn insn, next1, next2;
+
+		insn = meta1->insn;
+		next1 = meta2->insn;
+		next2 = meta3->insn;
+
+		if (BPF_CLASS(insn.code) != BPF_LD)
+			continue;
+		if (BPF_MODE(insn.code) != BPF_ABS &&
+		    BPF_MODE(insn.code) != BPF_IND)
+			continue;
+		if (BPF_SIZE(insn.code) != BPF_W)
+			continue;
+
+		if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
+		      next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
+		    !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
+		      next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
+			continue;
+
+		if (next1.src_reg || next1.dst_reg ||
+		    next2.src_reg || next2.dst_reg)
+			continue;
+
+		if (next1.imm != 0x20 || next2.imm != 0x20)
+			continue;
+
+		meta2->skip = true;
+		meta3->skip = true;
+	}
+}
+
+static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
+{
+	int ret;
+
+	nfp_bpf_opt_reg_init(nfp_prog);
+
+	ret = nfp_bpf_opt_reg_rename(nfp_prog);
+	if (ret)
+		return ret;
+
+	nfp_bpf_opt_ld_mask(nfp_prog);
+	nfp_bpf_opt_ld_shift(nfp_prog);
+
+	return 0;
+}
+
+/**
+ * nfp_bpf_jit() - translate BPF code into NFP assembly
+ * @filter:	kernel BPF filter struct
+ * @prog_mem:	memory to store assembler instructions
+ * @act:	action attached to this eBPF program
+ * @prog_start:	offset of the first instruction when loaded
+ * @prog_done:	where to jump on exit
+ * @prog_sz:	size of @prog_mem in instructions
+ * @res:	achieved parameters of translation results
+ */
+int
+nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem,
+	    enum nfp_bpf_action_type act,
+	    unsigned int prog_start, unsigned int prog_done,
+	    unsigned int prog_sz, struct nfp_bpf_result *res)
+{
+	struct nfp_prog *nfp_prog;
+	int ret;
+
+	nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL);
+	if (!nfp_prog)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&nfp_prog->insns);
+	nfp_prog->act = act;
+	nfp_prog->start_off = prog_start;
+	nfp_prog->tgt_done = prog_done;
+
+	ret = nfp_prog_prepare(nfp_prog, filter->insnsi, filter->len);
+	if (ret)
+		goto out;
+
+	ret = nfp_prog_verify(nfp_prog, filter);
+	if (ret)
+		goto out;
+
+	ret = nfp_bpf_optimize(nfp_prog);
+	if (ret)
+		goto out;
+
+	if (nfp_prog->num_regs <= 7)
+		nfp_prog->regs_per_thread = 16;
+	else
+		nfp_prog->regs_per_thread = 32;
+
+	nfp_prog->prog = prog_mem;
+	nfp_prog->__prog_alloc_len = prog_sz;
+
+	ret = nfp_translate(nfp_prog);
+	if (ret) {
+		pr_err("Translation failed with error %d (translated: %u)\n",
+		       ret, nfp_prog->n_translated);
+		ret = -EINVAL;
+	}
+
+	res->n_instr = nfp_prog->prog_len;
+	res->dense_mode = nfp_prog->num_regs <= 7;
+out:
+	nfp_prog_free(nfp_prog);
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
new file mode 100644
index 000000000000..ef6775b54168
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt)	"NFP net bpf: " fmt
+
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/kernel.h>
+#include <linux/pkt_cls.h>
+
+#include "nfp_bpf.h"
+
+/* Analyzer/verifier definitions */
+struct nfp_bpf_analyzer_priv {
+	struct nfp_prog *prog;
+	struct nfp_insn_meta *meta;
+};
+
+static struct nfp_insn_meta *
+nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+		  unsigned int insn_idx, unsigned int n_insns)
+{
+	unsigned int forward, backward, i;
+
+	backward = meta->n - insn_idx;
+	forward = insn_idx - meta->n;
+
+	if (min(forward, backward) > n_insns - insn_idx - 1) {
+		backward = n_insns - insn_idx - 1;
+		meta = nfp_prog_last_meta(nfp_prog);
+	}
+	if (min(forward, backward) > insn_idx && backward > insn_idx) {
+		forward = insn_idx;
+		meta = nfp_prog_first_meta(nfp_prog);
+	}
+
+	if (forward < backward)
+		for (i = 0; i < forward; i++)
+			meta = nfp_meta_next(meta);
+	else
+		for (i = 0; i < backward; i++)
+			meta = nfp_meta_prev(meta);
+
+	return meta;
+}
+
+static int
+nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
+		   const struct bpf_verifier_env *env)
+{
+	const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
+
+	if (reg0->type != CONST_IMM) {
+		pr_info("unsupported exit state: %d, imm: %llx\n",
+			reg0->type, reg0->imm);
+		return -EINVAL;
+	}
+
+	if (reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) {
+		pr_info("unsupported exit state: %d, imm: %llx\n",
+			reg0->type, reg0->imm);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog,
+		      const struct bpf_verifier_env *env, u8 reg)
+{
+	if (env->cur_state.regs[reg].type != PTR_TO_CTX)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int
+nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
+{
+	struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv;
+	struct nfp_insn_meta *meta = priv->meta;
+
+	meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len);
+	priv->meta = meta;
+
+	if (meta->insn.src_reg == BPF_REG_10 ||
+	    meta->insn.dst_reg == BPF_REG_10) {
+		pr_err("stack not yet supported\n");
+		return -EINVAL;
+	}
+	if (meta->insn.src_reg >= MAX_BPF_REG ||
+	    meta->insn.dst_reg >= MAX_BPF_REG) {
+		pr_err("program uses extended registers - jit hardening?\n");
+		return -EINVAL;
+	}
+
+	if (meta->insn.code == (BPF_JMP | BPF_EXIT))
+		return nfp_bpf_check_exit(priv->prog, env);
+
+	if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM))
+		return nfp_bpf_check_ctx_ptr(priv->prog, env,
+					     meta->insn.src_reg);
+	if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM))
+		return nfp_bpf_check_ctx_ptr(priv->prog, env,
+					     meta->insn.dst_reg);
+
+	return 0;
+}
+
+static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = {
+	.insn_hook = nfp_verify_insn,
+};
+
+int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog)
+{
+	struct nfp_bpf_analyzer_priv *priv;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->prog = nfp_prog;
+	priv->meta = nfp_prog_first_meta(nfp_prog);
+
+	ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv);
+
+	kfree(priv);
+
+	return ret;
+}
-- 
1.9.1

^ permalink raw reply related

* [PATCHv7 net-next 00/15] BPF hardware offload (cls_bpf for now)
From: Jakub Kicinski @ 2016-09-21 10:43 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, Jakub Kicinski

Hi!

Rebased and improved.

v7:
 - fix patch 6.
v6 (patch 8 only):
 - explicitly check for registers >= MAX_BPF_REG;
 - fix leaky error path.
v5:
 - fix names of guard defines in bpf_verfier.h.
v4:
 - rename parser -> analyzer;
 - reorganize the analyzer patches a bit;
 - use bitfield.h directly.

--- merge blurb:
In the last year a lot of progress have been made on offloading
simpler TC classifiers.  There is also growing interest in using
BPF for generic high-speed packet processing in the kernel.
It seems beneficial to tie those two trends together and think
about hardware offloads of BPF programs.  This patch set presents
such offload to Netronome smart NICs.  cls_bpf is extended with
hardware offload capabilities and NFP driver gets a JIT translator
which in presence of capable firmware can be used to offload
the BPF program onto the card.

BPF JIT implementation is not 100% complete (e.g. missing instructions)
but it is functional.  Encouragingly it should be possible to
offload most (if not all) advanced BPF features onto the NIC - 
including packet modification, maps, tunnel encap/decap etc.

Example of basic tests I used:
  __section_cls_entry
  int cls_entry(struct __sk_buff *skb)
  {
	if (load_byte(skb, 0) != 0x0)
		return 0;

	if (load_byte(skb, 4) != 0x1)
		return 0;

	skb->mark = 0xcafe;

	if (load_byte(skb, 50) != 0xff)
		return 0;

	return ~0U;
  }

Above code can be compiled with Clang and loaded like this:
# ethtool -K p1p1 hw-tc-offload on
# tc qdisc add dev p1p1 ingress
# tc filter add dev p1p1 parent ffff:  bpf obj prog.o action drop

This set implements the basic transparent offload, the skip_{sw,hw}
flags and reporting statistics for cls_bpf.

Jakub Kicinski (15):
  net: cls_bpf: add hardware offload
  net: cls_bpf: limit hardware offload by software-only flag
  net: cls_bpf: add support for marking filters as hardware-only
  bpf: don't (ab)use instructions to store state
  bpf: expose internal verfier structures
  bpf: enable non-core use of the verfier
  bpf: recognize 64bit immediate loads as consts
  nfp: add BPF to NFP code translator
  nfp: bpf: add hardware bpf offload
  net: cls_bpf: allow offloaded filters to update stats
  nfp: bpf: allow offloaded filters to update stats
  nfp: bpf: add packet marking support
  net: act_mirred: allow statistic updates from offloaded actions
  nfp: bpf: add support for legacy redirect action
  nfp: bpf: add offload of TC direct action mode

 drivers/net/ethernet/netronome/nfp/Makefile        |    7 +
 drivers/net/ethernet/netronome/nfp/nfp_asm.h       |  233 +++
 drivers/net/ethernet/netronome/nfp/nfp_bpf.h       |  212 +++
 drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c   | 1811 ++++++++++++++++++++
 .../net/ethernet/netronome/nfp/nfp_bpf_verifier.c  |  171 ++
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |   47 +-
 .../net/ethernet/netronome/nfp/nfp_net_common.c    |  134 +-
 drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h  |   51 +-
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   |   12 +
 .../net/ethernet/netronome/nfp/nfp_net_offload.c   |  291 ++++
 .../net/ethernet/netronome/nfp/nfp_netvf_main.c    |    2 +-
 include/linux/bpf_verifier.h                       |   90 +
 include/linux/netdevice.h                          |    2 +
 include/net/pkt_cls.h                              |   16 +
 include/uapi/linux/pkt_cls.h                       |    1 +
 kernel/bpf/verifier.c                              |  406 +++--
 net/sched/act_mirred.c                             |    8 +
 net/sched/cls_bpf.c                                |  117 +-
 18 files changed, 3392 insertions(+), 219 deletions(-)
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_asm.h
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf.h
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
 create mode 100644 include/linux/bpf_verifier.h

-- 
1.9.1

^ permalink raw reply

* [PATCHv7 net-next 06/15] bpf: enable non-core use of the verfier
From: Jakub Kicinski @ 2016-09-21 10:43 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, Jakub Kicinski
In-Reply-To: <1474454647-20137-1-git-send-email-jakub.kicinski@netronome.com>

Advanced JIT compilers and translators may want to use
eBPF verifier as a base for parsers or to perform custom
checks and validations.

Add ability for external users to invoke the verifier
and provide callbacks to be invoked for every intruction
checked.  For now only add most basic callback for
per-instruction pre-interpretation checks is added.  More
advanced users may also like to have per-instruction post
callback and state comparison callback.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
v4:
 - separate from the header split patch.
---
 include/linux/bpf_verifier.h | 11 +++++++
 kernel/bpf/verifier.c        | 68 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 9457a22fc6e0..c5cb661712c9 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -59,6 +59,12 @@ struct bpf_insn_aux_data {
 
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
 
+struct bpf_verifier_env;
+struct bpf_ext_analyzer_ops {
+	int (*insn_hook)(struct bpf_verifier_env *env,
+			 int insn_idx, int prev_insn_idx);
+};
+
 /* single container for all structs
  * one verifier_env per bpf_check() call
  */
@@ -68,6 +74,8 @@ struct bpf_verifier_env {
 	int stack_size;			/* number of states to be processed */
 	struct bpf_verifier_state cur_state; /* current verifier state */
 	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
+	const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */
+	void *analyzer_priv; /* pointer to external analyzer's private data */
 	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
 	u32 used_map_cnt;		/* number of used maps */
 	u32 id_gen;			/* used to generate unique reg IDs */
@@ -76,4 +84,7 @@ struct bpf_verifier_env {
 	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
 };
 
+int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
+		 void *priv);
+
 #endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index dca2b9b1d02e..ee86a77dc40b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -632,6 +632,10 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 static int check_ctx_access(struct bpf_verifier_env *env, int off, int size,
 			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
 {
+	/* for analyzer ctx accesses are already validated and converted */
+	if (env->analyzer_ops)
+		return 0;
+
 	if (env->prog->aux->ops->is_valid_access &&
 	    env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) {
 		/* remember the offset of last byte accessed in ctx */
@@ -2225,6 +2229,15 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	return 0;
 }
 
+static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
+				  int insn_idx, int prev_insn_idx)
+{
+	if (!env->analyzer_ops || !env->analyzer_ops->insn_hook)
+		return 0;
+
+	return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx);
+}
+
 static int do_check(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state *state = &env->cur_state;
@@ -2283,6 +2296,10 @@ static int do_check(struct bpf_verifier_env *env)
 			print_bpf_insn(insn);
 		}
 
+		err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
+		if (err)
+			return err;
+
 		if (class == BPF_ALU || class == BPF_ALU64) {
 			err = check_alu_op(env, insn);
 			if (err)
@@ -2845,3 +2862,54 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 	kfree(env);
 	return ret;
 }
+
+int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
+		 void *priv)
+{
+	struct bpf_verifier_env *env;
+	int ret;
+
+	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
+	if (!env)
+		return -ENOMEM;
+
+	env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
+				     prog->len);
+	ret = -ENOMEM;
+	if (!env->insn_aux_data)
+		goto err_free_env;
+	env->prog = prog;
+	env->analyzer_ops = ops;
+	env->analyzer_priv = priv;
+
+	/* grab the mutex to protect few globals used by verifier */
+	mutex_lock(&bpf_verifier_lock);
+
+	log_level = 0;
+
+	env->explored_states = kcalloc(env->prog->len,
+				       sizeof(struct bpf_verifier_state_list *),
+				       GFP_KERNEL);
+	ret = -ENOMEM;
+	if (!env->explored_states)
+		goto skip_full_check;
+
+	ret = check_cfg(env);
+	if (ret < 0)
+		goto skip_full_check;
+
+	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
+
+	ret = do_check(env);
+
+skip_full_check:
+	while (pop_stack(env, NULL) >= 0);
+	free_states(env);
+
+	mutex_unlock(&bpf_verifier_lock);
+	vfree(env->insn_aux_data);
+err_free_env:
+	kfree(env);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bpf_analyzer);
-- 
1.9.1

^ permalink raw reply related

* [PATCHv7 net-next 11/15] nfp: bpf: allow offloaded filters to update stats
From: Jakub Kicinski @ 2016-09-21 10:44 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, Jakub Kicinski
In-Reply-To: <1474454647-20137-1-git-send-email-jakub.kicinski@netronome.com>

Periodically poll stats and call into offloaded actions
to update them.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
v3:
 - add missing hunk with ethtool stats.
---
 drivers/net/ethernet/netronome/nfp/nfp_net.h       | 19 +++++++
 .../net/ethernet/netronome/nfp/nfp_net_common.c    |  3 ++
 .../net/ethernet/netronome/nfp/nfp_net_ethtool.c   | 12 +++++
 .../net/ethernet/netronome/nfp/nfp_net_offload.c   | 63 ++++++++++++++++++++++
 4 files changed, 97 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index ea6f5e667f27..13c6a9001b4d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -62,6 +62,9 @@
 /* Max time to wait for NFP to respond on updates (in seconds) */
 #define NFP_NET_POLL_TIMEOUT	5
 
+/* Interval for reading offloaded filter stats */
+#define NFP_NET_STAT_POLL_IVL	msecs_to_jiffies(100)
+
 /* Bar allocation */
 #define NFP_NET_CTRL_BAR	0
 #define NFP_NET_Q0_BAR		2
@@ -405,6 +408,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
 	       fw_ver->minor == minor;
 }
 
+struct nfp_stat_pair {
+	u64 pkts;
+	u64 bytes;
+};
+
 /**
  * struct nfp_net - NFP network device structure
  * @pdev:               Backpointer to PCI device
@@ -428,6 +436,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
  * @rss_cfg:            RSS configuration
  * @rss_key:            RSS secret key
  * @rss_itbl:           RSS indirection table
+ * @rx_filter:		Filter offload statistics - dropped packets/bytes
+ * @rx_filter_prev:	Filter offload statistics - values from previous update
+ * @rx_filter_change:	Jiffies when statistics last changed
+ * @rx_filter_stats_timer:  Timer for polling filter offload statistics
+ * @rx_filter_lock:	Lock protecting timer state changes (teardown)
  * @max_tx_rings:       Maximum number of TX rings supported by the Firmware
  * @max_rx_rings:       Maximum number of RX rings supported by the Firmware
  * @num_tx_rings:       Currently configured number of TX rings
@@ -504,6 +517,11 @@ struct nfp_net {
 	u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ];
 	u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ];
 
+	struct nfp_stat_pair rx_filter, rx_filter_prev;
+	unsigned long rx_filter_change;
+	struct timer_list rx_filter_stats_timer;
+	spinlock_t rx_filter_lock;
+
 	int max_tx_rings;
 	int max_rx_rings;
 
@@ -775,6 +793,7 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn)
 }
 #endif /* CONFIG_NFP_NET_DEBUG */
 
+void nfp_net_filter_stats_timer(unsigned long data);
 int
 nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
 		    struct tc_cls_bpf_offload *cls_bpf);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 51978dfe883b..f091eb758ca2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2703,10 +2703,13 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
 	nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
 
 	spin_lock_init(&nn->reconfig_lock);
+	spin_lock_init(&nn->rx_filter_lock);
 	spin_lock_init(&nn->link_status_lock);
 
 	setup_timer(&nn->reconfig_timer,
 		    nfp_net_reconfig_timer, (unsigned long)nn);
+	setup_timer(&nn->rx_filter_stats_timer,
+		    nfp_net_filter_stats_timer, (unsigned long)nn);
 
 	return nn;
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 4c9897220969..3418f2277e9d 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -106,6 +106,18 @@ static const struct _nfp_net_et_stats nfp_net_et_stats[] = {
 	{"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)},
 	{"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)},
 	{"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)},
+
+	{"bpf_pass_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_FRAMES)},
+	{"bpf_pass_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_BYTES)},
+	/* see comments in outro functions in nfp_bpf_jit.c to find out
+	 * how different BPF modes use app-specific counters
+	 */
+	{"bpf_app1_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_FRAMES)},
+	{"bpf_app1_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_BYTES)},
+	{"bpf_app2_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_FRAMES)},
+	{"bpf_app2_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_BYTES)},
+	{"bpf_app3_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_FRAMES)},
+	{"bpf_app3_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_BYTES)},
 };
 
 #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
index 313988cd3a43..0537a53e2174 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
@@ -51,6 +51,60 @@
 #include "nfp_net_ctrl.h"
 #include "nfp_net.h"
 
+void nfp_net_filter_stats_timer(unsigned long data)
+{
+	struct nfp_net *nn = (void *)data;
+	struct nfp_stat_pair latest;
+
+	spin_lock_bh(&nn->rx_filter_lock);
+
+	if (nn->ctrl & NFP_NET_CFG_CTRL_BPF)
+		mod_timer(&nn->rx_filter_stats_timer,
+			  jiffies + NFP_NET_STAT_POLL_IVL);
+
+	spin_unlock_bh(&nn->rx_filter_lock);
+
+	latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES);
+	latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES);
+
+	if (latest.pkts != nn->rx_filter.pkts)
+		nn->rx_filter_change = jiffies;
+
+	nn->rx_filter = latest;
+}
+
+static void nfp_net_bpf_stats_reset(struct nfp_net *nn)
+{
+	nn->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES);
+	nn->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES);
+	nn->rx_filter_prev = nn->rx_filter;
+	nn->rx_filter_change = jiffies;
+}
+
+static int
+nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
+{
+	struct tc_action *a;
+	LIST_HEAD(actions);
+	u64 bytes, pkts;
+
+	pkts = nn->rx_filter.pkts - nn->rx_filter_prev.pkts;
+	bytes = nn->rx_filter.bytes - nn->rx_filter_prev.bytes;
+	bytes -= pkts * ETH_HLEN;
+
+	nn->rx_filter_prev = nn->rx_filter;
+
+	preempt_disable();
+
+	tcf_exts_to_list(cls_bpf->exts, &actions);
+	list_for_each_entry(a, &actions, list)
+		tcf_action_stats_update(a, bytes, pkts, nn->rx_filter_change);
+
+	preempt_enable();
+
+	return 0;
+}
+
 static int
 nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
 {
@@ -147,6 +201,9 @@ nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags,
 		nn_err(nn, "FW command error while enabling BPF: %d\n", err);
 
 	dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr);
+
+	nfp_net_bpf_stats_reset(nn);
+	mod_timer(&nn->rx_filter_stats_timer, jiffies + NFP_NET_STAT_POLL_IVL);
 }
 
 static int nfp_net_bpf_stop(struct nfp_net *nn)
@@ -154,9 +211,12 @@ static int nfp_net_bpf_stop(struct nfp_net *nn)
 	if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF))
 		return 0;
 
+	spin_lock_bh(&nn->rx_filter_lock);
 	nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF;
+	spin_unlock_bh(&nn->rx_filter_lock);
 	nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
 
+	del_timer_sync(&nn->rx_filter_stats_timer);
 	nn->bpf_offload_skip_sw = 0;
 
 	return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
@@ -214,6 +274,9 @@ nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
 	case TC_CLSBPF_DESTROY:
 		return nfp_net_bpf_stop(nn);
 
+	case TC_CLSBPF_STATS:
+		return nfp_net_bpf_stats_update(nn, cls_bpf);
+
 	default:
 		return -ENOTSUPP;
 	}
-- 
1.9.1

^ permalink raw reply related

* [PATCHv7 net-next 12/15] nfp: bpf: add packet marking support
From: Jakub Kicinski @ 2016-09-21 10:44 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, Jakub Kicinski
In-Reply-To: <1474454647-20137-1-git-send-email-jakub.kicinski@netronome.com>

Add missing ABI defines and eBPF instructions to allow
mark to be passed on and extend prepend parsing on the
RX path to pick it up from packet metadata.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
v3:
 - change metadata format.
---
 drivers/net/ethernet/netronome/nfp/nfp_bpf.h       |  2 +
 drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c   | 19 +++++
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |  2 +
 .../net/ethernet/netronome/nfp/nfp_net_common.c    | 91 +++++++++++++++++-----
 drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h  |  7 ++
 .../net/ethernet/netronome/nfp/nfp_netvf_main.c    |  2 +-
 6 files changed, 101 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
index 3726421e353f..2adb1d80c7b7 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
@@ -91,6 +91,8 @@ enum nfp_bpf_reg_type {
 #define imm_both(np)	reg_both((np)->regs_per_thread - STATIC_REG_IMM)
 
 #define NFP_BPF_ABI_FLAGS	reg_nnr(0)
+#define   NFP_BPF_ABI_FLAG_MARK	1
+#define NFP_BPF_ABI_MARK	reg_nnr(1)
 #define NFP_BPF_ABI_PKT		reg_nnr(2)
 #define NFP_BPF_ABI_LEN		reg_nnr(3)
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
index cfbf53607fc9..368381f0357f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
@@ -674,6 +674,16 @@ static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
 	return construct_data_ind_ld(nfp_prog, offset, 0, false, size);
 }
 
+static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src)
+{
+	emit_alu(nfp_prog, NFP_BPF_ABI_MARK,
+		 reg_none(), ALU_OP_NONE, reg_b(src));
+	emit_alu(nfp_prog, NFP_BPF_ABI_FLAGS,
+		 NFP_BPF_ABI_FLAGS, ALU_OP_OR, reg_imm(NFP_BPF_ABI_FLAG_MARK));
+
+	return 0;
+}
+
 static void
 wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
 {
@@ -1117,6 +1127,14 @@ static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
+static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	if (meta->insn.off == offsetof(struct sk_buff, mark))
+		return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2);
+
+	return -ENOTSUPP;
+}
+
 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	if (meta->insn.off < 0) /* TODO */
@@ -1306,6 +1324,7 @@ static const instr_cb_t instr_cb[256] = {
 	[BPF_LD | BPF_IND | BPF_H] =	data_ind_ld2,
 	[BPF_LD | BPF_IND | BPF_W] =	data_ind_ld4,
 	[BPF_LDX | BPF_MEM | BPF_W] =	mem_ldx4,
+	[BPF_STX | BPF_MEM | BPF_W] =	mem_stx4,
 	[BPF_JMP | BPF_JA | BPF_K] =	jump,
 	[BPF_JMP | BPF_JEQ | BPF_K] =	jeq_imm,
 	[BPF_JMP | BPF_JGT | BPF_K] =	jgt_imm,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 13c6a9001b4d..ed824e11a1e3 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -269,6 +269,8 @@ struct nfp_net_rx_desc {
 	};
 };
 
+#define NFP_NET_META_FIELD_MASK GENMASK(NFP_NET_META_FIELD_SIZE - 1, 0)
+
 struct nfp_net_rx_hash {
 	__be32 hash_type;
 	__be32 hash;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index f091eb758ca2..415691edcaa5 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1293,38 +1293,72 @@ static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
 	}
 }
 
-/**
- * nfp_net_set_hash() - Set SKB hash data
- * @netdev: adapter's net_device structure
- * @skb:   SKB to set the hash data on
- * @rxd:   RX descriptor
- *
- * The RSS hash and hash-type are pre-pended to the packet data.
- * Extract and decode it and set the skb fields.
- */
 static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb,
-			     struct nfp_net_rx_desc *rxd)
+			     unsigned int type, __be32 *hash)
 {
-	struct nfp_net_rx_hash *rx_hash;
-
-	if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) ||
-	    !(netdev->features & NETIF_F_RXHASH))
+	if (!(netdev->features & NETIF_F_RXHASH))
 		return;
 
-	rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
-
-	switch (be32_to_cpu(rx_hash->hash_type)) {
+	switch (type) {
 	case NFP_NET_RSS_IPV4:
 	case NFP_NET_RSS_IPV6:
 	case NFP_NET_RSS_IPV6_EX:
-		skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3);
+		skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3);
 		break;
 	default:
-		skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4);
+		skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4);
 		break;
 	}
 }
 
+static void
+nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb,
+		      struct nfp_net_rx_desc *rxd)
+{
+	struct nfp_net_rx_hash *rx_hash;
+
+	if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
+		return;
+
+	rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
+
+	nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type),
+			 &rx_hash->hash);
+}
+
+static void *
+nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
+		   int meta_len)
+{
+	u8 *data = skb->data - meta_len;
+	u32 meta_info;
+
+	meta_info = get_unaligned_be32(data);
+	data += 4;
+
+	while (meta_info) {
+		switch (meta_info & NFP_NET_META_FIELD_MASK) {
+		case NFP_NET_META_HASH:
+			meta_info >>= NFP_NET_META_FIELD_SIZE;
+			nfp_net_set_hash(netdev, skb,
+					 meta_info & NFP_NET_META_FIELD_MASK,
+					 (__be32 *)data);
+			data += 4;
+			break;
+		case NFP_NET_META_MARK:
+			skb->mark = get_unaligned_be32(data);
+			data += 4;
+			break;
+		default:
+			return NULL;
+		}
+
+		meta_info >>= NFP_NET_META_FIELD_SIZE;
+	}
+
+	return data;
+}
+
 /**
  * nfp_net_rx() - receive up to @budget packets on @rx_ring
  * @rx_ring:   RX ring to receive from
@@ -1439,14 +1473,29 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 			skb_reserve(skb, nn->rx_offset);
 		skb_put(skb, data_len - meta_len);
 
-		nfp_net_set_hash(nn->netdev, skb, rxd);
-
 		/* Stats update */
 		u64_stats_update_begin(&r_vec->rx_sync);
 		r_vec->rx_pkts++;
 		r_vec->rx_bytes += skb->len;
 		u64_stats_update_end(&r_vec->rx_sync);
 
+		if (nn->fw_ver.major <= 3) {
+			nfp_net_set_hash_desc(nn->netdev, skb, rxd);
+		} else if (meta_len) {
+			void *end;
+
+			end = nfp_net_parse_meta(nn->netdev, skb, meta_len);
+			if (unlikely(end != skb->data)) {
+				u64_stats_update_begin(&r_vec->rx_sync);
+				r_vec->rx_drops++;
+				u64_stats_update_end(&r_vec->rx_sync);
+
+				dev_kfree_skb_any(skb);
+				nn_warn_ratelimit(nn, "invalid RX packet metadata\n");
+				continue;
+			}
+		}
+
 		skb_record_rx_queue(skb, rx_ring->idx);
 		skb->protocol = eth_type_trans(skb, nn->netdev);
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 7aa11f3c5ef0..93b10b441acb 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -66,6 +66,13 @@
 #define NFP_NET_LSO_MAX_HDR_SZ		255
 
 /**
+ * Prepend field types
+ */
+#define NFP_NET_META_FIELD_SIZE		4
+#define NFP_NET_META_HASH		1 /* next field carries hash type */
+#define NFP_NET_META_MARK		2
+
+/**
  * Hash type pre-pended when a RSS hash was computed
  */
 #define NFP_NET_RSS_NONE                0
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
index f7062cb648e1..2800bbf65a89 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
@@ -148,7 +148,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
 		dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n");
 	} else {
 		switch (fw_ver.major) {
-		case 1 ... 3:
+		case 1 ... 4:
 			if (is_nfp3200) {
 				stride = 2;
 				tx_bar_no = NFP_NET_Q0_BAR;
-- 
1.9.1

^ permalink raw reply related

* [PATCHv7 net-next 03/15] net: cls_bpf: add support for marking filters as hardware-only
From: Jakub Kicinski @ 2016-09-21 10:43 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, Jakub Kicinski
In-Reply-To: <1474454647-20137-1-git-send-email-jakub.kicinski@netronome.com>

Add cls_bpf support for the TCA_CLS_FLAGS_SKIP_SW flag.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/sched/cls_bpf.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index ebf01f7c1470..1becc2fe1bc5 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -28,7 +28,7 @@ MODULE_DESCRIPTION("TC BPF based classifier");
 
 #define CLS_BPF_NAME_LEN	256
 #define CLS_BPF_SUPPORTED_GEN_FLAGS		\
-	TCA_CLS_FLAGS_SKIP_HW
+	(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)
 
 struct cls_bpf_head {
 	struct list_head plist;
@@ -96,7 +96,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 
 		qdisc_skb_cb(skb)->tc_classid = prog->res.classid;
 
-		if (at_ingress) {
+		if (tc_skip_sw(prog->gen_flags)) {
+			filter_res = prog->exts_integrated ? TC_ACT_UNSPEC : 0;
+		} else if (at_ingress) {
 			/* It is safe to push/pull even if skb_shared() */
 			__skb_push(skb, skb->mac_len);
 			bpf_compute_data_end(skb);
@@ -164,32 +166,42 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 					     tp->protocol, &offload);
 }
 
-static void cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
-			    struct cls_bpf_prog *oldprog)
+static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+			   struct cls_bpf_prog *oldprog)
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
 	struct cls_bpf_prog *obj = prog;
 	enum tc_clsbpf_command cmd;
+	bool skip_sw;
+	int ret;
+
+	skip_sw = tc_skip_sw(prog->gen_flags) ||
+		(oldprog && tc_skip_sw(oldprog->gen_flags));
 
 	if (oldprog && oldprog->offloaded) {
 		if (tc_should_offload(dev, tp, prog->gen_flags)) {
 			cmd = TC_CLSBPF_REPLACE;
-		} else {
+		} else if (!tc_skip_sw(prog->gen_flags)) {
 			obj = oldprog;
 			cmd = TC_CLSBPF_DESTROY;
+		} else {
+			return -EINVAL;
 		}
 	} else {
 		if (!tc_should_offload(dev, tp, prog->gen_flags))
-			return;
+			return skip_sw ? -EINVAL : 0;
 		cmd = TC_CLSBPF_ADD;
 	}
 
-	if (cls_bpf_offload_cmd(tp, obj, cmd))
-		return;
+	ret = cls_bpf_offload_cmd(tp, obj, cmd);
+	if (ret)
+		return skip_sw ? ret : 0;
 
 	obj->offloaded = true;
 	if (oldprog)
 		oldprog->offloaded = false;
+
+	return 0;
 }
 
 static void cls_bpf_stop_offload(struct tcf_proto *tp,
@@ -498,7 +510,11 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 	if (ret < 0)
 		goto errout;
 
-	cls_bpf_offload(tp, prog, oldprog);
+	ret = cls_bpf_offload(tp, prog, oldprog);
+	if (ret) {
+		cls_bpf_delete_prog(tp, prog);
+		return ret;
+	}
 
 	if (oldprog) {
 		list_replace_rcu(&oldprog->link, &prog->link);
-- 
1.9.1

^ permalink raw reply related

* [PATCHv7 net-next 10/15] net: cls_bpf: allow offloaded filters to update stats
From: Jakub Kicinski @ 2016-09-21 10:44 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, Jakub Kicinski
In-Reply-To: <1474454647-20137-1-git-send-email-jakub.kicinski@netronome.com>

Call into offloaded filters to update stats.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/pkt_cls.h |  1 +
 net/sched/cls_bpf.c   | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 57af9f3032ff..5ccaa4be7d96 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -490,6 +490,7 @@ enum tc_clsbpf_command {
 	TC_CLSBPF_ADD,
 	TC_CLSBPF_REPLACE,
 	TC_CLSBPF_DESTROY,
+	TC_CLSBPF_STATS,
 };
 
 struct tc_cls_bpf_offload {
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 1becc2fe1bc5..bb1d5a487081 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -221,6 +221,15 @@ static void cls_bpf_stop_offload(struct tcf_proto *tp,
 	prog->offloaded = false;
 }
 
+static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
+					 struct cls_bpf_prog *prog)
+{
+	if (!prog->offloaded)
+		return;
+
+	cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+}
+
 static int cls_bpf_init(struct tcf_proto *tp)
 {
 	struct cls_bpf_head *head;
@@ -577,6 +586,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 
 	tm->tcm_handle = prog->handle;
 
+	cls_bpf_offload_update_stats(tp, prog);
+
 	nest = nla_nest_start(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
-- 
1.9.1

^ permalink raw reply related

* [PATCHv7 net-next 13/15] net: act_mirred: allow statistic updates from offloaded actions
From: Jakub Kicinski @ 2016-09-21 10:44 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, Jakub Kicinski
In-Reply-To: <1474454647-20137-1-git-send-email-jakub.kicinski@netronome.com>

Implement .stats_update() callback.  The implementation
is generic and can be reused by other simple actions if
needed.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 net/sched/act_mirred.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1c76387c5d9c..667dc382df82 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -204,6 +204,13 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 	return retval;
 }
 
+static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
+			     u64 lastuse)
+{
+	tcf_lastuse_update(&a->tcfa_tm);
+	_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+}
+
 static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
 			   int ref)
 {
@@ -281,6 +288,7 @@ static struct tc_action_ops act_mirred_ops = {
 	.type		=	TCA_ACT_MIRRED,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_mirred,
+	.stats_update	=	tcf_stats_update,
 	.dump		=	tcf_mirred_dump,
 	.cleanup	=	tcf_mirred_release,
 	.init		=	tcf_mirred_init,
-- 
1.9.1

^ permalink raw reply related

* [PATCHv7 net-next 09/15] nfp: bpf: add hardware bpf offload
From: Jakub Kicinski @ 2016-09-21 10:44 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, Jakub Kicinski
In-Reply-To: <1474454647-20137-1-git-send-email-jakub.kicinski@netronome.com>

Add hardware bpf offload on our smart NICs.  Detect if
capable firmware is loaded and use it to load the code JITed
with just added translator onto programmable engines.

This commit only supports offloading cls_bpf in legacy mode
(non-direct action).

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/Makefile        |   1 +
 drivers/net/ethernet/netronome/nfp/nfp_net.h       |  26 ++-
 .../net/ethernet/netronome/nfp/nfp_net_common.c    |  40 +++-
 drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h  |  44 ++++-
 .../net/ethernet/netronome/nfp/nfp_net_offload.c   | 220 +++++++++++++++++++++
 5 files changed, 324 insertions(+), 7 deletions(-)
 create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_net_offload.c

diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 5f12689bf523..0efb2ba9a558 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_NFP_NETVF)	+= nfp_netvf.o
 nfp_netvf-objs := \
 	    nfp_net_common.o \
 	    nfp_net_ethtool.o \
+	    nfp_net_offload.o \
 	    nfp_netvf_main.o
 
 ifeq ($(CONFIG_BPF_SYSCALL),y)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 690635660195..ea6f5e667f27 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -220,7 +220,7 @@ struct nfp_net_tx_ring {
 #define PCIE_DESC_RX_I_TCP_CSUM_OK	cpu_to_le16(BIT(11))
 #define PCIE_DESC_RX_I_UDP_CSUM		cpu_to_le16(BIT(10))
 #define PCIE_DESC_RX_I_UDP_CSUM_OK	cpu_to_le16(BIT(9))
-#define PCIE_DESC_RX_SPARE		cpu_to_le16(BIT(8))
+#define PCIE_DESC_RX_BPF		cpu_to_le16(BIT(8))
 #define PCIE_DESC_RX_EOP		cpu_to_le16(BIT(7))
 #define PCIE_DESC_RX_IP4_CSUM		cpu_to_le16(BIT(6))
 #define PCIE_DESC_RX_IP4_CSUM_OK	cpu_to_le16(BIT(5))
@@ -413,6 +413,7 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
  * @is_vf:              Is the driver attached to a VF?
  * @is_nfp3200:         Is the driver for a NFP-3200 card?
  * @fw_loaded:          Is the firmware loaded?
+ * @bpf_offload_skip_sw:  Offloaded BPF program will not be rerun by cls_bpf
  * @ctrl:               Local copy of the control register/word.
  * @fl_bufsz:           Currently configured size of the freelist buffers
  * @rx_offset:		Offset in the RX buffers where packet data starts
@@ -473,6 +474,7 @@ struct nfp_net {
 	unsigned is_vf:1;
 	unsigned is_nfp3200:1;
 	unsigned fw_loaded:1;
+	unsigned bpf_offload_skip_sw:1;
 
 	u32 ctrl;
 	u32 fl_bufsz;
@@ -561,12 +563,28 @@ struct nfp_net {
 /* Functions to read/write from/to a BAR
  * Performs any endian conversion necessary.
  */
+static inline u16 nn_readb(struct nfp_net *nn, int off)
+{
+	return readb(nn->ctrl_bar + off);
+}
+
 static inline void nn_writeb(struct nfp_net *nn, int off, u8 val)
 {
 	writeb(val, nn->ctrl_bar + off);
 }
 
-/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */
+/* NFP-3200 can't handle 16-bit accesses too well */
+static inline u16 nn_readw(struct nfp_net *nn, int off)
+{
+	WARN_ON_ONCE(nn->is_nfp3200);
+	return readw(nn->ctrl_bar + off);
+}
+
+static inline void nn_writew(struct nfp_net *nn, int off, u16 val)
+{
+	WARN_ON_ONCE(nn->is_nfp3200);
+	writew(val, nn->ctrl_bar + off);
+}
 
 static inline u32 nn_readl(struct nfp_net *nn, int off)
 {
@@ -757,4 +775,8 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn)
 }
 #endif /* CONFIG_NFP_NET_DEBUG */
 
+int
+nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
+		    struct tc_cls_bpf_offload *cls_bpf);
+
 #endif /* _NFP_NET_H_ */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 252e4924de0f..51978dfe883b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -60,6 +60,7 @@
 
 #include <linux/ktime.h>
 
+#include <net/pkt_cls.h>
 #include <net/vxlan.h>
 
 #include "nfp_net_ctrl.h"
@@ -2382,6 +2383,31 @@ static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev,
 	return stats;
 }
 
+static bool nfp_net_ebpf_capable(struct nfp_net *nn)
+{
+	if (nn->cap & NFP_NET_CFG_CTRL_BPF &&
+	    nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI)
+		return true;
+	return false;
+}
+
+static int
+nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
+		 struct tc_to_netdev *tc)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+		return -ENOTSUPP;
+	if (proto != htons(ETH_P_ALL))
+		return -ENOTSUPP;
+
+	if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn))
+		return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf);
+
+	return -EINVAL;
+}
+
 static int nfp_net_set_features(struct net_device *netdev,
 				netdev_features_t features)
 {
@@ -2436,6 +2462,11 @@ static int nfp_net_set_features(struct net_device *netdev,
 			new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
 	}
 
+	if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
+		nn_err(nn, "Cannot disable HW TC offload while in use\n");
+		return -EBUSY;
+	}
+
 	nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
 	       netdev->features, features, changed);
 
@@ -2585,6 +2616,7 @@ static const struct net_device_ops nfp_net_netdev_ops = {
 	.ndo_stop		= nfp_net_netdev_close,
 	.ndo_start_xmit		= nfp_net_tx,
 	.ndo_get_stats64	= nfp_net_stat64,
+	.ndo_setup_tc		= nfp_net_setup_tc,
 	.ndo_tx_timeout		= nfp_net_tx_timeout,
 	.ndo_set_rx_mode	= nfp_net_set_rx_mode,
 	.ndo_change_mtu		= nfp_net_change_mtu,
@@ -2610,7 +2642,7 @@ void nfp_net_info(struct nfp_net *nn)
 		nn->fw_ver.resv, nn->fw_ver.class,
 		nn->fw_ver.major, nn->fw_ver.minor,
 		nn->max_mtu);
-	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
 		nn->cap,
 		nn->cap & NFP_NET_CFG_CTRL_PROMISC  ? "PROMISC "  : "",
 		nn->cap & NFP_NET_CFG_CTRL_L2BC     ? "L2BCFILT " : "",
@@ -2627,7 +2659,8 @@ void nfp_net_info(struct nfp_net *nn)
 		nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
 		nn->cap & NFP_NET_CFG_CTRL_IRQMOD   ? "IRQMOD "   : "",
 		nn->cap & NFP_NET_CFG_CTRL_VXLAN    ? "VXLAN "    : "",
-		nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "	  : "");
+		nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "	  : "",
+		nfp_net_ebpf_capable(nn)            ? "BPF "	  : "");
 }
 
 /**
@@ -2795,6 +2828,9 @@ int nfp_net_netdev_init(struct net_device *netdev)
 
 	netdev->features = netdev->hw_features;
 
+	if (nfp_net_ebpf_capable(nn))
+		netdev->hw_features |= NETIF_F_HW_TC;
+
 	/* Advertise but disable TSO by default. */
 	netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index ad6c4e31cedd..7aa11f3c5ef0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -123,6 +123,7 @@
 #define   NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */
 #define   NFP_NET_CFG_CTRL_VXLAN	  (0x1 << 24) /* VXLAN tunnel support */
 #define   NFP_NET_CFG_CTRL_NVGRE	  (0x1 << 25) /* NVGRE tunnel support */
+#define   NFP_NET_CFG_CTRL_BPF		  (0x1 << 27) /* BPF offload capable */
 #define NFP_NET_CFG_UPDATE              0x0004
 #define   NFP_NET_CFG_UPDATE_GEN          (0x1 <<  0) /* General update */
 #define   NFP_NET_CFG_UPDATE_RING         (0x1 <<  1) /* Ring config change */
@@ -134,6 +135,7 @@
 #define   NFP_NET_CFG_UPDATE_RESET        (0x1 <<  7) /* Update due to FLR */
 #define   NFP_NET_CFG_UPDATE_IRQMOD       (0x1 <<  8) /* IRQ mod change */
 #define   NFP_NET_CFG_UPDATE_VXLAN	  (0x1 <<  9) /* VXLAN port change */
+#define   NFP_NET_CFG_UPDATE_BPF	  (0x1 << 10) /* BPF program load */
 #define   NFP_NET_CFG_UPDATE_ERR          (0x1 << 31) /* A error occurred */
 #define NFP_NET_CFG_TXRS_ENABLE         0x0008
 #define NFP_NET_CFG_RXRS_ENABLE         0x0010
@@ -196,10 +198,37 @@
 #define NFP_NET_CFG_VXLAN_SZ		  0x0008
 
 /**
- * 64B reserved for future use (0x0080 - 0x00c0)
+ * NFP6000 - BPF section
+ * @NFP_NET_CFG_BPF_ABI:	BPF ABI version
+ * @NFP_NET_CFG_BPF_CAP:	BPF capabilities
+ * @NFP_NET_CFG_BPF_MAX_LEN:	Maximum size of JITed BPF code in bytes
+ * @NFP_NET_CFG_BPF_START:	Offset at which BPF will be loaded
+ * @NFP_NET_CFG_BPF_DONE:	Offset to jump to on exit
+ * @NFP_NET_CFG_BPF_STACK_SZ:	Total size of stack area in 64B chunks
+ * @NFP_NET_CFG_BPF_INL_MTU:	Packet data split offset in 64B chunks
+ * @NFP_NET_CFG_BPF_SIZE:	Size of the JITed BPF code in instructions
+ * @NFP_NET_CFG_BPF_ADDR:	DMA address of the buffer with JITed BPF code
  */
-#define NFP_NET_CFG_RESERVED            0x0080
-#define NFP_NET_CFG_RESERVED_SZ         0x0040
+#define NFP_NET_CFG_BPF_ABI		0x0080
+#define   NFP_NET_BPF_ABI		1
+#define NFP_NET_CFG_BPF_CAP		0x0081
+#define   NFP_NET_BPF_CAP_RELO		(1 << 0) /* seamless reload */
+#define NFP_NET_CFG_BPF_MAX_LEN		0x0082
+#define NFP_NET_CFG_BPF_START		0x0084
+#define NFP_NET_CFG_BPF_DONE		0x0086
+#define NFP_NET_CFG_BPF_STACK_SZ	0x0088
+#define NFP_NET_CFG_BPF_INL_MTU		0x0089
+#define NFP_NET_CFG_BPF_SIZE		0x008e
+#define NFP_NET_CFG_BPF_ADDR		0x0090
+#define   NFP_NET_CFG_BPF_CFG_8CTX	(1 << 0) /* 8ctx mode */
+#define   NFP_NET_CFG_BPF_CFG_MASK	7ULL
+#define   NFP_NET_CFG_BPF_ADDR_MASK	(~NFP_NET_CFG_BPF_CFG_MASK)
+
+/**
+ * 40B reserved for future use (0x0098 - 0x00c0)
+ */
+#define NFP_NET_CFG_RESERVED            0x0098
+#define NFP_NET_CFG_RESERVED_SZ         0x0028
 
 /**
  * RSS configuration (0x0100 - 0x01ac):
@@ -303,6 +332,15 @@
 #define NFP_NET_CFG_STATS_TX_MC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x80)
 #define NFP_NET_CFG_STATS_TX_BC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x88)
 
+#define NFP_NET_CFG_STATS_APP0_FRAMES	(NFP_NET_CFG_STATS_BASE + 0x90)
+#define NFP_NET_CFG_STATS_APP0_BYTES	(NFP_NET_CFG_STATS_BASE + 0x98)
+#define NFP_NET_CFG_STATS_APP1_FRAMES	(NFP_NET_CFG_STATS_BASE + 0xa0)
+#define NFP_NET_CFG_STATS_APP1_BYTES	(NFP_NET_CFG_STATS_BASE + 0xa8)
+#define NFP_NET_CFG_STATS_APP2_FRAMES	(NFP_NET_CFG_STATS_BASE + 0xb0)
+#define NFP_NET_CFG_STATS_APP2_BYTES	(NFP_NET_CFG_STATS_BASE + 0xb8)
+#define NFP_NET_CFG_STATS_APP3_FRAMES	(NFP_NET_CFG_STATS_BASE + 0xc0)
+#define NFP_NET_CFG_STATS_APP3_BYTES	(NFP_NET_CFG_STATS_BASE + 0xc8)
+
 /**
  * Per ring stats (0x1000 - 0x1800)
  * options, 64bit per entry
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
new file mode 100644
index 000000000000..313988cd3a43
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * nfp_net_offload.c
+ * Netronome network device driver: TC offload functions for PF and VF
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+
+#include "nfp_bpf.h"
+#include "nfp_net_ctrl.h"
+#include "nfp_net.h"
+
+static int
+nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
+{
+	const struct tc_action *a;
+	LIST_HEAD(actions);
+
+	/* TC direct action */
+	if (cls_bpf->exts_integrated)
+		return -ENOTSUPP;
+
+	/* TC legacy mode */
+	if (!tc_single_action(cls_bpf->exts))
+		return -ENOTSUPP;
+
+	tcf_exts_to_list(cls_bpf->exts, &actions);
+	list_for_each_entry(a, &actions, list) {
+		if (is_tcf_gact_shot(a))
+			return NN_ACT_TC_DROP;
+	}
+
+	return -ENOTSUPP;
+}
+
+static int
+nfp_net_bpf_offload_prepare(struct nfp_net *nn,
+			    struct tc_cls_bpf_offload *cls_bpf,
+			    struct nfp_bpf_result *res,
+			    void **code, dma_addr_t *dma_addr, u16 max_instr)
+{
+	unsigned int code_sz = max_instr * sizeof(u64);
+	enum nfp_bpf_action_type act;
+	u16 start_off, done_off;
+	unsigned int max_mtu;
+	int ret;
+
+	ret = nfp_net_bpf_get_act(nn, cls_bpf);
+	if (ret < 0)
+		return ret;
+	act = ret;
+
+	max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
+	if (max_mtu < nn->netdev->mtu) {
+		nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n");
+		return -ENOTSUPP;
+	}
+
+	start_off = nn_readw(nn, NFP_NET_CFG_BPF_START);
+	done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE);
+
+	*code = dma_zalloc_coherent(&nn->pdev->dev, code_sz, dma_addr,
+				    GFP_KERNEL);
+	if (!*code)
+		return -ENOMEM;
+
+	ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off,
+			  max_instr, res);
+	if (ret)
+		goto out;
+
+	return 0;
+
+out:
+	dma_free_coherent(&nn->pdev->dev, code_sz, *code, *dma_addr);
+	return ret;
+}
+
+static void
+nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags,
+			   void *code, dma_addr_t dma_addr,
+			   unsigned int code_sz, unsigned int n_instr,
+			   bool dense_mode)
+{
+	u64 bpf_addr = dma_addr;
+	int err;
+
+	nn->bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW);
+
+	if (dense_mode)
+		bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX;
+
+	nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr);
+	nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr);
+
+	/* Load up the JITed code */
+	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF);
+	if (err)
+		nn_err(nn, "FW command error while loading BPF: %d\n", err);
+
+	/* Enable passing packets through BPF function */
+	nn->ctrl |= NFP_NET_CFG_CTRL_BPF;
+	nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
+	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
+	if (err)
+		nn_err(nn, "FW command error while enabling BPF: %d\n", err);
+
+	dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr);
+}
+
+static int nfp_net_bpf_stop(struct nfp_net *nn)
+{
+	if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF))
+		return 0;
+
+	nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF;
+	nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
+
+	nn->bpf_offload_skip_sw = 0;
+
+	return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
+}
+
+int
+nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
+		    struct tc_cls_bpf_offload *cls_bpf)
+{
+	struct nfp_bpf_result res;
+	dma_addr_t dma_addr;
+	u16 max_instr;
+	void *code;
+	int err;
+
+	max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN);
+
+	switch (cls_bpf->command) {
+	case TC_CLSBPF_REPLACE:
+		/* There is nothing stopping us from implementing seamless
+		 * replace but the simple method of loading I adopted in
+		 * the firmware does not handle atomic replace (i.e. we have to
+		 * stop the BPF offload and re-enable it).  Leaking-in a few
+		 * frames which didn't have BPF applied in the hardware should
+		 * be fine if software fallback is available, though.
+		 */
+		if (nn->bpf_offload_skip_sw)
+			return -EBUSY;
+
+		err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
+						  &dma_addr, max_instr);
+		if (err)
+			return err;
+
+		nfp_net_bpf_stop(nn);
+		nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code,
+					   dma_addr, max_instr * sizeof(u64),
+					   res.n_instr, res.dense_mode);
+		return 0;
+
+	case TC_CLSBPF_ADD:
+		if (nn->ctrl & NFP_NET_CFG_CTRL_BPF)
+			return -EBUSY;
+
+		err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
+						  &dma_addr, max_instr);
+		if (err)
+			return err;
+
+		nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code,
+					   dma_addr, max_instr * sizeof(u64),
+					   res.n_instr, res.dense_mode);
+		return 0;
+
+	case TC_CLSBPF_DESTROY:
+		return nfp_net_bpf_stop(nn);
+
+	default:
+		return -ENOTSUPP;
+	}
+}
-- 
1.9.1

^ permalink raw reply related

* [PATCHv7 net-next 01/15] net: cls_bpf: add hardware offload
From: Jakub Kicinski @ 2016-09-21 10:43 UTC (permalink / raw)
  To: netdev; +Cc: ast, daniel, Jakub Kicinski
In-Reply-To: <1474454647-20137-1-git-send-email-jakub.kicinski@netronome.com>

This patch adds hardware offload capability to cls_bpf classifier,
similar to what have been done with U32 and flower.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
v3:
 - s/filter/prog/ in struct tc_cls_bpf_offload.
v2:
 - drop unnecessary WARN_ON;
 - reformat error handling a bit.
---
 include/linux/netdevice.h |  2 ++
 include/net/pkt_cls.h     | 14 ++++++++++
 net/sched/cls_bpf.c       | 70 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a10d8d18ce19..69f242c71865 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -789,6 +789,7 @@ enum {
 	TC_SETUP_CLSU32,
 	TC_SETUP_CLSFLOWER,
 	TC_SETUP_MATCHALL,
+	TC_SETUP_CLSBPF,
 };
 
 struct tc_cls_u32_offload;
@@ -800,6 +801,7 @@ struct tc_to_netdev {
 		struct tc_cls_u32_offload *cls_u32;
 		struct tc_cls_flower_offload *cls_flower;
 		struct tc_cls_matchall_offload *cls_mall;
+		struct tc_cls_bpf_offload *cls_bpf;
 	};
 };
 
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a459be5fe1c2..41e8071dff87 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -486,4 +486,18 @@ struct tc_cls_matchall_offload {
 	unsigned long cookie;
 };
 
+enum tc_clsbpf_command {
+	TC_CLSBPF_ADD,
+	TC_CLSBPF_REPLACE,
+	TC_CLSBPF_DESTROY,
+};
+
+struct tc_cls_bpf_offload {
+	enum tc_clsbpf_command command;
+	struct tcf_exts *exts;
+	struct bpf_prog *prog;
+	const char *name;
+	bool exts_integrated;
+};
+
 #endif
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index c6f7a47541eb..6523c5b4c0a5 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -39,6 +39,7 @@ struct cls_bpf_prog {
 	struct list_head link;
 	struct tcf_result res;
 	bool exts_integrated;
+	bool offloaded;
 	struct tcf_exts exts;
 	u32 handle;
 	union {
@@ -138,6 +139,71 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
 	return !prog->bpf_ops;
 }
 
+static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+			       enum tc_clsbpf_command cmd)
+{
+	struct net_device *dev = tp->q->dev_queue->dev;
+	struct tc_cls_bpf_offload bpf_offload = {};
+	struct tc_to_netdev offload;
+
+	offload.type = TC_SETUP_CLSBPF;
+	offload.cls_bpf = &bpf_offload;
+
+	bpf_offload.command = cmd;
+	bpf_offload.exts = &prog->exts;
+	bpf_offload.prog = prog->filter;
+	bpf_offload.name = prog->bpf_name;
+	bpf_offload.exts_integrated = prog->exts_integrated;
+
+	return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+					     tp->protocol, &offload);
+}
+
+static void cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+			    struct cls_bpf_prog *oldprog)
+{
+	struct net_device *dev = tp->q->dev_queue->dev;
+	struct cls_bpf_prog *obj = prog;
+	enum tc_clsbpf_command cmd;
+
+	if (oldprog && oldprog->offloaded) {
+		if (tc_should_offload(dev, tp, 0)) {
+			cmd = TC_CLSBPF_REPLACE;
+		} else {
+			obj = oldprog;
+			cmd = TC_CLSBPF_DESTROY;
+		}
+	} else {
+		if (!tc_should_offload(dev, tp, 0))
+			return;
+		cmd = TC_CLSBPF_ADD;
+	}
+
+	if (cls_bpf_offload_cmd(tp, obj, cmd))
+		return;
+
+	obj->offloaded = true;
+	if (oldprog)
+		oldprog->offloaded = false;
+}
+
+static void cls_bpf_stop_offload(struct tcf_proto *tp,
+				 struct cls_bpf_prog *prog)
+{
+	int err;
+
+	if (!prog->offloaded)
+		return;
+
+	err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+	if (err) {
+		pr_err("Stopping hardware offload failed: %d\n", err);
+		return;
+	}
+
+	prog->offloaded = false;
+}
+
 static int cls_bpf_init(struct tcf_proto *tp)
 {
 	struct cls_bpf_head *head;
@@ -177,6 +243,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
 {
 	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg;
 
+	cls_bpf_stop_offload(tp, prog);
 	list_del_rcu(&prog->link);
 	tcf_unbind_filter(tp, &prog->res);
 	call_rcu(&prog->rcu, __cls_bpf_delete_prog);
@@ -193,6 +260,7 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force)
 		return false;
 
 	list_for_each_entry_safe(prog, tmp, &head->plist, link) {
+		cls_bpf_stop_offload(tp, prog);
 		list_del_rcu(&prog->link);
 		tcf_unbind_filter(tp, &prog->res);
 		call_rcu(&prog->rcu, __cls_bpf_delete_prog);
@@ -415,6 +483,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 	if (ret < 0)
 		goto errout;
 
+	cls_bpf_offload(tp, prog, oldprog);
+
 	if (oldprog) {
 		list_replace_rcu(&oldprog->link, &prog->link);
 		tcf_unbind_filter(tp, &oldprog->res);
-- 
1.9.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox