Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v2 5/7] MIPS: mscc: Add switch to ocelot
From: Alexandre Belloni @ 2018-04-26 19:59 UTC (permalink / raw)
  To: David S . Miller
  Cc: Allan Nielsen, razvan.stefanescu, po.liu, Thomas Petazzoni,
	Andrew Lunn, Florian Fainelli, netdev, devicetree, linux-kernel,
	linux-mips, Alexandre Belloni, James Hogan
In-Reply-To: <20180426195931.5393-1-alexandre.belloni@bootlin.com>

Ocelot has an integrated switch, add support for it.

Cc: James Hogan <jhogan@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 arch/mips/boot/dts/mscc/ocelot.dtsi | 88 +++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi
index dd239cab2f9d..4f33dbc67348 100644
--- a/arch/mips/boot/dts/mscc/ocelot.dtsi
+++ b/arch/mips/boot/dts/mscc/ocelot.dtsi
@@ -91,6 +91,72 @@
 			status = "disabled";
 		};
 
+		switch@1010000 {
+			compatible = "mscc,vsc7514-switch";
+			reg = <0x1010000 0x10000>,
+			      <0x1030000 0x10000>,
+			      <0x1080000 0x100>,
+			      <0x10d0000 0x10000>,
+			      <0x11e0000 0x100>,
+			      <0x11f0000 0x100>,
+			      <0x1200000 0x100>,
+			      <0x1210000 0x100>,
+			      <0x1220000 0x100>,
+			      <0x1230000 0x100>,
+			      <0x1240000 0x100>,
+			      <0x1250000 0x100>,
+			      <0x1260000 0x100>,
+			      <0x1270000 0x100>,
+			      <0x1280000 0x100>,
+			      <0x1800000 0x80000>,
+			      <0x1880000 0x10000>;
+			reg-names = "sys", "rew", "qs", "hsio", "port0",
+				    "port1", "port2", "port3", "port4", "port5",
+				    "port6", "port7", "port8", "port9", "port10",
+				    "qsys", "ana";
+			interrupts = <21 22>;
+			interrupt-names = "xtr", "inj";
+
+			ethernet-ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port0: port@0 {
+					reg = <0>;
+				};
+				port1: port@1 {
+					reg = <1>;
+				};
+				port2: port@2 {
+					reg = <2>;
+				};
+				port3: port@3 {
+					reg = <3>;
+				};
+				port4: port@4 {
+					reg = <4>;
+				};
+				port5: port@5 {
+					reg = <5>;
+				};
+				port6: port@6 {
+					reg = <6>;
+				};
+				port7: port@7 {
+					reg = <7>;
+				};
+				port8: port@8 {
+					reg = <8>;
+				};
+				port9: port@9 {
+					reg = <9>;
+				};
+				port10: port@10 {
+					reg = <10>;
+				};
+			};
+		};
+
 		reset@1070008 {
 			compatible = "mscc,ocelot-chip-reset";
 			reg = <0x1070008 0x4>;
@@ -113,5 +179,27 @@
 				function = "uart2";
 			};
 		};
+
+		mdio0: mdio@107009c {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "mscc,ocelot-miim";
+			reg = <0x107009c 0x36>, <0x10700f0 0x8>;
+			interrupts = <14>;
+			status = "disabled";
+
+			phy0: ethernet-phy@0 {
+				reg = <0>;
+			};
+			phy1: ethernet-phy@1 {
+				reg = <1>;
+			};
+			phy2: ethernet-phy@2 {
+				reg = <2>;
+			};
+			phy3: ethernet-phy@3 {
+				reg = <3>;
+			};
+		};
 	};
 };
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 10/13] sctp: re-use sctp_transport_pmtu in sctp_transport_route
From: Marcelo Ricardo Leitner @ 2018-04-26 19:58 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Vlad Yasevich, Neil Horman, Xin Long
In-Reply-To: <cover.1524772453.git.marcelo.leitner@gmail.com>

sctp_transport_route currently is very similar to sctp_transport_pmtu plus
a few other bits.

This patch reuses sctp_transport_pmtu in sctp_transport_route and removes
the duplicated code.

Also, as all calls to sctp_transport_route were forcing the dst release
before calling it, let's just include such release too.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 net/sctp/socket.c    |  6 ++----
 net/sctp/transport.c | 35 +++++++++++++++++------------------
 2 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a93b60a28cc5fff0ebe299905fb31cb37de43d88..bb08d44b838bbbb0ffbf80b194a1291fed4069e4 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -644,16 +644,15 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
 
 			list_for_each_entry(trans,
 			    &asoc->peer.transport_addr_list, transports) {
-				/* Clear the source and route cache */
-				sctp_transport_dst_release(trans);
 				trans->cwnd = min(4*asoc->pathmtu, max_t(__u32,
 				    2*asoc->pathmtu, 4380));
 				trans->ssthresh = asoc->peer.i.a_rwnd;
 				trans->rto = asoc->rto_initial;
 				sctp_max_rto(asoc, trans);
 				trans->rtt = trans->srtt = trans->rttvar = 0;
+				/* Clear the source and route cache */
 				sctp_transport_route(trans, NULL,
-				    sctp_sk(asoc->base.sk));
+						     sctp_sk(asoc->base.sk));
 			}
 		}
 		retval = sctp_send_asconf(asoc, chunk);
@@ -896,7 +895,6 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
 		 */
 		list_for_each_entry(transport, &asoc->peer.transport_addr_list,
 					transports) {
-			sctp_transport_dst_release(transport);
 			sctp_transport_route(transport, NULL,
 					     sctp_sk(asoc->base.sk));
 		}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index ed73a9d91b83eadd3eb8d681fda104fe71562c3f..4a95e260b674b3dffe2ffc4c54cf9a330dfac18c 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -242,6 +242,15 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
 						&transport->fl, sk);
 	}
 
+	if (transport->param_flags & SPP_PMTUD_DISABLE) {
+		struct sctp_association *asoc = transport->asoc;
+
+		if (!transport->pathmtu && asoc && asoc->pathmtu)
+			transport->pathmtu = asoc->pathmtu;
+		if (transport->pathmtu)
+			return;
+	}
+
 	if (transport->dst)
 		transport->pathmtu = sctp_dst_mtu(transport->dst);
 	else
@@ -290,6 +299,7 @@ void sctp_transport_route(struct sctp_transport *transport,
 	struct sctp_association *asoc = transport->asoc;
 	struct sctp_af *af = transport->af_specific;
 
+	sctp_transport_dst_release(transport);
 	af->get_dst(transport, saddr, &transport->fl, sctp_opt2sk(opt));
 
 	if (saddr)
@@ -297,25 +307,14 @@ void sctp_transport_route(struct sctp_transport *transport,
 	else
 		af->get_saddr(opt, transport, &transport->fl);
 
-	if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
-		return;
-	}
-	if (transport->dst) {
-		transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst));
+	sctp_transport_pmtu(transport, sctp_opt2sk(opt));
 
-		/* Initialize sk->sk_rcv_saddr, if the transport is the
-		 * association's active path for getsockname().
-		 */
-		if (asoc && (!asoc->peer.primary_path ||
-			     (transport == asoc->peer.active_path)))
-			opt->pf->to_sk_saddr(&transport->saddr,
-					     asoc->base.sk);
-	} else if ((transport->param_flags & SPP_PMTUD_DISABLE) &&
-		   asoc && asoc->pathmtu) {
-		transport->pathmtu = asoc->pathmtu;
-	} else {
-		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
-	}
+	/* Initialize sk->sk_rcv_saddr, if the transport is the
+	 * association's active path for getsockname().
+	 */
+	if (transport->dst && asoc &&
+	    (!asoc->peer.primary_path || transport == asoc->peer.active_path))
+		opt->pf->to_sk_saddr(&transport->saddr, asoc->base.sk);
 }
 
 /* Hold a reference to a transport.  */
-- 
2.14.3

^ permalink raw reply related

* [PATCH net-next 07/13] sctp: remove sctp_assoc_pending_pmtu
From: Marcelo Ricardo Leitner @ 2018-04-26 19:58 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Vlad Yasevich, Neil Horman, Xin Long
In-Reply-To: <cover.1524772453.git.marcelo.leitner@gmail.com>

No need for this helper.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 include/net/sctp/sctp.h | 6 ------
 net/sctp/socket.c       | 6 ++++--
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 350c65620a4eb35ba2af0d59fef70e2b8b461e44..e327acad8e7d0710e30f32d0829116593c1171cf 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -428,12 +428,6 @@ static inline int sctp_list_single_entry(struct list_head *head)
 	return (head->next != head) && (head->next == head->prev);
 }
 
-static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc)
-{
-	sctp_assoc_sync_pmtu(asoc);
-	asoc->pmtu_pending = 0;
-}
-
 static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk)
 {
 	return !list_empty(&chunk->list);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 21bf457be3ea321457107256c12ccb93e1fbab06..a93b60a28cc5fff0ebe299905fb31cb37de43d88 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1918,8 +1918,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 		goto err;
 	}
 
-	if (asoc->pmtu_pending)
-		sctp_assoc_pending_pmtu(asoc);
+	if (asoc->pmtu_pending) {
+		sctp_assoc_sync_pmtu(asoc);
+		asoc->pmtu_pending = 0;
+	}
 
 	if (sctp_wspace(asoc) < msg_len)
 		sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
-- 
2.14.3

^ permalink raw reply related

* [PATCH net-next v2 6/7] MIPS: mscc: connect phys to ports on ocelot_pcb123
From: Alexandre Belloni @ 2018-04-26 19:59 UTC (permalink / raw)
  To: David S . Miller
  Cc: Allan Nielsen, razvan.stefanescu, po.liu, Thomas Petazzoni,
	Andrew Lunn, Florian Fainelli, netdev, devicetree, linux-kernel,
	linux-mips, Alexandre Belloni, James Hogan
In-Reply-To: <20180426195931.5393-1-alexandre.belloni@bootlin.com>

Add phy to switch port connections for PCB123 for internal PHYs.

Cc: James Hogan <jhogan@kernel.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 arch/mips/boot/dts/mscc/ocelot_pcb123.dts | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
index 29d6414f8886..4ccd65379059 100644
--- a/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
+++ b/arch/mips/boot/dts/mscc/ocelot_pcb123.dts
@@ -25,3 +25,23 @@
 &uart2 {
 	status = "okay";
 };
+
+&mdio0 {
+	status = "okay";
+};
+
+&port0 {
+	phy-handle = <&phy0>;
+};
+
+&port1 {
+	phy-handle = <&phy1>;
+};
+
+&port2 {
+	phy-handle = <&phy2>;
+};
+
+&port3 {
+	phy-handle = <&phy3>;
+};
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-next 13/13] sctp: allow unsetting sockopt MAXSEG
From: Marcelo Ricardo Leitner @ 2018-04-26 19:59 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Vlad Yasevich, Neil Horman, Xin Long
In-Reply-To: <cover.1524772453.git.marcelo.leitner@gmail.com>

RFC 6458 Section 8.1.16 says that setting MAXSEG as 0 means that the user
is not limiting it, and not that it should be set to the *current* maximum,
as we are doing.

This patch thus allows setting it as 0, effectively removing the user
limit.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 net/sctp/socket.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 2d35c8ea2470e7f5481bb9675ffd233eb3424d91..1b4593b842b001903f518e90484c763d9d3698f3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3211,7 +3211,6 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign
 static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen)
 {
 	struct sctp_sock *sp = sctp_sk(sk);
-	struct sctp_af *af = sp->pf->af;
 	struct sctp_assoc_value params;
 	struct sctp_association *asoc;
 	int val;
@@ -3249,12 +3248,6 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
 	}
 
 	if (asoc) {
-		if (val == 0) {
-			val = asoc->pathmtu - af->net_header_len;
-			val -= af->ip_options_len(sk);
-			val -= sizeof(struct sctphdr) +
-			       sctp_datachk_len(&asoc->stream);
-		}
 		asoc->user_frag = val;
 		sctp_assoc_update_frag_point(asoc);
 	} else {
-- 
2.14.3

^ permalink raw reply related

* [PATCH net-next 09/13] sctp: remove sctp_transport_pmtu_check
From: Marcelo Ricardo Leitner @ 2018-04-26 19:58 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Vlad Yasevich, Neil Horman, Xin Long
In-Reply-To: <cover.1524772453.git.marcelo.leitner@gmail.com>

We are now keeping the MTU information synced between asoc, transport
and dst, which makes the check at sctp_packet_config() not needed
anymore. As it was the sole caller to this function, let's remove it.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 include/net/sctp/sctp.h | 12 ------------
 net/sctp/output.c       |  3 ---
 2 files changed, 15 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 4965cbfa7d92c0f60a76dd6e03571f78209dda5a..f66d4435000799f523fbaa34b0a57dbdeebda040 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -606,16 +606,4 @@ static inline __u32 sctp_dst_mtu(const struct dst_entry *dst)
 				 SCTP_DEFAULT_MINSEGMENT));
 }
 
-static inline bool sctp_transport_pmtu_check(struct sctp_transport *t)
-{
-	__u32 pmtu = sctp_dst_mtu(t->dst);
-
-	if (t->pathmtu == pmtu)
-		return true;
-
-	t->pathmtu = pmtu;
-
-	return false;
-}
-
 #endif /* __net_sctp_h__ */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index bf4226c3cc1de79f2041d55a68ea53f4fd600b25..e672dee302c7092433a64ed3ed8bfcd183e1f9c8 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -118,9 +118,6 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
 		sctp_transport_route(tp, NULL, sp);
 		if (asoc->param_flags & SPP_PMTUD_ENABLE)
 			sctp_assoc_sync_pmtu(asoc);
-	} else if (!sctp_transport_pmtu_check(tp)) {
-		if (asoc->param_flags & SPP_PMTUD_ENABLE)
-			sctp_assoc_sync_pmtu(asoc);
 	}
 
 	/* If there a is a prepend chunk stick it on the list before
-- 
2.14.3

^ permalink raw reply related

* [PATCH net-next v2 7/7] MAINTAINERS: Add entry for Microsemi Ethernet switches
From: Alexandre Belloni @ 2018-04-26 19:59 UTC (permalink / raw)
  To: David S . Miller
  Cc: Allan Nielsen, razvan.stefanescu, po.liu, Thomas Petazzoni,
	Andrew Lunn, Florian Fainelli, netdev, devicetree, linux-kernel,
	linux-mips, Alexandre Belloni
In-Reply-To: <20180426195931.5393-1-alexandre.belloni@bootlin.com>

Add myself as a maintainer for the Microsemi Ethernet switches.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0a1410d5a621..b632deb3f503 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9280,6 +9280,12 @@ F:	include/linux/cciss*.h
 F:	include/uapi/linux/cciss*.h
 F:	Documentation/scsi/smartpqi.txt
 
+MICROSEMI ETHERNET SWITCH DRIVER
+M:	Alexandre Belloni <alexandre.belloni@bootlin.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	drivers/net/ethernet/mscc/
+
 MICROSOFT SURFACE PRO 3 BUTTON DRIVER
 M:	Chen Yu <yu.c.chen@intel.com>
 L:	platform-driver-x86@vger.kernel.org
-- 
2.17.0

^ permalink raw reply related

* [net 5/7] net/mlx5: Avoid cleaning flow steering table twice during error flow
From: Saeed Mahameed @ 2018-04-26 19:58 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Talat Batheesh, Saeed Mahameed
In-Reply-To: <20180426195842.29665-1-saeedm@mellanox.com>

From: Talat Batheesh <talatb@mellanox.com>

When we fail to initialize the RX root namespace, we need
to clean only that and not the entire flow steering.

Currently the code may try to clean the flow steering twice
on error, which leads to a null pointer dereference.
Make sure we clean correctly.

Fixes: fba53f7b5719 ("net/mlx5: Introduce mlx5_flow_steering structure")
Signed-off-by: Talat Batheesh <talatb@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index de51e7c39bc8..2595c67ea39e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -187,6 +187,7 @@ static void del_sw_ns(struct fs_node *node);
 static void del_sw_hw_rule(struct fs_node *node);
 static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
 				struct mlx5_flow_destination *d2);
+static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns);
 static struct mlx5_flow_rule *
 find_flow_rule(struct fs_fte *fte,
 	       struct mlx5_flow_destination *dest);
@@ -2351,23 +2352,27 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
 
 static int init_root_ns(struct mlx5_flow_steering *steering)
 {
+	int err;
+
 	steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
 	if (!steering->root_ns)
-		goto cleanup;
+		return -ENOMEM;
 
-	if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node))
-		goto cleanup;
+	err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node);
+	if (err)
+		goto out_err;
 
 	set_prio_attrs(steering->root_ns);
-
-	if (create_anchor_flow_table(steering))
-		goto cleanup;
+	err = create_anchor_flow_table(steering);
+	if (err)
+		goto out_err;
 
 	return 0;
 
-cleanup:
-	mlx5_cleanup_fs(steering->dev);
-	return -ENOMEM;
+out_err:
+	cleanup_root_ns(steering->root_ns);
+	steering->root_ns = NULL;
+	return err;
 }
 
 static void clean_tree(struct fs_node *node)
-- 
2.14.3

^ permalink raw reply related

* [net 4/7] net/mlx5e: TX, Use correct counter in dma_map error flow
From: Saeed Mahameed @ 2018-04-26 19:58 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Tariq Toukan, Saeed Mahameed
In-Reply-To: <20180426195842.29665-1-saeedm@mellanox.com>

From: Tariq Toukan <tariqt@mellanox.com>

In case of a dma_mapping_error, do not use wi->num_dma
as a parameter for dma unmap function because it's yet
to be set, and holds an out-of-date value.
Use actual value (local variable num_dma) instead.

Fixes: 34802a42b352 ("net/mlx5e: Do not modify the TX SKB")
Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 20297108528a..5532aa3675c7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -255,7 +255,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 		dma_addr = dma_map_single(sq->pdev, skb_data, headlen,
 					  DMA_TO_DEVICE);
 		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
-			return -ENOMEM;
+			goto dma_unmap_wqe_err;
 
 		dseg->addr       = cpu_to_be64(dma_addr);
 		dseg->lkey       = sq->mkey_be;
@@ -273,7 +273,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 		dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz,
 					    DMA_TO_DEVICE);
 		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
-			return -ENOMEM;
+			goto dma_unmap_wqe_err;
 
 		dseg->addr       = cpu_to_be64(dma_addr);
 		dseg->lkey       = sq->mkey_be;
@@ -285,6 +285,10 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	}
 
 	return num_dma;
+
+dma_unmap_wqe_err:
+	mlx5e_dma_unmap_wqe_err(sq, num_dma);
+	return -ENOMEM;
 }
 
 static inline void
@@ -380,17 +384,15 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen,
 					  (struct mlx5_wqe_data_seg *)cseg + ds_cnt);
 	if (unlikely(num_dma < 0))
-		goto dma_unmap_wqe_err;
+		goto err_drop;
 
 	mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma,
 			     num_bytes, num_dma, wi, cseg);
 
 	return NETDEV_TX_OK;
 
-dma_unmap_wqe_err:
+err_drop:
 	sq->stats.dropped++;
-	mlx5e_dma_unmap_wqe_err(sq, wi->num_dma);
-
 	dev_kfree_skb_any(skb);
 
 	return NETDEV_TX_OK;
@@ -645,17 +647,15 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen,
 					  (struct mlx5_wqe_data_seg *)cseg + ds_cnt);
 	if (unlikely(num_dma < 0))
-		goto dma_unmap_wqe_err;
+		goto err_drop;
 
 	mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma,
 			     num_bytes, num_dma, wi, cseg);
 
 	return NETDEV_TX_OK;
 
-dma_unmap_wqe_err:
+err_drop:
 	sq->stats.dropped++;
-	mlx5e_dma_unmap_wqe_err(sq, wi->num_dma);
-
 	dev_kfree_skb_any(skb);
 
 	return NETDEV_TX_OK;
-- 
2.14.3

^ permalink raw reply related

* [PATCH net-next 12/13] sctp: consider idata chunks when setting SCTP_MAXSEG
From: Marcelo Ricardo Leitner @ 2018-04-26 19:59 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Vlad Yasevich, Neil Horman, Xin Long
In-Reply-To: <cover.1524772453.git.marcelo.leitner@gmail.com>

When setting SCTP_MAXSEG sock option, it should consider which kind of
data chunk is being used if the asoc is already available, so that the
limit better reflects reality.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 net/sctp/socket.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ad8965835d8d51d4eda857b91dfca140710bf7da..2d35c8ea2470e7f5481bb9675ffd233eb3424d91 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3233,18 +3233,21 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
 		return -EINVAL;
 	}
 
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+
 	if (val) {
 		int min_len, max_len;
+		__u16 datasize = asoc ? sctp_datachk_len(&asoc->stream) :
+				 sizeof(struct sctp_data_chunk);
 
 		min_len = sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT,
-					   sizeof(struct sctp_data_chunk));
-		max_len = SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_data_chunk);
+					   datasize);
+		max_len = SCTP_MAX_CHUNK_LEN - datasize;
 
 		if (val < min_len || val > max_len)
 			return -EINVAL;
 	}
 
-	asoc = sctp_id2assoc(sk, params.assoc_id);
 	if (asoc) {
 		if (val == 0) {
 			val = asoc->pathmtu - af->net_header_len;
-- 
2.14.3

^ permalink raw reply related

* [net 6/7] net/mlx5e: Fix traffic between VF and representor
From: Saeed Mahameed @ 2018-04-26 19:58 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Shahar Klein, Saeed Mahameed
In-Reply-To: <20180426195842.29665-1-saeedm@mellanox.com>

From: Shahar Klein <shahark@mellanox.com>

After the cited commit, WQE RQ size is calculated based on sw_mtu but it
was not set for representors. This commit fixes that.

Fixes: 472a1e44b349 ("net/mlx5e: Save MTU in channels params")
Signed-off-by: Shahar Klein <shahark@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index d8f68e4d1018..876c3e4c6193 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -877,13 +877,14 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
 };
 
 static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
-				   struct mlx5e_params *params)
+				   struct mlx5e_params *params, u16 mtu)
 {
 	u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
 					 MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
 					 MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 
 	params->hard_mtu    = MLX5E_ETH_HARD_MTU;
+	params->sw_mtu      = mtu;
 	params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE;
 	params->rq_wq_type  = MLX5_WQ_TYPE_LINKED_LIST;
 	params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_RQ_SIZE;
@@ -931,7 +932,7 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
 
 	priv->channels.params.num_channels = profile->max_nch(mdev);
 
-	mlx5e_build_rep_params(mdev, &priv->channels.params);
+	mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu);
 	mlx5e_build_rep_netdev(netdev);
 
 	mlx5e_timestamp_init(priv);
-- 
2.14.3

^ permalink raw reply related

* [PATCH net-next 11/13] sctp: honor PMTU_DISABLED when handling icmp
From: Marcelo Ricardo Leitner @ 2018-04-26 19:59 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Vlad Yasevich, Neil Horman, Xin Long
In-Reply-To: <cover.1524772453.git.marcelo.leitner@gmail.com>

sctp_sendmsg() could trigger PMTU updates even when PMTU_DISABLED was
set, as pmtu_pending could be set unconditionally during icmp handling
if the socket was in use by the application.

This patch fixes it by checking for PMTU_DISABLED when handling such
deferred updates.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 net/sctp/socket.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index bb08d44b838bbbb0ffbf80b194a1291fed4069e4..ad8965835d8d51d4eda857b91dfca140710bf7da 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1893,6 +1893,7 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 				struct sctp_sndrcvinfo *sinfo)
 {
 	struct sock *sk = asoc->base.sk;
+	struct sctp_sock *sp = sctp_sk(sk);
 	struct net *net = sock_net(sk);
 	struct sctp_datamsg *datamsg;
 	bool wait_connect = false;
@@ -1911,13 +1912,14 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 			goto err;
 	}
 
-	if (sctp_sk(sk)->disable_fragments && msg_len > asoc->frag_point) {
+	if (sp->disable_fragments && msg_len > asoc->frag_point) {
 		err = -EMSGSIZE;
 		goto err;
 	}
 
 	if (asoc->pmtu_pending) {
-		sctp_assoc_sync_pmtu(asoc);
+		if (sp->param_flags & SPP_PMTUD_ENABLE)
+			sctp_assoc_sync_pmtu(asoc);
 		asoc->pmtu_pending = 0;
 	}
 
@@ -1936,7 +1938,7 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
 		if (err)
 			goto err;
 
-		if (sctp_sk(sk)->strm_interleave) {
+		if (sp->strm_interleave) {
 			timeo = sock_sndtimeo(sk, 0);
 			err = sctp_wait_for_connect(asoc, &timeo);
 			if (err)
-- 
2.14.3

^ permalink raw reply related

* [net 7/7] net/mlx5: Properly deal with flow counters when deleting rules
From: Saeed Mahameed @ 2018-04-26 19:58 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev, Chris Mi, Jianbo Liu, Saeed Mahameed
In-Reply-To: <20180426195842.29665-1-saeedm@mellanox.com>

From: Chris Mi <chrism@mellanox.com>

When deleting a flow counter, the modify mask should be the action and
the flow counter. Otherwise the flow counter is not deleted and we'll
get a firmware warning when deleting the remaining destinations on the
same FTE.

It only happens in the presence of flow counter and multiple vport
destinations. If there is only one vport destination, there is no
need to update the FTE when deleting the only vport destination,
we just delete the FTE.

Fixes: ae05831424ed ("net/mlx5: Add option to add fwd rule with counter")
Signed-off-by: Chris Mi <chrism@mellanox.com>
Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 2595c67ea39e..c39c1692e674 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -482,7 +482,8 @@ static void del_sw_hw_rule(struct fs_node *node)
 
 	if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER  &&
 	    --fte->dests_size) {
-		modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+		modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
+			      BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
 		fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
 		update_fte = true;
 		goto out;
-- 
2.14.3

^ permalink raw reply related

* [PATCH net-next v2 1/7] dt-bindings: net: add DT bindings for Microsemi MIIM
From: Alexandre Belloni @ 2018-04-26 19:59 UTC (permalink / raw)
  To: David S . Miller
  Cc: Allan Nielsen, razvan.stefanescu, po.liu, Thomas Petazzoni,
	Andrew Lunn, Florian Fainelli, netdev, devicetree, linux-kernel,
	linux-mips, Alexandre Belloni, Rob Herring
In-Reply-To: <20180426195931.5393-1-alexandre.belloni@bootlin.com>

DT bindings for the Microsemi MII Management Controller found on Microsemi
SoCs

Cc: Rob Herring <robh+dt@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 .../devicetree/bindings/net/mscc-miim.txt     | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/mscc-miim.txt

diff --git a/Documentation/devicetree/bindings/net/mscc-miim.txt b/Documentation/devicetree/bindings/net/mscc-miim.txt
new file mode 100644
index 000000000000..7104679cf59d
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/mscc-miim.txt
@@ -0,0 +1,26 @@
+Microsemi MII Management Controller (MIIM) / MDIO
+=================================================
+
+Properties:
+- compatible: must be "mscc,ocelot-miim"
+- reg: The base address of the MDIO bus controller register bank. Optionally, a
+  second register bank can be defined if there is an associated reset register
+  for internal PHYs
+- #address-cells: Must be <1>.
+- #size-cells: Must be <0>.  MDIO addresses have no size component.
+- interrupts: interrupt specifier (refer to the interrupt binding)
+
+Typically an MDIO bus might have several children.
+
+Example:
+	mdio@107009c {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		compatible = "mscc,ocelot-miim";
+		reg = <0x107009c 0x36>, <0x10700f0 0x8>;
+		interrupts = <14>;
+
+		phy0: ethernet-phy@0 {
+			reg = <0>;
+		};
+	};
-- 
2.17.0

^ permalink raw reply related

* Re: [B.A.T.M.A.N.] [PATCH] batman-adv: fix batadv_interface_tx()'s return type
From: Luc Van Oostenryck @ 2018-04-26 20:05 UTC (permalink / raw)
  To: Sven Eckelmann
  Cc: b.a.t.m.a.n, linux-kernel, Marek Lindner, netdev,
	Antonio Quartulli
In-Reply-To: <2141097.l2ETxyu3Mo@sven-edge>

On Wed, Apr 25, 2018 at 07:35:00PM +0200, Sven Eckelmann wrote:
> On Dienstag, 24. April 2018 15:18:46 CEST Luc Van Oostenryck wrote:
> > The method ndo_start_xmit() is defined as returning an 'netdev_tx_t',
> > which is a typedef for an enum type, but the implementation in this
> > driver returns an 'int'.
> > 
> > Fix this by returning 'netdev_tx_t' in this driver too.
> > 
> > Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
> > ---
> >  net/batman-adv/soft-interface.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> Applied as 6bf9e4d39a58 [1] but the alignment was fixed

Thanks for this,
-- Luc 

^ permalink raw reply

* Re: [dm-devel] [PATCH v5] fault-injection: introduce kvmalloc fallback options
From: Mikulas Patocka @ 2018-04-26 20:05 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: James Bottomley, Michal Hocko, David Rientjes, dm-devel,
	eric.dumazet, netdev, jasowang, Randy Dunlap, linux-kernel,
	Matthew Wilcox, linux-mm, edumazet, Andrew Morton, virtualization,
	David Miller, Vlastimil Babka
In-Reply-To: <20180426223925-mutt-send-email-mst@kernel.org>



On Thu, 26 Apr 2018, Michael S. Tsirkin wrote:

> On Thu, Apr 26, 2018 at 03:36:14PM -0400, Mikulas Patocka wrote:
> > People on this list argue "this should be a kernel parameter".
> 
> How about making it a writeable attribute, so it's easy to turn on/off
> after boot. Then you can keep it deterministic, userspace can play with
> the attribute at random if it wants to.
> 
> -- 
> MST

It is already controllable by an attribute in debugfs.

Will you email all the testers about this attribute? How many of them will 
remember to set it? How many of them will remember to set it a year after? 
Will you write a userspace program that manages it and introduce it into 
the distribution?

This is a little feature.

Mikulas

^ permalink raw reply

* Re: [PATCH] net: aquantia: fix aq_ndev_start_xmit()'s return type
From: Luc Van Oostenryck @ 2018-04-26 20:37 UTC (permalink / raw)
  To: David Miller; +Cc: linux-kernel, igor.russkikh, netdev
In-Reply-To: <20180424.104250.1411072442966778574.davem@davemloft.net>

On Tue, Apr 24, 2018 at 10:42:50AM -0400, David Miller wrote:
> 
> Luc please don't submit such a huge number of patches all at one time.
> 
> ...
> 
> Finally, make this a true patch series.  It is so much easier for
> maintainers to work with a set of changes all doing the same thing if
> you make them a proper patch series with an appropriate "[PATCH 0/N] ..."
> header posting.
> 
> Thank you.

I suppose this sort of patch is as much a PITA for the sender
as for the receivers.

I hesitated between a single patch, a series or separated patches.
In a sense, the single patch would have been the easier for both sides
but I guessed it would not have been very well welcomed. Since for a
series, you're supposed to CC the whole series to everyone involved,
it would have been, or at least I thought so, maximally noisy for no
good reasons. Finally, as all of these patches are totally independent,
I thought it would be the best to send them as separated patches, 
each driver's maintainers then being free to accept, reject or ignore
the patch(es) concerning him/her. It seems it was a bad guess, and
yes, I see the point of having a series for this.

I'll remember all this for the next time (if next time there is,
of course, I was already quite hesitant to spend time to prepare
and send patches for these issues with enum/integer mix-up).

Sorry for the annoyance,
-- Luc

^ permalink raw reply

* [PATCH] net/mlx5: report persistent netdev stats across ifdown/ifup commands
From: Qing Huang @ 2018-04-26 20:37 UTC (permalink / raw)
  To: linux-kernel, linux-rdma, netdev; +Cc: leon, matanb, saeedm, Qing Huang

Current stats collecting scheme in mlx5 driver is to periodically fetch
aggregated stats from all the active mlx5 software channels associated
with the device. However when a mlx5 interface is brought down(ifdown),
all the channels will be deactivated and closed. A new set of channels
will be created when next ifup command or a similar command is called.
Unfortunately the new channels will have all stats reset to 0. So you
lose the accumulated stats information. This behavior is different from
other netdev drivers including the mlx4 driver. In order to fix it, we
now save prior mlx5 software stats into netdev stats fields, so all the
accumulated stats will survive multiple runs of ifdown/ifup commands and
be shown correctly.

Orabug: 27548610

Signed-off-by: Qing Huang <qing.huang@oracle.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 30 +++++++++++++++++++----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index f1fe490..5d50e69 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2621,6 +2621,23 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev)
 		netdev_set_tc_queue(netdev, tc, nch, 0);
 }
 
+static void mlx5e_netdev_save_stats(struct mlx5e_priv *priv)
+{
+	struct net_device *netdev = priv->netdev;
+
+	netdev->stats.rx_packets += priv->stats.sw.rx_packets;
+	netdev->stats.rx_bytes   += priv->stats.sw.rx_bytes;
+	netdev->stats.tx_packets += priv->stats.sw.tx_packets;
+	netdev->stats.tx_bytes   += priv->stats.sw.tx_bytes;
+	netdev->stats.tx_dropped += priv->stats.sw.tx_queue_dropped;
+
+	priv->stats.sw.rx_packets	= 0;
+	priv->stats.sw.rx_bytes		= 0;
+	priv->stats.sw.tx_packets	= 0;
+	priv->stats.sw.tx_bytes		= 0;
+	priv->stats.sw.tx_queue_dropped = 0;
+}
+
 static void mlx5e_build_channels_tx_maps(struct mlx5e_priv *priv)
 {
 	struct mlx5e_channel *c;
@@ -2691,6 +2708,7 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
 		netif_set_real_num_tx_queues(netdev, new_num_txqs);
 
 	mlx5e_deactivate_priv_channels(priv);
+	mlx5e_netdev_save_stats(priv);
 	mlx5e_close_channels(&priv->channels);
 
 	priv->channels = *new_chs;
@@ -2770,6 +2788,7 @@ int mlx5e_close_locked(struct net_device *netdev)
 
 	netif_carrier_off(priv->netdev);
 	mlx5e_deactivate_priv_channels(priv);
+	mlx5e_netdev_save_stats(priv);
 	mlx5e_close_channels(&priv->channels);
 
 	return 0;
@@ -3215,11 +3234,12 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
 		stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
 		stats->tx_bytes   = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
 	} else {
-		stats->rx_packets = sstats->rx_packets;
-		stats->rx_bytes   = sstats->rx_bytes;
-		stats->tx_packets = sstats->tx_packets;
-		stats->tx_bytes   = sstats->tx_bytes;
-		stats->tx_dropped = sstats->tx_queue_dropped;
+		stats->rx_packets = sstats->rx_packets + dev->stats.rx_packets;
+		stats->rx_bytes   = sstats->rx_bytes + dev->stats.rx_bytes;
+		stats->tx_packets = sstats->tx_packets + dev->stats.tx_packets;
+		stats->tx_bytes   = sstats->tx_bytes + dev->stats.tx_bytes;
+		stats->tx_dropped = sstats->tx_queue_dropped +
+				    dev->stats.tx_dropped;
 	}
 
 	stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH net-next v2 2/7] net: mscc: Add MDIO driver
From: Andrew Lunn @ 2018-04-26 20:46 UTC (permalink / raw)
  To: Alexandre Belloni
  Cc: David S . Miller, Allan Nielsen, razvan.stefanescu, po.liu,
	Thomas Petazzoni, Florian Fainelli, netdev, devicetree,
	linux-kernel, linux-mips
In-Reply-To: <20180426195931.5393-3-alexandre.belloni@bootlin.com>

On Thu, Apr 26, 2018 at 09:59:26PM +0200, Alexandre Belloni wrote:
> Add a driver for the Microsemi MII Management controller (MIIM) found on
> Microsemi SoCs.
> On Ocelot, there are two controllers, one is connected to the internal
> PHYs, the other one can communicate with external PHYs.
> 
> Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>

Reviewed-by: Andrew Lunn <andrew@lunn.ch>

    Andrew

^ permalink raw reply

* Re: [PATCHv3 3/3] tools bpftool: Display license GPL compatible in prog show/list
From: Daniel Borkmann @ 2018-04-26 20:49 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Jakub Kicinski, Jiri Olsa, Alexei Starovoitov, lkml, netdev,
	Quentin Monnet
In-Reply-To: <20180426081801.GK3396@krava>

On 04/26/2018 10:18 AM, Jiri Olsa wrote:
[...]
> v3 of the last patch attached, the branch is also updated
> 
> thanks,
> jirka
> 
> 
> ---
> Display the license "gpl" string in bpftool prog command, like:
> 
>   # bpftool prog list
>   5: tracepoint  name func  tag 57cd311f2e27366b  gpl
>           loaded_at Apr 26/09:37  uid 0
>           xlated 16B  not jited  memlock 4096B
> 
>   # bpftool --json --pretty prog show
>   [{
>           "id": 5,
>           "type": "tracepoint",
>           "name": "func",
>           "tag": "57cd311f2e27366b",
>           "gpl_compatible": true,
>           "loaded_at": "Apr 26/09:37",
>           "uid": 0,
>           "bytes_xlated": 16,
>           "jited": false,
>           "bytes_memlock": 4096
>       }
>   ]
> 
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>

Ok, v2 from prior two patches and v3 of this one applied to bpf-next. Please
next time always submit a fresh new series at once, thanks Jiri.

^ permalink raw reply

* Proposal
From: MS Zeliha Omer Faruk @ 2018-04-26 20:39 UTC (permalink / raw)





Hello

   Greetings to you today i asked before but i did't get a response please
i know this might come to you as a surprise because you do not know me
personally i have a business proposal for you please reply for more
info.



Best Regards,

Esentepe Mahallesi Büyükdere
Caddesi Kristal Kule Binasi
No:215
 Sisli - Istanbul, Turkey

^ permalink raw reply

* Re: [PATCH net-next v2 5/7] MIPS: mscc: Add switch to ocelot
From: Andrew Lunn @ 2018-04-26 20:51 UTC (permalink / raw)
  To: Alexandre Belloni
  Cc: David S . Miller, Allan Nielsen, razvan.stefanescu, po.liu,
	Thomas Petazzoni, Florian Fainelli, netdev, devicetree,
	linux-kernel, linux-mips, James Hogan
In-Reply-To: <20180426195931.5393-6-alexandre.belloni@bootlin.com>

On Thu, Apr 26, 2018 at 09:59:29PM +0200, Alexandre Belloni wrote:
> Ocelot has an integrated switch, add support for it.
> 
> Cc: James Hogan <jhogan@kernel.org>
> Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
> ---
>  arch/mips/boot/dts/mscc/ocelot.dtsi | 88 +++++++++++++++++++++++++++++
>  1 file changed, 88 insertions(+)
> 
> diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi
> index dd239cab2f9d..4f33dbc67348 100644
> --- a/arch/mips/boot/dts/mscc/ocelot.dtsi
> +++ b/arch/mips/boot/dts/mscc/ocelot.dtsi
> @@ -91,6 +91,72 @@
>  			status = "disabled";
>  		};
>  
> +		switch@1010000 {
> +			compatible = "mscc,vsc7514-switch";
> +			reg = <0x1010000 0x10000>,
> +			      <0x1030000 0x10000>,
> +			      <0x1080000 0x100>,
> +			      <0x10d0000 0x10000>,
> +			      <0x11e0000 0x100>,
> +			      <0x11f0000 0x100>,
> +			      <0x1200000 0x100>,
> +			      <0x1210000 0x100>,
> +			      <0x1220000 0x100>,
> +			      <0x1230000 0x100>,
> +			      <0x1240000 0x100>,
> +			      <0x1250000 0x100>,
> +			      <0x1260000 0x100>,
> +			      <0x1270000 0x100>,
> +			      <0x1280000 0x100>,
> +			      <0x1800000 0x80000>,
> +			      <0x1880000 0x10000>;
> +			reg-names = "sys", "rew", "qs", "hsio", "port0",
> +				    "port1", "port2", "port3", "port4", "port5",
> +				    "port6", "port7", "port8", "port9", "port10",
> +				    "qsys", "ana";
> +			interrupts = <21 22>;
> +			interrupt-names = "xtr", "inj";
> +
> +			ethernet-ports {
> +				#address-cells = <1>;
> +				#size-cells = <0>;
> +
> +				port0: port@0 {
> +					reg = <0>;
> +				};
> +				port1: port@1 {
> +					reg = <1>;
> +				};
> +				port2: port@2 {
> +					reg = <2>;
> +				};
> +				port3: port@3 {
> +					reg = <3>;
> +				};
> +				port4: port@4 {
> +					reg = <4>;
> +				};
> +				port5: port@5 {
> +					reg = <5>;
> +				};
> +				port6: port@6 {
> +					reg = <6>;
> +				};
> +				port7: port@7 {
> +					reg = <7>;
> +				};
> +				port8: port@8 {
> +					reg = <8>;
> +				};
> +				port9: port@9 {
> +					reg = <9>;
> +				};
> +				port10: port@10 {
> +					reg = <10>;
> +				};
> +			};
> +		};
> +
>  		reset@1070008 {
>  			compatible = "mscc,ocelot-chip-reset";
>  			reg = <0x1070008 0x4>;
> @@ -113,5 +179,27 @@
>  				function = "uart2";
>  			};
>  		};
> +
> +		mdio0: mdio@107009c {
> +			#address-cells = <1>;
> +			#size-cells = <0>;
> +			compatible = "mscc,ocelot-miim";
> +			reg = <0x107009c 0x36>, <0x10700f0 0x8>;
> +			interrupts = <14>;
> +			status = "disabled";
> +
> +			phy0: ethernet-phy@0 {
> +				reg = <0>;
> +			};
> +			phy1: ethernet-phy@1 {
> +				reg = <1>;
> +			};
> +			phy2: ethernet-phy@2 {
> +				reg = <2>;
> +			};
> +			phy3: ethernet-phy@3 {
> +				reg = <3>;
> +			};

Hi Alexandre

These are internal PHYs? Is there an option to use external PHYs for
the ports which have internal PHYs?

I'm just wondering if they should be linked together by default. Or a
comment added to the commit message about why they are not linked
together here.

	 Andrew

^ permalink raw reply

* Re: [bpf PATCH] bpf: fix uninitialized variable in bpf tools
From: Daniel Borkmann @ 2018-04-26 20:55 UTC (permalink / raw)
  To: John Fastabend, ast, jbenc; +Cc: netdev
In-Reply-To: <20180425220852.10403.79675.stgit@john-Precision-Tower-5810>

On 04/26/2018 12:08 AM, John Fastabend wrote:
> Here the variable cont is used as the saved_pointer for a call to
> strtok_r(). It is safe to use the value uninitialized in this
> context however and the later reference is only ever used if
> the strtok_r is successful. But, 'gcc-5' at least doesn't have all
> this knowledge so initialize cont to NULL. Additionally, do the
> natural NULL check before accessing just for completness.
> 
> The warning is the following:
> 
> ./bpf/tools/bpf/bpf_dbg.c: In function ‘cmd_load’:
> ./bpf/tools/bpf/bpf_dbg.c:1077:13: warning: ‘cont’ may be used uninitialized in this function [-Wmaybe-uninitialized]
>   } else if (matches(subcmd, "pcap") == 0) {
> 
> Fixes: fd981e3c321a "filter: bpf_dbg: add minimal bpf debugger"
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>

Applied to bpf tree, thanks John!

^ permalink raw reply

* [PATCHv2 bpf-next 0/2] BPF tunnel testsuite
From: William Tu @ 2018-04-26 21:01 UTC (permalink / raw)
  To: netdev

This patch series provides an end-to-end eBPF tunnel testsuite.  A common topology
is created below for all types of tunnels:

Topology:                                                                     
---------                                                                     
     root namespace   |     at_ns0 namespace                                   
                      |                                                        
      -----------     |     -----------                                        
      | tnl dev |     |     | tnl dev |  (overlay network)                     
      -----------     |     -----------                                        
      metadata-mode   |     native-mode                                        
       with bpf       |                                                        
                      |                                                        
      ----------      |     ----------                                         
      |  veth1  | --------- |  veth0  |  (underlay network)                    
      ----------    peer    ----------                                         
	                                                                              
                                                                               
Device Configuration                                                          
--------------------                                                          
 Root namespace with metadata-mode tunnel + BPF                                
 Device names and addresses:                                                   
       veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)                         
       tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay)              
                                                                               
 Namespace at_ns0 with native tunnel                                           
 Device names and addresses:                                                   
       veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)                       
       tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay)              
                                                                               
                                                                               
End-to-end ping packet flow                                                   
---------------------------                                                   
 Most of the tests start by namespace creation, device configuration,          
 then ping the underlay and overlay network.  When doing 'ping 10.1.1.100'     
 from root namespace, the following operations happen:                         
 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.       
 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,  
    with remote_ip=172.16.1.100 and others.
 3) Outer tunnel header is prepended and route the packet to veth1's egress    
 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0      
 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet                   
 6) Forward the packet to the overlay tnl dev                                  

Test Cases
-----------------------------
 Tunnel Type |  BPF Programs
-----------------------------
 GRE:          gre_set_tunnel, gre_get_tunnel
 IP6GRE:       ip6gretap_set_tunnel, ip6gretap_get_tunnel
 ERSPAN:       erspan_set_tunnel, erspan_get_tunnel
 IP6ERSPAN:    ip4ip6erspan_set_tunnel, ip4ip6erspan_get_tunnel
 VXLAN:        vxlan_set_tunnel, vxlan_get_tunnel
 IP6VXLAN:     ip6vxlan_set_tunnel, ip6vxlan_get_tunnel
 GENEVE:       geneve_set_tunnel, geneve_get_tunnel
 IP6GENEVE:    ip6geneve_set_tunnel, ip6geneve_get_tunnel
 IPIP:         ipip_set_tunnel, ipip_get_tunnel
 IP6IP:        ipip6_set_tunnel, ipip6_get_tunnel,
               ip6ip6_set_tunnel, ip6ip6_get_tunnel
 XFRM:         xfrm_get_state

William Tu (2):
  selftests/bpf: bpf tunnel test.
  samples/bpf: remove the bpf tunnel testsuite.

 samples/bpf/Makefile                           |   1 -
 samples/bpf/tcbpf2_kern.c                      | 612 ---------------------
 samples/bpf/test_tunnel_bpf.sh                 | 390 -------------
 tools/testing/selftests/bpf/Makefile           |   5 +-
 tools/testing/selftests/bpf/test_tunnel.sh     | 729 +++++++++++++++++++++++++
 tools/testing/selftests/bpf/test_tunnel_kern.c | 713 ++++++++++++++++++++++++
 6 files changed, 1445 insertions(+), 1005 deletions(-)
 delete mode 100644 samples/bpf/tcbpf2_kern.c
 delete mode 100755 samples/bpf/test_tunnel_bpf.sh
 create mode 100755 tools/testing/selftests/bpf/test_tunnel.sh
 create mode 100644 tools/testing/selftests/bpf/test_tunnel_kern.c

-- 
2.7.4

^ permalink raw reply

* [PATCHv2 bpf-next 1/2] selftests/bpf: bpf tunnel test.
From: William Tu @ 2018-04-26 21:01 UTC (permalink / raw)
  To: netdev
In-Reply-To: <1524776500-27030-1-git-send-email-u9012063@gmail.com>

The patch migrates the original tests at samples/bpf/tcbpf2_kern.c
and samples/bpf/test_tunnel_bpf.sh to selftests.  There are a couple
of changes from the original:
    1) add ipv6 vxlan, ipv6 geneve, ipv6 ipip tests
    2) simplify the original ipip tests (remove iperf tests)
    3) improve documentation
    4) use bpf_ntoh* and bpf_hton* api

In summary, 'test_tunnel_kern.o' contains the following bpf programs:
  GRE: gre_set_tunnel, gre_get_tunnel
  IP6GRE: ip6gretap_set_tunnel, ip6gretap_get_tunnel
  ERSPAN: erspan_set_tunnel, erspan_get_tunnel
  IP6ERSPAN: ip4ip6erspan_set_tunnel, ip4ip6erspan_get_tunnel
  VXLAN: vxlan_set_tunnel, vxlan_get_tunnel
  IP6VXLAN: ip6vxlan_set_tunnel, ip6vxlan_get_tunnel
  GENEVE: geneve_set_tunnel, geneve_get_tunnel
  IP6GENEVE: ip6geneve_set_tunnel, ip6geneve_get_tunnel
  IPIP: ipip_set_tunnel, ipip_get_tunnel
  IP6IP: ipip6_set_tunnel, ipip6_get_tunnel,
         ip6ip6_set_tunnel, ip6ip6_get_tunnel
  XFRM: xfrm_get_state

Signed-off-by: William Tu <u9012063@gmail.com>
---
 tools/testing/selftests/bpf/Makefile           |   5 +-
 tools/testing/selftests/bpf/test_tunnel.sh     | 729 +++++++++++++++++++++++++
 tools/testing/selftests/bpf/test_tunnel_kern.c | 713 ++++++++++++++++++++++++
 3 files changed, 1445 insertions(+), 2 deletions(-)
 create mode 100755 tools/testing/selftests/bpf/test_tunnel.sh
 create mode 100644 tools/testing/selftests/bpf/test_tunnel_kern.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 0c19d5e08f08..b64a7a39cbc8 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -32,7 +32,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
 	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
 	sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
 	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
-	test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o
+	test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
@@ -40,7 +40,8 @@ TEST_PROGS := test_kmod.sh \
 	test_xdp_redirect.sh \
 	test_xdp_meta.sh \
 	test_offload.py \
-	test_sock_addr.sh
+	test_sock_addr.sh \
+	test_tunnel.sh
 
 # Compile but not part of 'make run_tests'
 TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
new file mode 100755
index 000000000000..aeb2901f21f4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -0,0 +1,729 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# End-to-end eBPF tunnel test suite
+#   The script tests BPF network tunnel implementation.
+#
+# Topology:
+# ---------
+#     root namespace   |     at_ns0 namespace
+#                      |
+#      -----------     |     -----------
+#      | tnl dev |     |     | tnl dev |  (overlay network)
+#      -----------     |     -----------
+#      metadata-mode   |     native-mode
+#       with bpf       |
+#                      |
+#      ----------      |     ----------
+#      |  veth1  | --------- |  veth0  |  (underlay network)
+#      ----------    peer    ----------
+#
+#
+# Device Configuration
+# --------------------
+# Root namespace with metadata-mode tunnel + BPF
+# Device names and addresses:
+# 	veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
+# 	tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay)
+#
+# Namespace at_ns0 with native tunnel
+# Device names and addresses:
+# 	veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
+# 	tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay)
+#
+#
+# End-to-end ping packet flow
+# ---------------------------
+# Most of the tests start by namespace creation, device configuration,
+# then ping the underlay and overlay network.  When doing 'ping 10.1.1.100'
+# from root namespace, the following operations happen:
+# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
+# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
+#    with remote_ip=172.16.1.100 and others.
+# 3) Outer tunnel header is prepended and route the packet to veth1's egress
+# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0
+# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
+# 6) Forward the packet to the overlay tnl dev
+
+PING_ARG="-c 3 -w 10 -q"
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+config_device()
+{
+	ip netns add at_ns0
+	ip link add veth0 type veth peer name veth1
+	ip link set veth0 netns at_ns0
+	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip link set dev veth1 up mtu 1500
+	ip addr add dev veth1 172.16.1.200/24
+}
+
+add_gre_tunnel()
+{
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+        ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local 172.16.1.100 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE key 2 external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6gretap_tunnel()
+{
+
+	# assign ipv6 address
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
+		local ::11 remote ::22
+
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip addr add dev $DEV fc80::200/24
+	ip link set dev $DEV up
+}
+
+add_erspan_tunnel()
+{
+	# at_ns0 namespace
+	if [ "$1" == "v1" ]; then
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local 172.16.1.100 remote 172.16.1.200 \
+		erspan_ver 1 erspan 123
+	else
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local 172.16.1.100 remote 172.16.1.200 \
+		erspan_ver 2 erspan_dir egress erspan_hwid 3
+	fi
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6erspan_tunnel()
+{
+
+	# assign ipv6 address
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	if [ "$1" == "v1" ]; then
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local ::11 remote ::22 \
+		erspan_ver 1 erspan 123
+	else
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local ::11 remote ::22 \
+		erspan_ver 2 erspan_dir egress erspan_hwid 7
+	fi
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+add_vxlan_tunnel()
+{
+	# Set static ARP entry here because iptables set-mark works
+	# on L3 packet, as a result not applying to ARP packets,
+	# causing errors at get_tunnel_{key/opt}.
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		id 2 dstport 4789 gbp remote 172.16.1.200
+	ip netns exec at_ns0 \
+		ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
+	ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external gbp dstport 4789
+	ip link set dev $DEV address 52:54:00:d9:02:00 up
+	ip addr add dev $DEV 10.1.1.200/24
+	arp -s 10.1.1.100 52:54:00:d9:01:00
+}
+
+add_ip6vxlan_tunnel()
+{
+	#ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0
+	ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	#ip -4 addr del 172.16.1.200 dev veth1
+	ip -6 addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \
+		local ::11 remote ::22
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external dstport 4789
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+add_geneve_tunnel()
+{
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		id 2 dstport 6081 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE dstport 6081 external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6geneve_tunnel()
+{
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE id 22 \
+		remote ::22     # geneve has no local option
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+add_ipip_tunnel()
+{
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		local 172.16.1.100 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ipip6tnl_tunnel()
+{
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		local ::11 remote ::22
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+test_gre()
+{
+	TYPE=gretap
+	DEV_NS=gretap00
+	DEV=gretap11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_gre_tunnel
+	attach_bpf $DEV gre_set_tunnel gre_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+        if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gre()
+{
+	TYPE=ip6gre
+	DEV_NS=ip6gre00
+	DEV=ip6gre11
+	ret=0
+
+	check $TYPE
+	config_device
+	# reuse the ip6gretap function
+	add_ip6gretap_tunnel
+	attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# overlay: ipv4 over ipv6
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	# overlay: ipv6 over ipv6
+	ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+	check_err $?
+	cleanup
+
+        if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gretap()
+{
+	TYPE=ip6gretap
+	DEV_NS=ip6gretap00
+	DEV=ip6gretap11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6gretap_tunnel
+	attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# overlay: ipv4 over ipv6
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	# overlay: ipv6 over ipv6
+	ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_erspan()
+{
+	TYPE=erspan
+	DEV_NS=erspan00
+	DEV=erspan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_erspan_tunnel $1
+	attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6erspan()
+{
+	TYPE=ip6erspan
+	DEV_NS=ip6erspan00
+	DEV=ip6erspan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6erspan_tunnel $1
+	attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
+	ping6 $PING_ARG ::11
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_vxlan()
+{
+	TYPE=vxlan
+	DEV_NS=vxlan00
+	DEV=vxlan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_vxlan_tunnel
+	attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6vxlan()
+{
+	TYPE=vxlan
+	DEV_NS=ip6vxlan00
+	DEV=ip6vxlan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6vxlan_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# ip4 over ip6
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_geneve()
+{
+	TYPE=geneve
+	DEV_NS=geneve00
+	DEV=geneve11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_geneve_tunnel
+	attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6geneve()
+{
+	TYPE=geneve
+	DEV_NS=ip6geneve00
+	DEV=ip6geneve11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6geneve_tunnel
+	attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_ipip()
+{
+	TYPE=ipip
+	DEV_NS=ipip00
+	DEV=ipip11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ipip_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ipip6()
+{
+	TYPE=ip6tnl
+	DEV_NS=ipip6tnl00
+	DEV=ipip6tnl11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ipip6tnl_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# ip4 over ip6
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+setup_xfrm_tunnel()
+{
+	auth=0x$(printf '1%.0s' {1..40})
+	enc=0x$(printf '2%.0s' {1..32})
+	spi_in_to_out=0x1
+	spi_out_to_in=0x2
+	# at_ns0 namespace
+	# at_ns0 -> root
+	ip netns exec at_ns0 \
+		ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+			spi $spi_in_to_out reqid 1 mode tunnel \
+			auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+	ip netns exec at_ns0 \
+		ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
+		tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+		mode tunnel
+	# root -> at_ns0
+	ip netns exec at_ns0 \
+		ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+			spi $spi_out_to_in reqid 2 mode tunnel \
+			auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+	ip netns exec at_ns0 \
+		ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
+		tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+		mode tunnel
+	# address & route
+	ip netns exec at_ns0 \
+		ip addr add dev veth0 10.1.1.100/32
+	ip netns exec at_ns0 \
+		ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
+			src 10.1.1.100
+
+	# root namespace
+	# at_ns0 -> root
+	ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+		spi $spi_in_to_out reqid 1 mode tunnel \
+		auth-trunc 'hmac(sha1)' $auth 96  enc 'cbc(aes)' $enc
+	ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
+		tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+		mode tunnel
+	# root -> at_ns0
+	ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+		spi $spi_out_to_in reqid 2 mode tunnel \
+		auth-trunc 'hmac(sha1)' $auth 96  enc 'cbc(aes)' $enc
+	ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
+		tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+		mode tunnel
+	# address & route
+	ip addr add dev veth1 10.1.1.200/32
+	ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
+}
+
+test_xfrm_tunnel()
+{
+	config_device
+        #tcpdump -nei veth1 ip &
+	output=$(mktemp)
+	cat /sys/kernel/debug/tracing/trace_pipe | tee $output &
+        setup_xfrm_tunnel
+	tc qdisc add dev veth1 clsact
+	tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
+		sec xfrm_get_state
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	sleep 1
+	grep "reqid 1" $output
+	check_err $?
+	grep "spi 0x1" $output
+	check_err $?
+	grep "remote ip 0xac100164" $output
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: xfrm tunnel"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
+}
+
+attach_bpf()
+{
+	DEV=$1
+	SET=$2
+	GET=$3
+	tc qdisc add dev $DEV clsact
+	tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET
+	tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET
+}
+
+cleanup()
+{
+	ip netns delete at_ns0 2> /dev/null
+	ip link del veth1 2> /dev/null
+	ip link del ipip11 2> /dev/null
+	ip link del ipip6tnl11 2> /dev/null
+	ip link del gretap11 2> /dev/null
+	ip link del ip6gre11 2> /dev/null
+	ip link del ip6gretap11 2> /dev/null
+	ip link del vxlan11 2> /dev/null
+	ip link del ip6vxlan11 2> /dev/null
+	ip link del geneve11 2> /dev/null
+	ip link del ip6geneve11 2> /dev/null
+	ip link del erspan11 2> /dev/null
+	ip link del ip6erspan11 2> /dev/null
+}
+
+cleanup_exit()
+{
+	echo "CATCH SIGKILL or SIGINT, cleanup and exit"
+	cleanup
+	exit 0
+}
+
+check()
+{
+	ip link help $1 2>&1 | grep -q "^Usage:"
+	if [ $? -ne 0 ];then
+		echo "SKIP $1: iproute2 not support"
+	cleanup
+	return 1
+	fi
+}
+
+enable_debug()
+{
+	echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
+}
+
+check_err()
+{
+	if [ $ret -eq 0 ]; then
+		ret=$1
+	fi
+}
+
+bpf_tunnel_test()
+{
+	echo "Testing GRE tunnel..."
+	test_gre
+	echo "Testing IP6GRE tunnel..."
+	test_ip6gre
+	echo "Testing IP6GRETAP tunnel..."
+	test_ip6gretap
+	echo "Testing ERSPAN tunnel..."
+	test_erspan v2
+	echo "Testing IP6ERSPAN tunnel..."
+	test_ip6erspan v2
+	echo "Testing VXLAN tunnel..."
+	test_vxlan
+	echo "Testing IP6VXLAN tunnel..."
+	test_ip6vxlan
+	echo "Testing GENEVE tunnel..."
+	test_geneve
+	echo "Testing IP6GENEVE tunnel..."
+	test_ip6geneve
+	echo "Testing IPIP tunnel..."
+	test_ipip
+	echo "Testing IPIP6 tunnel..."
+	test_ipip6
+	echo "Testing IPSec tunnel..."
+	test_xfrm_tunnel
+}
+
+trap cleanup 0 3 6
+trap cleanup_exit 2 9
+
+cleanup
+bpf_tunnel_test
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_tunnel_kern.c b/tools/testing/selftests/bpf/test_tunnel_kern.c
new file mode 100644
index 000000000000..504df69c83df
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel_kern.c
@@ -0,0 +1,713 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2016 VMware
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/pkt_cls.h>
+#include <linux/erspan.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define ERROR(ret) do {\
+		char fmt[] = "ERROR line:%d ret:%d\n";\
+		bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
+	} while (0)
+
+/* Value 1 emitted into the "version" section of the object file;
+ * presumably read by the program loader - TODO confirm.
+ */
+int _version SEC("version") = 1;
+
+/* Local layout of one Geneve option as passed to
+ * bpf_skb_set_tunnel_opt()/bpf_skb_get_tunnel_opt() below: a 4-byte header
+ * followed by a fixed 8-byte data area.
+ */
+struct geneve_opt {
+	__be16	opt_class;	/* option class, big-endian */
+	__u8	type;		/* option type */
+	__u8	length:5;	/* data length; setters note "4-byte multiple" */
+	__u8	r3:1;		/* r1..r3: written as 0 by the setters below */
+	__u8	r2:1;
+	__u8	r1:1;
+	__u8	opt_data[8]; /* hard-coded to 8 byte */
+};
+
+/* Tunnel option blob used by the VXLAN programs below: a single 32-bit
+ * Group Policy (gbp) value.
+ */
+struct vxlan_metadata {
+	__u32     gbp;
+};
+
+/* Attach IPv4 tunnel metadata to @skb: remote 172.16.1.100, tunnel id 2,
+ * TOS 0, TTL 64, with BPF_F_ZERO_CSUM_TX and BPF_F_SEQ_NUMBER set.
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT if the helper fails.
+ */
+SEC("gre_set_tunnel")
+int _gre_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	__builtin_memset(&tkey, 0x0, sizeof(tkey));
+	tkey.tunnel_ttl = 64;
+	tkey.tunnel_tos = 0;
+	tkey.tunnel_id = 2;
+	tkey.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/* Fetch the tunnel key attached to @skb and print its tunnel id and remote
+ * IPv4 address.  Returns TC_ACT_SHOT if the key cannot be read, TC_ACT_OK
+ * otherwise.
+ */
+SEC("gre_get_tunnel")
+int _gre_get_tunnel(struct __sk_buff *skb)
+{
+	char msg[] = "key %d remote ip 0x%x\n";
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	err = bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey), 0);
+	if (err >= 0) {
+		bpf_trace_printk(msg, sizeof(msg), tkey.tunnel_id,
+				 tkey.remote_ipv4);
+		return TC_ACT_OK;
+	}
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/* Attach IPv6 tunnel metadata to @skb: remote ::11, tunnel id 2, TOS 0,
+ * TTL 64, flow label 0xabcde, with BPF_F_TUNINFO_IPV6, BPF_F_ZERO_CSUM_TX
+ * and BPF_F_SEQ_NUMBER set.
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT if the helper fails.
+ */
+SEC("ip6gretap_set_tunnel")
+int _ip6gretap_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	__builtin_memset(&tkey, 0x0, sizeof(tkey));
+	tkey.tunnel_label = 0xabcde;
+	tkey.tunnel_ttl = 64;
+	tkey.tunnel_tos = 0;
+	tkey.tunnel_id = 2;
+	tkey.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+				     BPF_F_SEQ_NUMBER);
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/* Fetch the IPv6 tunnel key attached to @skb and print its tunnel id, the
+ * last 32-bit word of the remote address and the flow label.
+ * Returns TC_ACT_SHOT if the key cannot be read, TC_ACT_OK otherwise.
+ */
+SEC("ip6gretap_get_tunnel")
+int _ip6gretap_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip6 ::%x label %x\n";
+	struct bpf_tunnel_key key;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	/* remote_ipv6[] words are written with bpf_htonl() by the setter, so
+	 * convert back to host order before printing.
+	 */
+	bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id,
+			 bpf_ntohl(key.remote_ipv6[3]), key.tunnel_label);
+
+	return TC_ACT_OK;
+}
+
+/* Attach IPv4 tunnel metadata (remote 172.16.1.100, tunnel id 2, TTL 64,
+ * BPF_F_ZERO_CSUM_TX) plus ERSPAN metadata to @skb.
+ * With ERSPAN_V1 defined the metadata is version 1 with index 123;
+ * otherwise it is version 2 with direction 1 and hardware id 7.
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT if either helper fails.
+ */
+SEC("erspan_set_tunnel")
+int _erspan_set_tunnel(struct __sk_buff *skb)
+{
+	struct erspan_metadata meta;
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	__builtin_memset(&tkey, 0x0, sizeof(tkey));
+	tkey.tunnel_ttl = 64;
+	tkey.tunnel_tos = 0;
+	tkey.tunnel_id = 2;
+	tkey.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_ZERO_CSUM_TX);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&meta, 0, sizeof(meta));
+#ifdef ERSPAN_V1
+	meta.version = 1;
+	meta.u.index = bpf_htonl(123);
+#else
+	__u8 dir = 1;
+	__u8 hw_id = 7;
+
+	meta.version = 2;
+	meta.u.md2.dir = dir;
+	/* hardware id is split into a 4-bit low part and a 2-bit high part */
+	meta.u.md2.hwid = hw_id & 0xf;
+	meta.u.md2.hwid_upper = (hw_id >> 4) & 0x3;
+#endif
+
+	err = bpf_skb_set_tunnel_opt(skb, &meta, sizeof(meta));
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/* Fetch the tunnel key and ERSPAN metadata attached to @skb and print them.
+ * With ERSPAN_V1 defined the index is printed; otherwise direction,
+ * hardware id and timestamp are printed.
+ * Returns TC_ACT_SHOT if either helper fails, TC_ACT_OK otherwise.
+ */
+SEC("erspan_get_tunnel")
+int _erspan_get_tunnel(struct __sk_buff *skb)
+{
+	char msg[] = "key %d remote ip 0x%x erspan version %d\n";
+	struct erspan_metadata meta;
+	struct bpf_tunnel_key tkey;
+	__u32 index;
+	int err;
+
+	err = bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey), 0);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	err = bpf_skb_get_tunnel_opt(skb, &meta, sizeof(meta));
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(msg, sizeof(msg),
+			 tkey.tunnel_id, tkey.remote_ipv4, meta.version);
+
+#ifdef ERSPAN_V1
+	char msg2[] = "\tindex %x\n";
+
+	index = bpf_ntohl(meta.u.index);
+	bpf_trace_printk(msg2, sizeof(msg2), index);
+#else
+	char msg2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+	/* reassemble the hardware id from its high and low parts */
+	bpf_trace_printk(msg2, sizeof(msg2),
+			 meta.u.md2.dir,
+			 (meta.u.md2.hwid_upper << 4) + meta.u.md2.hwid,
+			 bpf_ntohl(meta.u.md2.timestamp));
+#endif
+
+	return TC_ACT_OK;
+}
+
+/* Attach IPv6 tunnel metadata (remote ::11, tunnel id 2, TTL 64,
+ * BPF_F_TUNINFO_IPV6) plus ERSPAN metadata to @skb.
+ * With ERSPAN_V1 defined the metadata is version 1 with index 123;
+ * otherwise it is version 2 with direction 0 and hardware id 17.
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT if either helper fails.
+ */
+SEC("ip4ip6erspan_set_tunnel")
+int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
+{
+	struct erspan_metadata meta;
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	__builtin_memset(&tkey, 0x0, sizeof(tkey));
+	tkey.tunnel_ttl = 64;
+	tkey.tunnel_tos = 0;
+	tkey.tunnel_id = 2;
+	tkey.remote_ipv6[3] = bpf_htonl(0x11);
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_TUNINFO_IPV6);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&meta, 0, sizeof(meta));
+
+#ifdef ERSPAN_V1
+	meta.version = 1;
+	meta.u.index = bpf_htonl(123);
+#else
+	__u8 dir = 0;
+	__u8 hw_id = 17;
+
+	meta.version = 2;
+	meta.u.md2.dir = dir;
+	/* hardware id is split into a 4-bit low part and a 2-bit high part */
+	meta.u.md2.hwid = hw_id & 0xf;
+	meta.u.md2.hwid_upper = (hw_id >> 4) & 0x3;
+#endif
+
+	err = bpf_skb_set_tunnel_opt(skb, &meta, sizeof(meta));
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/* Fetch the IPv6 tunnel key and ERSPAN metadata attached to @skb and print
+ * them.  With ERSPAN_V1 defined the index is printed; otherwise direction,
+ * hardware id and timestamp are printed.
+ * Returns TC_ACT_SHOT if either helper fails, TC_ACT_OK otherwise.
+ */
+SEC("ip4ip6erspan_get_tunnel")
+int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	/* The key was requested with BPF_F_TUNINFO_IPV6 and the format prints
+	 * "::%x", so report the last word of the IPv6 address (the one the
+	 * setter fills in) in host order rather than remote_ipv4.
+	 */
+	bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id,
+			 bpf_ntohl(key.remote_ipv6[3]), md.version);
+
+#ifdef ERSPAN_V1
+	char fmt2[] = "\tindex %x\n";
+	__u32 index;
+
+	index = bpf_ntohl(md.u.index);
+	bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+	char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+	/* reassemble the hardware id from its high and low parts */
+	bpf_trace_printk(fmt2, sizeof(fmt2),
+			 md.u.md2.dir,
+			 (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+			 bpf_ntohl(md.u.md2.timestamp));
+#endif
+
+	return TC_ACT_OK;
+}
+
+/* Attach IPv4 tunnel metadata (remote 172.16.1.100, tunnel id 2, TTL 64,
+ * BPF_F_ZERO_CSUM_TX) and a VXLAN Group Policy option of 0x800FF to @skb.
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT if either helper fails.
+ */
+SEC("vxlan_set_tunnel")
+int _vxlan_set_tunnel(struct __sk_buff *skb)
+{
+	struct vxlan_metadata meta;
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	__builtin_memset(&tkey, 0x0, sizeof(tkey));
+	tkey.tunnel_ttl = 64;
+	tkey.tunnel_tos = 0;
+	tkey.tunnel_id = 2;
+	tkey.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_ZERO_CSUM_TX);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	meta.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
+	err = bpf_skb_set_tunnel_opt(skb, &meta, sizeof(meta));
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/* Fetch the tunnel key and VXLAN option attached to @skb and print the
+ * tunnel id, remote IPv4 address and Group Policy value.
+ * Returns TC_ACT_SHOT if either helper fails, TC_ACT_OK otherwise.
+ */
+SEC("vxlan_get_tunnel")
+int _vxlan_get_tunnel(struct __sk_buff *skb)
+{
+	char msg[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
+	struct vxlan_metadata meta;
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	err = bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey), 0);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	err = bpf_skb_get_tunnel_opt(skb, &meta, sizeof(meta));
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(msg, sizeof(msg),
+			 tkey.tunnel_id, tkey.remote_ipv4, meta.gbp);
+	return TC_ACT_OK;
+}
+
+/* Attach IPv6 tunnel metadata to @skb: remote ::11, tunnel id 22, TOS 0,
+ * TTL 64, with BPF_F_TUNINFO_IPV6 set.
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT if the helper fails.
+ */
+SEC("ip6vxlan_set_tunnel")
+int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	__builtin_memset(&tkey, 0x0, sizeof(tkey));
+	tkey.tunnel_ttl = 64;
+	tkey.tunnel_tos = 0;
+	tkey.tunnel_id = 22;
+	tkey.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_TUNINFO_IPV6);
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/* Fetch the IPv6 tunnel key attached to @skb and print its tunnel id, the
+ * last 32-bit word of the remote address and the flow label.
+ * Returns TC_ACT_SHOT if the key cannot be read, TC_ACT_OK otherwise.
+ */
+SEC("ip6vxlan_get_tunnel")
+int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip6 ::%x label %x\n";
+	struct bpf_tunnel_key key;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	/* remote_ipv6[] words are written with bpf_htonl() by the setter, so
+	 * convert back to host order before printing.
+	 */
+	bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id,
+			 bpf_ntohl(key.remote_ipv6[3]), key.tunnel_label);
+
+	return TC_ACT_OK;
+}
+
+/* Attach IPv4 tunnel metadata (remote 172.16.1.100, tunnel id 2, TTL 64,
+ * BPF_F_ZERO_CSUM_TX) and one Geneve option (class 0x102, type 0x08,
+ * data 0xdeadbeef) to @skb.
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT if either helper fails.
+ */
+SEC("geneve_set_tunnel")
+int _geneve_set_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	__builtin_memset(&gopt, 0x0, sizeof(gopt));
+	gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+	gopt.type = 0x08;
+	gopt.r1 = 0;
+	gopt.r2 = 0;
+	gopt.r3 = 0;
+	gopt.length = 2; /* 4-byte multiple */
+	/* only the first 4 of the 8 data bytes carry a value; rest stay 0 */
+	*(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef);
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+/* Fetch the tunnel key and Geneve option attached to @skb and print the
+ * tunnel id, remote IPv4 address and option class.
+ * Returns TC_ACT_SHOT if either helper fails, TC_ACT_OK otherwise.
+ */
+SEC("geneve_get_tunnel")
+int _geneve_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	/* opt_class is a __be16; convert so the class prints in host order */
+	bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4,
+			 bpf_ntohs(gopt.opt_class));
+	return TC_ACT_OK;
+}
+
+/* Attach IPv6 tunnel metadata (remote ::11, tunnel id 22, TTL 64,
+ * BPF_F_TUNINFO_IPV6) and one Geneve option (class 0x102, type 0x08,
+ * data 0xfeedbeef) to @skb.
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT if either helper fails.
+ */
+SEC("ip6geneve_set_tunnel")
+int _ip6geneve_set_tunnel(struct __sk_buff *skb)
+{
+	struct geneve_opt opt;
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	__builtin_memset(&tkey, 0x0, sizeof(tkey));
+	tkey.tunnel_ttl = 64;
+	tkey.tunnel_tos = 0;
+	tkey.tunnel_id = 22;
+	tkey.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_TUNINFO_IPV6);
+	if (err < 0) {
+		ERROR(err);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&opt, 0x0, sizeof(opt));
+	opt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+	opt.type = 0x08;
+	opt.length = 2; /* 4-byte multiple */
+	opt.r3 = 0;
+	opt.r2 = 0;
+	opt.r1 = 0;
+	*(int *) &opt.opt_data = bpf_htonl(0xfeedbeef);
+
+	err = bpf_skb_set_tunnel_opt(skb, &opt, sizeof(opt));
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/* Fetch the IPv6 tunnel key and Geneve option attached to @skb and print
+ * the tunnel id, the last 32-bit word of the remote address and the option
+ * class.
+ * Returns TC_ACT_SHOT if either helper fails, TC_ACT_OK otherwise.
+ */
+SEC("ip6geneve_get_tunnel")
+int _ip6geneve_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip6 ::%x geneve class 0x%x\n";
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	/* The key was requested with BPF_F_TUNINFO_IPV6, so print the IPv6
+	 * word the setter fills in instead of remote_ipv4; both it and the
+	 * __be16 option class are converted to host order first.
+	 */
+	bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id,
+			 bpf_ntohl(key.remote_ipv6[3]),
+			 bpf_ntohs(gopt.opt_class));
+
+	return TC_ACT_OK;
+}
+
+/* Choose the IPIP tunnel remote from the inner packet and attach it to
+ * @skb with TTL 64:
+ *   - ICMP, or TCP to port 5200 -> 172.16.1.100
+ *   - TCP to port 5201          -> 172.16.1.101
+ * Packets shorter than an IPv4 header plus a TCP header, other protocols,
+ * IPv4 headers with ihl != 5 and other TCP ports are dropped.
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT otherwise.
+ */
+SEC("ipip_set_tunnel")
+int _ipip_set_tunnel(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	struct tcphdr *tcp = data + sizeof(struct iphdr);
+	__u32 remote = 0xac100164; /* 172.16.1.100 */
+	struct bpf_tunnel_key tkey = {};
+	struct iphdr *iph = data;
+	int err;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	if (iph->protocol != IPPROTO_ICMP) {
+		if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
+			return TC_ACT_SHOT;
+
+		if (tcp->dest == bpf_htons(5201))
+			remote = 0xac100165; /* 172.16.1.101 */
+		else if (tcp->dest != bpf_htons(5200))
+			return TC_ACT_SHOT;
+	}
+
+	tkey.tunnel_ttl = 64;
+	tkey.remote_ipv4 = remote;
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), 0);
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/* Fetch the tunnel key attached to @skb and print its remote IPv4 address.
+ * Returns TC_ACT_SHOT if the key cannot be read, TC_ACT_OK otherwise.
+ */
+SEC("ipip_get_tunnel")
+int _ipip_get_tunnel(struct __sk_buff *skb)
+{
+	char msg[] = "remote ip 0x%x\n";
+	struct bpf_tunnel_key tkey;
+	int err;
+
+	err = bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey), 0);
+	if (err >= 0) {
+		bpf_trace_printk(msg, sizeof(msg), tkey.remote_ipv4);
+		return TC_ACT_OK;
+	}
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/* Attach IPv6 tunnel metadata to @skb: remote ::11, TTL 64, with
+ * BPF_F_TUNINFO_IPV6 set.  Packets shorter than an IPv4 header plus a TCP
+ * header are dropped; the headers themselves are not inspected.
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT otherwise.
+ */
+SEC("ipip6_set_tunnel")
+int _ipip6_set_tunnel(struct __sk_buff *skb)
+{
+	/* zero-initialised here; no separate memset is needed */
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+/* Fetch the IPv6 tunnel key attached to @skb and print the first and last
+ * 32-bit words of the remote address in host byte order.
+ * Returns TC_ACT_SHOT if the key cannot be read, TC_ACT_OK otherwise.
+ */
+SEC("ipip6_get_tunnel")
+int _ipip6_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip6 %x::%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	/* network -> host conversion, hence ntohl rather than htonl */
+	bpf_trace_printk(fmt, sizeof(fmt), bpf_ntohl(key.remote_ipv6[0]),
+			 bpf_ntohl(key.remote_ipv6[3]));
+	return TC_ACT_OK;
+}
+
+/* Choose the IPv6 tunnel remote from the inner IPv6 packet and attach it to
+ * @skb with TTL 64 and BPF_F_TUNINFO_IPV6.  The first address word is
+ * 0x2401db00; the last word is
+ *   - 1 for ICMPv6 (next header 58) or TCP to port 5200
+ *   - 2 for TCP to port 5201
+ * Packets shorter than an IPv6 header plus a TCP header, other next-header
+ * values and other TCP ports are reported via ERROR() and dropped.
+ * Returns TC_ACT_OK on success, TC_ACT_SHOT otherwise.
+ */
+SEC("ip6ip6_set_tunnel")
+int _ip6ip6_set_tunnel(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	struct tcphdr *tcp = data + sizeof(struct ipv6hdr);
+	struct bpf_tunnel_key tkey = {};
+	struct ipv6hdr *ip6h = data;
+	__u32 host = 1;
+	int err;
+
+	/* single length check */
+	if (data + sizeof(*ip6h) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	if (ip6h->nexthdr != 58 /* NEXTHDR_ICMP */) {
+		if (ip6h->nexthdr != 6 /* NEXTHDR_TCP */) {
+			ERROR(ip6h->nexthdr);
+			return TC_ACT_SHOT;
+		}
+
+		if (tcp->dest == bpf_htons(5201)) {
+			host = 2;
+		} else if (tcp->dest != bpf_htons(5200)) {
+			ERROR(tcp->dest);
+			return TC_ACT_SHOT;
+		}
+	}
+
+	tkey.remote_ipv6[0] = bpf_htonl(0x2401db00);
+	tkey.remote_ipv6[3] = bpf_htonl(host);
+	tkey.tunnel_ttl = 64;
+
+	err = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
+				     BPF_F_TUNINFO_IPV6);
+	if (err >= 0)
+		return TC_ACT_OK;
+
+	ERROR(err);
+	return TC_ACT_SHOT;
+}
+
+/* Fetch the IPv6 tunnel key attached to @skb and print the first and last
+ * 32-bit words of the remote address in host byte order.
+ * Returns TC_ACT_SHOT if the key cannot be read, TC_ACT_OK otherwise.
+ */
+SEC("ip6ip6_get_tunnel")
+int _ip6ip6_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip6 %x::%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	/* network -> host conversion, hence ntohl rather than htonl */
+	bpf_trace_printk(fmt, sizeof(fmt), bpf_ntohl(key.remote_ipv6[0]),
+			 bpf_ntohl(key.remote_ipv6[3]));
+	return TC_ACT_OK;
+}
+
+/* Query xfrm state index 0 of @skb and, when available, print its reqid,
+ * SPI and remote IPv4 address.  The packet is accepted (TC_ACT_OK) whether
+ * or not the lookup succeeds.
+ */
+SEC("xfrm_get_state")
+int _xfrm_get_state(struct __sk_buff *skb)
+{
+	char msg[] = "reqid %d spi 0x%x remote ip 0x%x\n";
+	struct bpf_xfrm_state state;
+	int err;
+
+	err = bpf_skb_get_xfrm_state(skb, 0, &state, sizeof(state), 0);
+	if (err >= 0)
+		bpf_trace_printk(msg, sizeof(msg), state.reqid,
+				 bpf_ntohl(state.spi),
+				 bpf_ntohl(state.remote_ipv4));
+
+	return TC_ACT_OK;
+}
+
+/* License string emitted into the "license" section of the object file. */
+char _license[] SEC("license") = "GPL";
-- 
2.7.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox