Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v3 next-queue 03/10] ixgbe: add ipsec engine start and stop routines
From: Shannon Nelson @ 2017-12-19 23:59 UTC (permalink / raw)
  To: intel-wired-lan, jeffrey.t.kirsher
  Cc: steffen.klassert, sowmini.varadhan, netdev
In-Reply-To: <1513728002-7643-1-git-send-email-shannon.nelson@oracle.com>

Add in the code for running and stopping the hardware ipsec
encryption/decryption engine.  It is good to keep the engine
off when not in use in order to save on the power draw.

v2: add limiter to do-while loop waiting for paths to drain

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 142 +++++++++++++++++++++++++
 1 file changed, 142 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index bd7585f..85eaafc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -152,10 +152,152 @@ static void ixgbe_ipsec_clear_hw_tables(struct ixgbe_adapter *adapter)
 }
 
 /**
+ * ixgbe_ipsec_stop_data - halt and drain the Tx/Rx security data paths
+ * @adapter: board private structure
+ **/
+static void ixgbe_ipsec_stop_data(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	bool link = adapter->link_up;
+	u32 t_rdy, r_rdy;
+	u32 limit;
+	u32 reg;
+
+	/* halt data paths */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+	reg |= IXGBE_SECTXCTRL_TX_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, reg);
+
+	reg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+	reg |= IXGBE_SECRXCTRL_RX_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, reg);
+
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* If the tx fifo doesn't have link, but still has data,
+	 * we can't clear the tx sec block.  Set the MAC loopback
+	 * before block clear
+	 */
+	if (!link) {
+		reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+		reg |= IXGBE_MACC_FLU;
+		IXGBE_WRITE_REG(hw, IXGBE_MACC, reg);
+
+		reg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+		reg |= IXGBE_HLREG0_LPBK;
+		IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg);
+
+		IXGBE_WRITE_FLUSH(hw);
+		mdelay(3);
+	}
+
+	/* wait until BOTH paths report ready (empty), about 200ms at most */
+	limit = 20;
+	do {
+		mdelay(10);
+		t_rdy = IXGBE_READ_REG(hw, IXGBE_SECTXSTAT) &
+			IXGBE_SECTXSTAT_SECTX_RDY;
+		r_rdy = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT) &
+			IXGBE_SECRXSTAT_SECRX_RDY;
+	} while (!(t_rdy && r_rdy) && limit--);
+
+	/* undo loopback if we played with it earlier */
+	if (!link) {
+		reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+		reg &= ~IXGBE_MACC_FLU;
+		IXGBE_WRITE_REG(hw, IXGBE_MACC, reg);
+
+		reg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+		reg &= ~IXGBE_HLREG0_LPBK;
+		IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg);
+
+		IXGBE_WRITE_FLUSH(hw);
+	}
+}
+
+/**
+ * ixgbe_ipsec_stop_engine - halt data paths and disable the ipsec engine
+ * @adapter: board private structure
+ **/
+static void ixgbe_ipsec_stop_engine(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 reg;
+
+	ixgbe_ipsec_stop_data(adapter);
+
+	/* disable Rx and Tx SA lookup */
+	IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, 0);
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, 0);
+
+	/* disable the Rx and Tx engines and full packet store-n-forward */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+	reg |= IXGBE_SECTXCTRL_SECTX_DIS;
+	reg &= ~IXGBE_SECTXCTRL_STORE_FORWARD;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, reg);
+
+	reg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+	reg |= IXGBE_SECRXCTRL_SECRX_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, reg);
+
+	/* restore the "tx security buffer almost full threshold" to 0x250 */
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXBUFFAF, 0x250);
+
+	/* Set minimum IFG (low 4 bits) between packets back to default 0x1 */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+	reg = (reg & 0xfffffff0) | 0x1;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
+
+	/* leave only the engine-disable bits set for normal (no ipsec) use */
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, IXGBE_SECTXCTRL_SECTX_DIS);
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, IXGBE_SECRXCTRL_SECRX_DIS);
+
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_start_engine - configure and enable the hardware ipsec engine
+ * @adapter: board private structure
+ *
+ * NOTE: this increases power consumption whether being used or not
+ **/
+static void ixgbe_ipsec_start_engine(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 reg;
+
+	ixgbe_ipsec_stop_data(adapter);
+
+	/* Set minimum IFG (low 4 bits of the register) between packets to 3 */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+	reg = (reg & 0xfffffff0) | 0x3;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
+
+	/* Set "tx security buffer almost full threshold" to 0x15 so that the
+	 * almost full indication is generated only after buffer contains at
+	 * least an entire jumbo packet.
+	 */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXBUFFAF);
+	reg = (reg & 0xfffffc00) | 0x15;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXBUFFAF, reg);
+
+	/* restart data paths: clear DISABLE bits, Tx set to store-n-forward */
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, 0);
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, IXGBE_SECTXCTRL_STORE_FORWARD);
+
+	/* enable Rx and Tx SA lookup */
+	IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, IXGBE_RXTXIDX_IPS_EN);
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, IXGBE_RXTXIDX_IPS_EN);
+
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
  * ixgbe_init_ipsec_offload - initialize security registers for IPSec operation
  * @adapter: board private structure
  **/
 void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter)
 {
 	ixgbe_ipsec_clear_hw_tables(adapter);
+	ixgbe_ipsec_stop_engine(adapter);
 }
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 next-queue 06/10] ixgbe: restore offloaded SAs after a reset
From: Shannon Nelson @ 2017-12-19 23:59 UTC (permalink / raw)
  To: intel-wired-lan, jeffrey.t.kirsher
  Cc: steffen.klassert, sowmini.varadhan, netdev
In-Reply-To: <1513728002-7643-1-git-send-email-shannon.nelson@oracle.com>

On a chip reset most of the table contents are lost, so must be
restored.  This scans the driver's ipsec tables and restores both
the filled and empty table slots to their pre-reset values.

v2: during restore, clean the tables before restarting

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h       |  2 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 41 ++++++++++++++++++++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  |  1 +
 3 files changed, 44 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 8f41508..af690c2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -1010,8 +1010,10 @@ s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
 #ifdef CONFIG_XFRM_OFFLOAD
 void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter);
 void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter);
+void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter);
 #else
 static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) { };
 static inline void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter) { };
+static inline void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) { };
 #endif /* CONFIG_XFRM_OFFLOAD */
 #endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 96971ce..9cf120d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -299,6 +299,47 @@ static void ixgbe_ipsec_start_engine(struct ixgbe_adapter *adapter)
 }
 
 /**
+ * ixgbe_ipsec_restore - restore the ipsec HW settings after a reset
+ * @adapter: board private structure
+ **/
+void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	if (!(adapter->flags2 & IXGBE_FLAG2_IPSEC_ENABLED))
+		return;
+
+	/* stop the engine, scrub the HW tables, then restart the engine */
+	ixgbe_ipsec_stop_engine(adapter);
+	ixgbe_ipsec_clear_hw_tables(adapter);
+	ixgbe_ipsec_start_engine(adapter);
+
+	/* reload the Rx IP table entries that are marked as used */
+	for (i = 0; i < IXGBE_IPSEC_MAX_RX_IP_COUNT; i++) {
+		struct rx_ip_sa *ipsa = &ipsec->ip_tbl[i];
+
+		if (ipsa->used)
+			ixgbe_ipsec_set_rx_ip(hw, i, ipsa->ipaddr);
+	}
+
+	/* reload the Rx and Tx SA entries that are marked as used */
+	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+		struct rx_sa *rsa = &ipsec->rx_tbl[i];
+		struct tx_sa *tsa = &ipsec->tx_tbl[i];
+
+		if (rsa->used)
+			ixgbe_ipsec_set_rx_sa(hw, i, rsa->xs->id.spi,
+					      rsa->key, rsa->salt,
+					      rsa->mode, rsa->iptbl_ind);
+
+		if (tsa->used)
+			ixgbe_ipsec_set_tx_sa(hw, i, tsa->key, tsa->salt);
+	}
+}
+
+/**
  * ixgbe_ipsec_find_empty_idx - find the first unused security parameter index
  * @ipsec: pointer to ipsec struct
  * @rxtable: true if we need to look in the Rx table
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 2b3da0c..04e8b26 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -5347,6 +5347,7 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
 
 	ixgbe_set_rx_mode(adapter->netdev);
 	ixgbe_restore_vlan(adapter);
+	ixgbe_ipsec_restore(adapter);
 
 	switch (hw->mac.type) {
 	case ixgbe_mac_82599EB:
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 next-queue 10/10] ixgbe: register ipsec offload with the xfrm subsystem
From: Shannon Nelson @ 2017-12-20  0:00 UTC (permalink / raw)
  To: intel-wired-lan, jeffrey.t.kirsher
  Cc: steffen.klassert, sowmini.varadhan, netdev
In-Reply-To: <1513728002-7643-1-git-send-email-shannon.nelson@oracle.com>

With all the support code in place we can now link in the ipsec
offload operations and set the ESP feature flag for the XFRM
subsystem to see.

v3: added ifdef CONFIG_XFRM_OFFLOAD in ixgbe_features_check

v2: added the xdo_dev_state_free callback to make XFRM happy
    changed use of NETIF_F_HW_CSUM_BIT to NETIF_F_HW_CSUM

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 17 +++++++++++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  |  6 ++++++
 2 files changed, 23 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 4b16466..424dbf7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -715,10 +715,23 @@ static bool ixgbe_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
 	return true;
 }
 
+/**
+ * ixgbe_ipsec_free - called by xfrm garbage collection; intentionally a no-op
+ * @xs: pointer to transformer state struct (unused)
+ *
+ * We don't have any garbage to collect, so we wouldn't need to
+ * implement this function, but the XFRM code doesn't check for
+ * existence before calling the API callback.
+ **/
+static void ixgbe_ipsec_free(struct xfrm_state *xs)
+{
+}
+
 static const struct xfrmdev_ops ixgbe_xfrmdev_ops = {
 	.xdo_dev_state_add = ixgbe_ipsec_add_sa,
 	.xdo_dev_state_delete = ixgbe_ipsec_del_sa,
 	.xdo_dev_offload_ok = ixgbe_ipsec_offload_ok,
+	.xdo_dev_state_free = ixgbe_ipsec_free,
 };
 
 /**
@@ -877,6 +890,10 @@ void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter)
 	ixgbe_ipsec_stop_engine(adapter);
 	ixgbe_ipsec_clear_hw_tables(adapter);
 
+	adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops;
+	adapter->netdev->features |= NETIF_F_HW_ESP;
+	adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
 	return;
 
 err2:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 8c88d32..2f56309 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9798,6 +9798,12 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
 	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
 		features &= ~NETIF_F_TSO;
 
+#ifdef CONFIG_XFRM_OFFLOAD
+	/* IPsec offload doesn't get along well with others *yet* */
+	if (skb->sp)
+		features &= ~(NETIF_F_TSO | NETIF_F_HW_CSUM);
+#endif
+
 	return features;
 }
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 next-queue 02/10] ixgbe: add ipsec register access routines
From: Shannon Nelson @ 2017-12-19 23:59 UTC (permalink / raw)
  To: intel-wired-lan, jeffrey.t.kirsher
  Cc: steffen.klassert, sowmini.varadhan, netdev
In-Reply-To: <1513728002-7643-1-git-send-email-shannon.nelson@oracle.com>

Add a few routines to make access to the ipsec registers just a little
easier, and throw in the beginnings of an initialization.

v3: fixed a couple checkpatch warnings

v2: Rx table selector becomes an enum with a shift
    Combine the clear table loops into one
    Name the table index shift value
    Use the addr as __be32

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 drivers/net/ethernet/intel/ixgbe/Makefile      |   1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe.h       |   6 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 161 +++++++++++++++++++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h |  53 ++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  |   1 +
 5 files changed, 222 insertions(+)
 create mode 100644 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
 create mode 100644 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h

diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile
index 35e6fa6..8319465 100644
--- a/drivers/net/ethernet/intel/ixgbe/Makefile
+++ b/drivers/net/ethernet/intel/ixgbe/Makefile
@@ -42,3 +42,4 @@ ixgbe-$(CONFIG_IXGBE_DCB) +=  ixgbe_dcb.o ixgbe_dcb_82598.o \
 ixgbe-$(CONFIG_IXGBE_HWMON) += ixgbe_sysfs.o
 ixgbe-$(CONFIG_DEBUG_FS) += ixgbe_debugfs.o
 ixgbe-$(CONFIG_FCOE:m=y) += ixgbe_fcoe.o
+ixgbe-$(CONFIG_XFRM_OFFLOAD) += ixgbe_ipsec.o
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index dd55787..1e11462 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -52,6 +52,7 @@
 #ifdef CONFIG_IXGBE_DCA
 #include <linux/dca.h>
 #endif
+#include "ixgbe_ipsec.h"
 
 #include <net/busy_poll.h>
 
@@ -1001,4 +1002,9 @@ void ixgbe_store_key(struct ixgbe_adapter *adapter);
 void ixgbe_store_reta(struct ixgbe_adapter *adapter);
 s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
 		       u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm);
+#ifdef CONFIG_XFRM_OFFLOAD
+void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter);
+#else
+static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) { };
+#endif /* CONFIG_XFRM_OFFLOAD */
 #endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
new file mode 100644
index 0000000..bd7585f
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -0,0 +1,161 @@
+/*******************************************************************************
+ *
+ * Intel 10 Gigabit PCI Express Linux driver
+ * Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * Linux NICS <linux.nics@intel.com>
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#include "ixgbe.h"
+
+/**
+ * ixgbe_ipsec_set_tx_sa - set the Tx SA registers
+ * @hw: hw specific details
+ * @idx: table index, written to IPSTXIDX together with the WRITE bit
+ * @key: key as four 32-bit words, loaded in reverse word order
+ * @salt: 32-bit salt value
+ **/
+static void ixgbe_ipsec_set_tx_sa(struct ixgbe_hw *hw, u16 idx,
+				  u32 key[], u32 salt)
+{
+	u32 reg;
+	int i;
+
+	for (i = 0; i < 4; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(i), cpu_to_be32(key[3 - i]));
+	IXGBE_WRITE_REG(hw, IXGBE_IPSTXSALT, cpu_to_be32(salt));
+	IXGBE_WRITE_FLUSH(hw);
+
+	reg = IXGBE_READ_REG(hw, IXGBE_IPSTXIDX);
+	reg &= IXGBE_RXTXIDX_IPS_EN;
+	reg |= idx << IXGBE_RXTXIDX_IDX_SHIFT | IXGBE_RXTXIDX_WRITE;
+	IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, reg);
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_set_rx_item - set an Rx table item
+ * @hw: hw specific details
+ * @idx: index of the table entry to write
+ * @tbl: table selector (IP, SPI or key table)
+ *
+ * Trigger the device to store into a particular Rx table the
+ * data that has already been loaded into the input registers.
+ * Only the IPS_EN bit of the current IPSRXIDX value is preserved.
+ **/
+static void ixgbe_ipsec_set_rx_item(struct ixgbe_hw *hw, u16 idx,
+				    enum ixgbe_ipsec_tbl_sel tbl)
+{
+	u32 reg;
+
+	reg = IXGBE_READ_REG(hw, IXGBE_IPSRXIDX);
+	reg &= IXGBE_RXTXIDX_IPS_EN;
+	reg |= tbl << IXGBE_RXIDX_TBL_SHIFT |
+	       idx << IXGBE_RXTXIDX_IDX_SHIFT |
+	       IXGBE_RXTXIDX_WRITE;
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, reg);
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_set_rx_sa - set up the register bits to save SA info
+ * @hw: hw specific details
+ * @idx: index of the SPI and key table entries to write
+ * @spi: security parameter index, in big-endian order
+ * @key: key as four 32-bit words, loaded in reverse word order
+ * @salt: 32-bit salt value
+ * @mode: rx decrypt control bits
+ * @ip_idx: index into IP table for related IP address
+ **/
+static void ixgbe_ipsec_set_rx_sa(struct ixgbe_hw *hw, u16 idx, __be32 spi,
+				  u32 key[], u32 salt, u32 mode, u32 ip_idx)
+{
+	int i;
+
+	/* store the SPI (in bigendian) and IPidx */
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXSPI, spi);
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPIDX, ip_idx);
+	IXGBE_WRITE_FLUSH(hw);
+
+	ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_spi_tbl);
+
+	/* store the key, salt, and mode */
+	for (i = 0; i < 4; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(i), cpu_to_be32(key[3 - i]));
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXSALT, cpu_to_be32(salt));
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXMOD, mode);
+	IXGBE_WRITE_FLUSH(hw);
+
+	ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_key_tbl);
+}
+
+/**
+ * ixgbe_ipsec_set_rx_ip - set up the register bits to save SA IP addr info
+ * @hw: hw specific details
+ * @idx: index of the IP table entry to write
+ * @addr: IP address as an array of four big-endian 32-bit words
+ **/
+static void ixgbe_ipsec_set_rx_ip(struct ixgbe_hw *hw, u16 idx, __be32 addr[])
+{
+	int i;
+
+	/* store the ip address, one 32-bit word per address register */
+	for (i = 0; i < 4; i++)
+		IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(i), addr[i]);
+	IXGBE_WRITE_FLUSH(hw);
+
+	ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_ip_tbl);
+}
+
+/**
+ * ixgbe_ipsec_clear_hw_tables - because some tables don't get cleared on reset
+ * @adapter: board private structure
+ **/
+static void ixgbe_ipsec_clear_hw_tables(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 buf[4] = {0, 0, 0, 0};
+	u16 idx;
+
+	/* disable Rx and Tx SA lookup */
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, 0);
+	IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, 0);
+
+	/* scrub the tables - the IP table is shorter than the SA tables */
+	for (idx = 0; idx < IXGBE_IPSEC_MAX_RX_IP_COUNT; idx++) {
+		ixgbe_ipsec_set_tx_sa(hw, idx, buf, 0);
+		ixgbe_ipsec_set_rx_sa(hw, idx, 0, buf, 0, 0, 0);
+		ixgbe_ipsec_set_rx_ip(hw, idx, (__be32 *)buf);
+	}
+	for (; idx < IXGBE_IPSEC_MAX_SA_COUNT; idx++) {
+		ixgbe_ipsec_set_tx_sa(hw, idx, buf, 0);
+		ixgbe_ipsec_set_rx_sa(hw, idx, 0, buf, 0, 0, 0);
+	}
+}
+
+/**
+ * ixgbe_init_ipsec_offload - initialize security registers for IPSec operation
+ * @adapter: board private structure
+ **/
+void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter)
+{
+	ixgbe_ipsec_clear_hw_tables(adapter);
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
new file mode 100644
index 0000000..8fe8289
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
@@ -0,0 +1,53 @@
+/*******************************************************************************
+
+  Intel 10 Gigabit PCI Express Linux driver
+  Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms and conditions of the GNU General Public License,
+  version 2, as published by the Free Software Foundation.
+
+  This program is distributed in the hope it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program.  If not, see <http://www.gnu.org/licenses/>.
+
+  The full GNU General Public License is included in this distribution in
+  the file called "COPYING".
+
+  Contact Information:
+  Linux NICS <linux.nics@intel.com>
+  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_IPSEC_H_
+#define _IXGBE_IPSEC_H_
+
+#define IXGBE_IPSEC_MAX_SA_COUNT	1024
+#define IXGBE_IPSEC_MAX_RX_IP_COUNT	128
+#define IXGBE_IPSEC_BASE_RX_INDEX	0
+#define IXGBE_IPSEC_BASE_TX_INDEX	IXGBE_IPSEC_MAX_SA_COUNT
+
+#define IXGBE_RXTXIDX_IPS_EN		0x00000001
+#define IXGBE_RXIDX_TBL_SHIFT		1
+enum ixgbe_ipsec_tbl_sel {
+	ips_rx_ip_tbl	=	0x01,
+	ips_rx_spi_tbl	=	0x02,
+	ips_rx_key_tbl	=	0x03,
+};
+
+#define IXGBE_RXTXIDX_IDX_SHIFT		3
+#define IXGBE_RXTXIDX_READ		0x40000000
+#define IXGBE_RXTXIDX_WRITE		0x80000000
+
+#define IXGBE_RXMOD_VALID		0x00000001
+#define IXGBE_RXMOD_PROTO_ESP		0x00000004
+#define IXGBE_RXMOD_DECRYPT		0x00000008
+#define IXGBE_RXMOD_IPV6		0x00000010
+
+#endif /* _IXGBE_IPSEC_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 6d5f31e..51fb3cf 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10327,6 +10327,7 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 					 NETIF_F_FCOE_MTU;
 	}
 #endif /* IXGBE_FCOE */
+	ixgbe_init_ipsec_offload(adapter);
 
 	if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)
 		netdev->hw_features |= NETIF_F_LRO;
-- 
2.7.4

^ permalink raw reply related

* Re: Linux ECN Handling
From: Neal Cardwell @ 2017-12-20  0:08 UTC (permalink / raw)
  To: Steve Ibanez
  Cc: Eric Dumazet, Yuchung Cheng, Daniel Borkmann, Netdev,
	Florian Westphal, Mohammad Alizadeh, Lawrence Brakmo
In-Reply-To: <CACJspmKv1sK9CeLsUi07-6=o=g+3RBbXhpf16Fv7-TZ-StM8Xw@mail.gmail.com>

On Tue, Dec 19, 2017 at 5:00 PM, Steve Ibanez <sibanez@stanford.edu> wrote:
> Hi Neal,
>
> I managed to track down the code path that the unACKed CWR packet is
> taking. The tcp_rcv_established() function calls tcp_ack_snd_check()
> at the end of step5 and then the return statement indicated below is
> invoked, which prevents the __tcp_ack_snd_check() function from
> running.
>
> static inline void tcp_ack_snd_check(struct sock *sk)
> {
>         if (!inet_csk_ack_scheduled(sk)) {
>                 /* We sent a data segment already. */
>                 return;   /* <=== here */
>         }
>         __tcp_ack_snd_check(sk, 1);
> }
>
> So somehow tcp_ack_snd_check() thinks that a data segment was already
> sent when in fact it wasn't. Do you see a way around this issue?

Thanks for tracking that down! AFAICT in this case the call chain we
are trying to achieve is as follows:

tcp_rcv_established()
 -> tcp_data_queue()
 -> tcp_event_data_recv()
 -> inet_csk_schedule_ack()

The only thing I can think of would be to add printks that fire for
CWR packets, to isolate why the code bails out before it reaches those
calls...

thanks,
neal

^ permalink raw reply

* RE: [PATCH net-next] netdevsim: correctly check return value of debugfs_create_dir
From: Prashant Bhole @ 2017-12-20  0:16 UTC (permalink / raw)
  To: 'David Miller'; +Cc: jakub.kicinski, netdev
In-Reply-To: <20171219.091152.920867492172236638.davem@davemloft.net>

> From: David Miller [mailto:davem@davemloft.net]
> 
> From: "Prashant Bhole" <bhole_prashant_q7@lab.ntt.co.jp>
> Date: Tue, 19 Dec 2017 13:45:47 +0900
> 
> > I tried to evaluate whether fixing return value of
> > debugfs_create_dir() (and
> > friends) will be useful or not because it has not been changed since
> > very long time. Now I am not much convinced about changing this api.
> >
> > Important and possible error codes could be -EEXIST and -ENOMEM.
> > Suppose -EEXIST is returned, IMO the directory shouldn't exists in the
> > first place because it is specific to particular module. Also, there
> > is no point in creating file in such directory, because directory
> > owner (creator) might remove it too. This means there are less chances
> > that api change will be useful. Please let me know your opinion on it.
> >
> > If you are ok with above explanation, shall I submit v2 for this patch?
> 
> Well, something is seriously wrong if the directory exists already.
> 
> It could be that two netdevsim modules, independently compiled, are trying to
> be loaded.
> 
> Wouldn't it clearly be desirable to fail and not load the module in that case?
> This is why I think ignoring debugfs errors is foolish.

Right. I am planning to do the following (quoting my previous mail). The debugfs
error will not be ignored at module load.
-----------
Dave,
Thanks for comments. I will try to fix error handling in netdevsim first.

Jakub,
Let's decide with an example. The typical directory structure for netdevsim
interface is as below:
/sys/kernel/debug/netdevsim/sim0/bpf_bound_progs/
Please let me know if you are ok with following:

1) If debugfs_create_dir() fails in module_init, let's keep it a fatal error,
with the corrected condition:
+	if (IS_ERR_OR_NULL(nsim_ddir))
+		return -ENOMEM;

2) In case sim0 or bpf_bound_progs fail to be created, we need to add
checks before creating any file in them.
-----------

Shall I submit v2?

-Prashant

^ permalink raw reply

* Re: [RFC PATCH] virtio_net: Extend virtio to use VF datapath when available
From: Samudrala, Sridhar @ 2017-12-20  0:26 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: David Miller, mst, netdev, virtualization, alexander.duyck,
	jesse.brandeburg
In-Reply-To: <20171219145345.3c261273@xeon-e3>

On 12/19/2017 2:53 PM, Stephen Hemminger wrote:
> On Tue, 19 Dec 2017 14:37:50 -0800
> "Samudrala, Sridhar" <sridhar.samudrala@intel.com> wrote:
>
>> On 12/19/2017 11:46 AM, Stephen Hemminger wrote:
>>> On Tue, 19 Dec 2017 11:42:33 -0800
>>> "Samudrala, Sridhar" <sridhar.samudrala@intel.com> wrote:
>>>   
>>>> On 12/19/2017 10:41 AM, Stephen Hemminger wrote:
>>>>> On Tue, 19 Dec 2017 13:21:17 -0500 (EST)
>>>>> David Miller <davem@davemloft.net> wrote:
>>>>>      
>>>>>> From: Stephen Hemminger <stephen@networkplumber.org>
>>>>>> Date: Tue, 19 Dec 2017 09:55:48 -0800
>>>>>>      
>>>>>>> could be 10ms, just enough to let udev do its renaming
>>>>>> Please, move to some kind of notification or event based handling of
>>>>>> this problem.
>>>>>>
>>>>>> No delay is safe, what if userspace gets swapped out or whatever
>>>>>> else might make userspace stall unexpectedly?
>>>>>>      
>>>>> The plan is to remove the delay and do the naming in the kernel.
>>>>> This was suggested by Lennart since udev is only doing naming policy
>>>>> because kernel names were not repeatable.
>>>>>
>>>>> This makes the VF show up as "ethN_vf" on Hyper-V which is user friendly.
>>>>>
>>>>> Patch is pending.
>>>> Do we really need to delay the setup until the name is changed?
>>>> Can't we call dev_set_mtu() and dev_open() until dev_change_name() is done?
>>>>
>>>> Thanks
>>>> Sridhar
>>> You can call dev_set_mtu, but when dev_open is done the device name
>>> can not be changed by userspace.
>> I did a quick test to remove the delay and also the dev_open() call and
>> i don't see
>> any issues with virtio taking over the VF datapath.
>> Only the netdev_info() messages may show old device name.
>>
>> Any specific scenario where we need to explicitly call  VF's dev_open()
>> in the VF setup process?
>> I tried i40evf driver loaded after virtio_net  AND  virtio_net loading
>> after i40evf.
>>
> It happens with hotplug. It is possible on Hyper-V to hotplug SR-IOV on
> and off while guest is running. If SR-IOV is disabled in host then the
> VF device is removed (hotplug) and the inverse. If the master device is
> up then the VF device should be brought up by the master device.

Even with KVM, we need to hot-unplug the SR-IOV device on the source host
and plug it back in on the destination host to enable live migration. It
all works for me even without the dev_open() of the lower device from the
VF setup routine. I will send out a v2 version of this patch without the
delayed VF setup after some more testing next week.

Thanks
Sridhar

^ permalink raw reply

* Re: [PATCH net-next] netdevsim: correctly check return value of debugfs_create_dir
From: Jakub Kicinski @ 2017-12-20  0:30 UTC (permalink / raw)
  To: Prashant Bhole; +Cc: 'David Miller', netdev
In-Reply-To: <024601d37927$b7e83410$27b89c30$@lab.ntt.co.jp>

On Wed, 20 Dec 2017 09:16:01 +0900, Prashant Bhole wrote:
> > From: David Miller [mailto:davem@davemloft.net]
> > 
> > From: "Prashant Bhole" <bhole_prashant_q7@lab.ntt.co.jp>
> > Date: Tue, 19 Dec 2017 13:45:47 +0900
> >   
> > > I tried to evaluate whether fixing return value of
> > > debugfs_create_dir() (and
> > > friends) will be useful or not because it has not been changed since
> > > very long time. Now I am not much convinced about changing this api.
> > >
> > > Important and possible error codes could be -EEXIST and -ENOMEM.
> > > Suppose -EEXIST is returned, IMO the directory shouldn't exists in the
> > > first place because it is specific to particular module. Also, there
> > > is no point in creating file in such directory, because directory
> > > owner (creator) might remove it too. This means there are less chances
> > > that api change will be useful. Please let me know your opinion on it.
> > >
> > > If you are ok with above explanation, shall I submit v2 for this patch?  
> > 
> > Well, something is seriously wrong if the directory exists already.
> > 
> > It could be that two netdevsim modules, independantly compiled, are trying  
> to
> > be loaded.
> > 
> > Wouldn't it clearly be desirable to fail and not load the module in that  
> case?
> > 
> > This is why I think ignoring debugfs errors is foolish.  
> 
> Right. I am planning to do following (quoting previous mail), In debugfs
> error will not be ignored in modules load.
> -----------
> Dave,
> Thanks for comments. I will try to fix error handling in netdevsim first.
> 
> Jakub,
> Let's decide with an example. The typical directory structure for netdevsim
> interface is as below:
> /sys/kernel/debug/netdevsim/sim0/bpf_bound_progs/
> Please let me know if you are ok with following:
> 
> 1) If debugfs_create_dir() fails in module_init, let's keep it fatal error
> with corrected condition:
> +	if (IS_ERR_OR_NULL(nsim_ddir))
> +		return -ENOMEM;

Ack.

> 2) In case sim0 or bpf_bound_progs are  fail to create, we need to add
> checks before creating any file in them.

What do you mean by "check before"?  Checking if creation of each file
fails or not, or something different?

^ permalink raw reply

* Re: [PATCH bpf-next] libbpf: Fix build errors.
From: Daniel Borkmann @ 2017-12-20  0:36 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20171219.155311.134149966959114257.davem@davemloft.net>

On 12/19/2017 09:53 PM, David Miller wrote:
> 
> These elf object pieces are of type Elf64_Xword and therefore could be
> "long long" on some builds.
> 
> Cast to "long long" and use printf format %lld to deal with this since
> we are building with -Werror=format.
> 
> Signed-off-by: David S. Miller <davem@davemloft.net>

Applied to bpf-next, thanks David!

^ permalink raw reply

* RE: [PATCH net-next] netdevsim: correctly check return value of debugfs_create_dir
From: Prashant Bhole @ 2017-12-20  0:38 UTC (permalink / raw)
  To: 'Jakub Kicinski'; +Cc: 'David Miller', netdev
In-Reply-To: <20171219163014.1434bb62@cakuba.netronome.com>

> From: Jakub Kicinski [mailto:jakub.kicinski@netronome.com]
> 
> On Wed, 20 Dec 2017 09:16:01 +0900, Prashant Bhole wrote:
> > > From: David Miller [mailto:davem@davemloft.net]
> > >
> > > From: "Prashant Bhole" <bhole_prashant_q7@lab.ntt.co.jp>
> > > Date: Tue, 19 Dec 2017 13:45:47 +0900
> > >
> > > > I tried to evaluate whether fixing return value of
> > > > debugfs_create_dir() (and
> > > > friends) will be useful or not because it has not been changed
> > > > since very long time. Now I am not much convinced about changing
this api.
> > > >
> > > > Important and possible error codes could be -EEXIST and -ENOMEM.
> > > > Suppose -EEXIST is returned, IMO the directory shouldn't exists in
> > > > the first place because it is specific to particular module. Also,
> > > > there is no point in creating file in such directory, because
> > > > directory owner (creator) might remove it too. This means there
> > > > are less chances that api change will be useful. Please let me know
your
> opinion on it.
> > > >
> > > > If you are ok with above explanation, shall I submit v2 for this
patch?
> > >
> > > Well, something is seriously wrong if the directory exists already.
> > >
> > > It could be that two netdevsim modules, independantly compiled, are
> > > trying
> > to
> > > be loaded.
> > >
> > > Wouldn't it clearly be desirable to fail and not load the module in
> > > that
> > case?
> > >
> > > This is why I think ignoring debugfs errors is foolish.
> >
> > Right. I am planning to do following (quoting previous mail), In
> > debugfs error will not be ignored in modules load.
> > -----------
> > Dave,
> > Thanks for comments. I will try to fix error handling in netdevsim
first.
> >
> > Jakub,
> > Let's decide with an example. The typical directory structure for
> > netdevsim interface is as below:
> > /sys/kernel/debug/netdevsim/sim0/bpf_bound_progs/
> > Please let me know if you are ok with following:
> >
> > 1) If debugfs_create_dir() fails in module_init, let's keep it fatal
> > error with corrected condition:
> > +	if (IS_ERR_OR_NULL(nsim_ddir))
> > +		return -ENOMEM;
> 
> Ack.
> 
> > 2) In case sim0 or bpf_bound_progs are  fail to create, we need to add
> > checks before creating any file in them.
> 
> What do you mean by "check before"?  Checking if creation of each file
fails or
> not, or something different?

For example:
I will check if state->ddir is not NULL before creating files in it.

if (state->ddir) {
	debugfs_create_u32("id", 0400, state->ddir, &prog->aux->id);
	debugfs_create_file("state", 0400, state->ddir,
			    &state->state, &nsim_bpf_string_fops);
	debugfs_create_bool("loaded", 0400, state->ddir, &state->is_loaded);
}

-Prashant

^ permalink raw reply

* Re: [PATCH net-next] netdevsim: correctly check return value of debugfs_create_dir
From: Jakub Kicinski @ 2017-12-20  0:45 UTC (permalink / raw)
  To: Prashant Bhole; +Cc: 'David Miller', netdev
In-Reply-To: <024a01d3792a$e92c7ab0$bb857010$@lab.ntt.co.jp>

On Wed, 20 Dec 2017 09:38:52 +0900, Prashant Bhole wrote:
> > > 2) In case sim0 or bpf_bound_progs are  fail to create, we need to add
> > > checks before creating any file in them.  
> > 
> > What do you mean by "check before"?  Checking if creation of each file  
> > fails or not, or something different?  
> 
> For example:
> I will check if state->ddir is not NULL before creating files in it.
> 
> if (state->ddir) {
> 	debugfs_create_u32("id", 0400, state->ddir, &prog->aux->id);
> 	debugfs_create_file("state", 0400, state->ddir,
> 			    &state->state, &nsim_bpf_string_fops);
> 	debugfs_create_bool("loaded", 0400, state->ddir, &state->is_loaded);
> }

Ah, I would just error out in case we can't create any of the
sub-directories as well.

^ permalink raw reply

* Re: [PATCH net] enic: add wq clean up budget
From: Govindarajulu Varadarajan @ 2017-12-20  0:37 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, govindarajulu90, benve
In-Reply-To: <alpine.LNX.2.20.1712081528060.28747@cae-iprp-alln-lb.cisco.com>

On Fri, 8 Dec 2017, Govindarajulu Varadarajan wrote:

> On Wed, 6 Dec 2017, David Miller wrote:
>
>> From: Govindarajulu Varadarajan <gvaradar@cisco.com>
>> Date: Tue,  5 Dec 2017 11:14:41 -0800
>> 
>>> In case of tx clean up, we set '-1' as budget. This means clean up until
>>> wq is empty or till (1 << 32) pkts are cleaned. Under heavy load this
>>> will run for long time and cause
>>> "watchdog: BUG: soft lockup - CPU#25 stuck for 21s!" warning.
>>> 
>>> This patch sets wq clean up budget to 256.
>>> 
>>> Signed-off-by: Govindarajulu Varadarajan <gvaradar@cisco.com>
>> 
>> This driver with all of it's indirection and layers upon layers of
>> macros for queue processing is so difficult to read, and this can't
>> be generating nice optimal code either...
>> 
>> Anyways, I was walking over the driver to see if the logic is
>> contributing to this.
>> 
>> The limit you are proposing sounds unnecessary, nobody else I can
>> see needs this, and that includes all of the most heavily used
>> drivers under load.
>
> I used 256 as the limit because most of the other drivers use it.
>
> * mlx4 uses MLX4_EN_DEFAULT_TX_WORK as the tx budget in 
> mlx4_en_process_tx_cq()
>  Added in commit fbc6daf19745 ("net/mlx4_en: Ignore budget on TX napi 
> polling")
>
> * i40e&vf uses vsi->work_limit as tx budget in i40e_clean_tx_irq(), which is
>  set to I40E_DEFAULT_IRQ_WORK. Added in commit
>  a619afe814453 ("i40e/i40evf: Add support for bulk free in Tx cleanup")
>
> * ixgbe uses q_vector->tx.work_limit as tx budget in ixgbe_clean_tx_irq(),
>  which is set to IXGBE_DEFAULT_TX_WORK. Added in commit
>  592245559e900 ("ixgbe: Change default Tx work limit size to 256 buffers")
>
>> 
>> If I had to guess I'd say that the problem is that the queue loop
>> keeps sampling the head and tail pointers, where as it should just
>> do that _once_ and only process that TX entries found in that
>> snapshot and return to the poll() routine immedately afterwards.
>
> The only way to know the tail pointer at the time napi is scheduled is to 
> read
> hw fetch_index register. This is discouraged by hw engineers.
>
> We work around this by using a color bit. Every cq entry has a color bit,
> which is either 0 or 1. Hw flips the bit when it creates a new cq entry, so
> every new cq entry will have a different color bit than the previous one. We
> reach the end of the queue when the previous color bit is the same as the
> current cq entry's color, i.e. hw did not flip the bit, so it's not a new cq
> entry.
>
> So enic driver cannot know the tail pointer at the time napi is scheduled, 
> until
> we reach the tail pointer.
>

David,

How would you like us to fix this issue? Is doing an ioread on fetch_index on
every poll (to get the head and tail pointers once) our only option?

If 256 is not reasonable, would a wq_budget equal to the wq ring size be
acceptable? At any point, the number of wq entries to be cleaned cannot be
more than the ring size.

Thanks
Govind

^ permalink raw reply

* Re: [v2 PATCH -tip 3/6] net: sctp: Add SCTP ACK tracking trace event
From: kbuild test robot @ 2017-12-20  0:48 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: kbuild-all, Ingo Molnar, Ian McDonald, Vlad Yasevich,
	Stephen Hemminger, Steven Rostedt, Peter Zijlstra,
	Thomas Gleixner, LKML, H . Peter Anvin, Gerrit Renker,
	David S . Miller, Neil Horman, dccp, netdev, linux-sctp,
	Stephen Rothwell, mhiramat
In-Reply-To: <151358473510.28850.10475072993963389604.stgit@devbox>

[-- Attachment #1: Type: text/plain, Size: 7154 bytes --]

Hi Masami,

I love your patch! Perhaps something to improve:

[auto build test WARNING on net/master]
[also build test WARNING on v4.15-rc4 next-20171219]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Masami-Hiramatsu/net-tcp-sctp-dccp-Replace-jprobe-usage-with-trace-events/20171220-081035
config: i386-randconfig-x011-201751 (attached as .config)
compiler: gcc-7 (Debian 7.2.0-12) 7.2.1 20171025
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All warnings (new ones prefixed by >>):

   In file included from include/trace/define_trace.h:96:0,
                    from include/trace/events/sctp.h:96,
                    from net//sctp/sm_statefuns.c:63:
   include/trace/events/sctp.h: In function 'trace_event_raw_event_sctp_probe_path':
>> include/trace/events/sctp.h:31:19: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
      __entry->asoc = (__u64)asoc;
                      ^
   include/trace/trace_events.h:719:4: note: in definition of macro 'DECLARE_EVENT_CLASS'
     { assign; }       \
       ^~~~~~
   include/trace/trace_events.h:78:9: note: in expansion of macro 'PARAMS'
            PARAMS(assign),         \
            ^~~~~~
>> include/trace/events/sctp.h:11:1: note: in expansion of macro 'TRACE_EVENT'
    TRACE_EVENT(sctp_probe_path,
    ^~~~~~~~~~~
>> include/trace/events/sctp.h:30:2: note: in expansion of macro 'TP_fast_assign'
     TP_fast_assign(
     ^~~~~~~~~~~~~~
   include/trace/events/sctp.h: In function 'trace_event_raw_event_sctp_probe':
   include/trace/events/sctp.h:72:19: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
      __entry->asoc = (__u64)asoc;
                      ^
   include/trace/trace_events.h:719:4: note: in definition of macro 'DECLARE_EVENT_CLASS'
     { assign; }       \
       ^~~~~~
   include/trace/trace_events.h:78:9: note: in expansion of macro 'PARAMS'
            PARAMS(assign),         \
            ^~~~~~
   include/trace/events/sctp.h:50:1: note: in expansion of macro 'TRACE_EVENT'
    TRACE_EVENT(sctp_probe,
    ^~~~~~~~~~~
   include/trace/events/sctp.h:68:2: note: in expansion of macro 'TP_fast_assign'
     TP_fast_assign(
     ^~~~~~~~~~~~~~
   In file included from include/trace/define_trace.h:97:0,
                    from include/trace/events/sctp.h:96,
                    from net//sctp/sm_statefuns.c:63:
   include/trace/events/sctp.h: In function 'perf_trace_sctp_probe_path':
>> include/trace/events/sctp.h:31:19: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
      __entry->asoc = (__u64)asoc;
                      ^
   include/trace/perf.h:66:4: note: in definition of macro 'DECLARE_EVENT_CLASS'
     { assign; }       \
       ^~~~~~
   include/trace/trace_events.h:78:9: note: in expansion of macro 'PARAMS'
            PARAMS(assign),         \
            ^~~~~~
>> include/trace/events/sctp.h:11:1: note: in expansion of macro 'TRACE_EVENT'
    TRACE_EVENT(sctp_probe_path,
    ^~~~~~~~~~~
>> include/trace/events/sctp.h:30:2: note: in expansion of macro 'TP_fast_assign'
     TP_fast_assign(
     ^~~~~~~~~~~~~~
   include/trace/events/sctp.h: In function 'perf_trace_sctp_probe':
   include/trace/events/sctp.h:72:19: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
      __entry->asoc = (__u64)asoc;
                      ^
   include/trace/perf.h:66:4: note: in definition of macro 'DECLARE_EVENT_CLASS'
     { assign; }       \
       ^~~~~~
   include/trace/trace_events.h:78:9: note: in expansion of macro 'PARAMS'
            PARAMS(assign),         \
            ^~~~~~
   include/trace/events/sctp.h:50:1: note: in expansion of macro 'TRACE_EVENT'
    TRACE_EVENT(sctp_probe,
    ^~~~~~~~~~~
   include/trace/events/sctp.h:68:2: note: in expansion of macro 'TP_fast_assign'
     TP_fast_assign(
     ^~~~~~~~~~~~~~

vim +31 include/trace/events/sctp.h

    10	
  > 11	TRACE_EVENT(sctp_probe_path,
    12	
    13		TP_PROTO(struct sctp_transport *sp,
    14			 const struct sctp_association *asoc),
    15	
    16		TP_ARGS(sp, asoc),
    17	
    18		TP_STRUCT__entry(
    19			__field(__u64, asoc)
    20			__field(__u32, primary)
    21			__array(__u8, ipaddr, sizeof(union sctp_addr))
    22			__field(__u32, state)
    23			__field(__u32, cwnd)
    24			__field(__u32, ssthresh)
    25			__field(__u32, flight_size)
    26			__field(__u32, partial_bytes_acked)
    27			__field(__u32, pathmtu)
    28		),
    29	
  > 30		TP_fast_assign(
  > 31			__entry->asoc = (__u64)asoc;
    32			__entry->primary = (sp == asoc->peer.primary_path);
    33			memcpy(__entry->ipaddr, &sp->ipaddr, sizeof(union sctp_addr));
    34			__entry->state = sp->state;
    35			__entry->cwnd = sp->cwnd;
    36			__entry->ssthresh = sp->ssthresh;
    37			__entry->flight_size = sp->flight_size;
    38			__entry->partial_bytes_acked = sp->partial_bytes_acked;
    39			__entry->pathmtu = sp->pathmtu;
    40		),
    41	
    42		TP_printk("asoc=%#llx%s ipaddr=%pISpc state=%u cwnd=%u ssthresh=%u "
    43			  "flight_size=%u partial_bytes_acked=%u pathmtu=%u",
    44			  __entry->asoc, __entry->primary ? "(*)" : "",
    45			  __entry->ipaddr, __entry->state, __entry->cwnd,
    46			  __entry->ssthresh, __entry->flight_size,
    47			  __entry->partial_bytes_acked, __entry->pathmtu)
    48	);
    49	
    50	TRACE_EVENT(sctp_probe,
    51	
    52		TP_PROTO(const struct sctp_endpoint *ep,
    53			 const struct sctp_association *asoc,
    54			 struct sctp_chunk *chunk),
    55	
    56		TP_ARGS(ep, asoc, chunk),
    57	
    58		TP_STRUCT__entry(
    59			__field(__u64, asoc)
    60			__field(__u32, mark)
    61			__field(__u16, bind_port)
    62			__field(__u16, peer_port)
    63			__field(__u32, pathmtu)
    64			__field(__u32, rwnd)
    65			__field(__u16, unack_data)
    66		),
    67	
    68		TP_fast_assign(
    69			struct sctp_transport *sp;
    70			struct sk_buff *skb = chunk->skb;
    71	
    72			__entry->asoc = (__u64)asoc;
    73			__entry->mark = skb->mark;
    74			__entry->bind_port = ep->base.bind_addr.port;
    75			__entry->peer_port = asoc->peer.port;
    76			__entry->pathmtu = asoc->pathmtu;
    77			__entry->rwnd = asoc->peer.rwnd;
    78			__entry->unack_data = asoc->unack_data;
    79	
    80			list_for_each_entry(sp, &asoc->peer.transport_addr_list,
    81					    transports) {
    82				trace_sctp_probe_path(sp, asoc);
    83			}
    84		),
    85	
    86		TP_printk("asoc=%#llx mark=%#x bind_port=%d peer_port=%d pathmtu=%d "
    87			  "rwnd=%u unack_data=%d",
    88			  __entry->asoc, __entry->mark, __entry->bind_port,
    89			  __entry->peer_port, __entry->pathmtu, __entry->rwnd,
    90			  __entry->unack_data)
    91	);
    92	
    93	#endif /* _TRACE_SCTP_H */
    94	
    95	/* This part must be outside protection */
  > 96	#include <trace/define_trace.h>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 26730 bytes --]

^ permalink raw reply

* Re: [PATCH bpf 03/11] bpf: Add write access to tcp_sock and sock fields
From: Daniel Borkmann @ 2017-12-20  0:51 UTC (permalink / raw)
  To: Lawrence Brakmo, netdev; +Cc: Kernel Team, Blake Matheny, Alexei Starovoitov
In-Reply-To: <20171219062200.372711-4-brakmo@fb.com>

On 12/19/2017 07:21 AM, Lawrence Brakmo wrote:
> This patch adds a macro, SOCK_OPS_SET_FIELD, for writing to
> struct tcp_sock or struct sock fields. This required adding a new
> field "temp" to struct bpf_sock_ops_kern for temporary storage that
> is used by sock_ops_convert_ctx_access. It is used to store and recover
> the contents of a register, so the register can be used to store the
> address of the sk. Since we cannot overwrite the dst_reg because it
> contains the pointer to ctx, nor the src_reg since it contains the value
> we want to store, we need an extra register to contain the address
> of the sk.
> 
> Also adds the macro SOCK_OPS_GET_OR_SET_FIELD that calls one of the
> GET or SET macros depending on the value of the TYPE field.
> 
> Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
> ---
>  include/linux/filter.h |  3 +++
>  include/net/tcp.h      |  2 +-
>  net/core/filter.c      | 46 ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 50 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 5feb441..8929162 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -987,6 +987,9 @@ struct bpf_sock_ops_kern {
>  		u32 replylong[4];
>  	};
>  	u32	is_fullsock;
> +	u64	temp;			/* Used by sock_ops_convert_ctx_access
> +					 * as temporary storaage of a register
> +					 */
>  };
>  
>  #endif /* __LINUX_FILTER_H__ */
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 6cc205c..e0213f1 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -2011,7 +2011,7 @@ static inline int tcp_call_bpf(struct sock *sk, int op)
>  	struct bpf_sock_ops_kern sock_ops;
>  	int ret;
>  
> -	memset(&sock_ops, 0, sizeof(sock_ops));
> +	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, is_fullsock));

I don't think this is correct. sock_ops is on stack, so above you only
zero up to the offset of is_fullsock, but not including it, so when
you have !sk_fullsock(sk), then your BPF prog will still act as if the
sock_ops.is_fullsock was set in case prior stack garbage said so.

>  	if (sk_fullsock(sk)) {
>  		sock_ops.is_fullsock = 1;
>  		sock_owned_by_me(sk);

Thanks,
Daniel

^ permalink raw reply

* RE: [PATCH net-next] netdevsim: correctly check return value of debugfs_create_dir
From: Prashant Bhole @ 2017-12-20  0:54 UTC (permalink / raw)
  To: 'Jakub Kicinski'; +Cc: 'David Miller', netdev
In-Reply-To: <20171219164523.60ac1308@cakuba.netronome.com>


> From: Jakub Kicinski [mailto:jakub.kicinski@netronome.com]
> 
> On Wed, 20 Dec 2017 09:38:52 +0900, Prashant Bhole wrote:
> > > > 2) In case sim0 or bpf_bound_progs are  fail to create, we need to
> > > > add checks before creating any file in them.
> > >
> > > What do you mean by "check before"?  Checking if creation of each
> > > file fails or not, or something different?
> >
> > For example:
> > I will check if state->ddir is not NULL before creating files in it.
> >
> > if (state->ddir) {
> > 	debugfs_create_u32("id", 0400, state->ddir, &prog->aux->id);
> > 	debugfs_create_file("state", 0400, state->ddir,
> > 			    &state->state, &nsim_bpf_string_fops);
> > 	debugfs_create_bool("loaded", 0400, state->ddir, &state->is_loaded);
> > }
> 
> Ah, I would just error out in case we can't create any of the
sub-directories as
> well.

Does that mean a fatal error if we can't create any of the subdirectories?
Or a similar check, as mentioned above, before creating the subdirectories?
(I was about to do this.)

-Prashant

^ permalink raw reply

* Re: [PATCH -tip v3 3/6] net: sctp: Add SCTP ACK tracking trace event
From: Masami Hiramatsu @ 2017-12-20  1:05 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Ingo Molnar, Ian McDonald, Vlad Yasevich, Stephen Hemminger,
	Peter Zijlstra, Thomas Gleixner, LKML, H . Peter Anvin,
	Gerrit Renker, David S . Miller, Neil Horman, dccp, netdev,
	linux-sctp, Stephen Rothwell
In-Reply-To: <20171219102024.09a92c75@gandalf.local.home>

On Tue, 19 Dec 2017 10:20:24 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Tue, 19 Dec 2017 17:58:25 +0900
> Masami Hiramatsu <mhiramat@kernel.org> wrote:
> 
> > +TRACE_EVENT(sctp_probe,
> > +
> > +	TP_PROTO(const struct sctp_endpoint *ep,
> > +		 const struct sctp_association *asoc,
> > +		 struct sctp_chunk *chunk),
> > +
> > +	TP_ARGS(ep, asoc, chunk),
> > +
> > +	TP_STRUCT__entry(
> > +		__field(__u64, asoc)
> > +		__field(__u32, mark)
> > +		__field(__u16, bind_port)
> > +		__field(__u16, peer_port)
> > +		__field(__u32, pathmtu)
> > +		__field(__u32, rwnd)
> > +		__field(__u16, unack_data)
> > +	),
> > +
> > +	TP_fast_assign(
> > +		struct sctp_transport *sp;
> > +		struct sk_buff *skb = chunk->skb;
> > +
> > +		__entry->asoc = (__u64)asoc;
> > +		__entry->mark = skb->mark;
> > +		__entry->bind_port = ep->base.bind_addr.port;
> > +		__entry->peer_port = asoc->peer.port;
> > +		__entry->pathmtu = asoc->pathmtu;
> > +		__entry->rwnd = asoc->peer.rwnd;
> > +		__entry->unack_data = asoc->unack_data;
> > +
> > +		if (trace_sctp_probe_path_enabled()) {
> > +			list_for_each_entry(sp, &asoc->peer.transport_addr_list,
> > +					    transports) {
> > +				trace_sctp_probe_path(sp, asoc);
> > +			}
> > +		}
> 
> I thought you were going to move this into the code, like I suggested?

Ah, I forgot to define sp in the block...

Thanks,

> 
> -- Steve
> 
> > +	),
> > +
> > +	TP_printk("asoc=%#llx mark=%#x bind_port=%d peer_port=%d pathmtu=%d "
> > +		  "rwnd=%u unack_data=%d",
> > +		  __entry->asoc, __entry->mark, __entry->bind_port,
> > +		  __entry->peer_port, __entry->pathmtu, __entry->rwnd,
> > +		  __entry->unack_data)
> > +);
> > +


-- 
Masami Hiramatsu <mhiramat@kernel.org>

^ permalink raw reply

* [PATCH v3 iproute2 net-next] erspan: add erspan version II support
From: William Tu @ 2017-12-20  1:08 UTC (permalink / raw)
  To: netdev; +Cc: dsahern

The patch adds support for configuring the erspan v2, for both
ipv4 and ipv6 erspan implementation.  Three additional fields
are added: 'erspan_ver' for distinguishing v1 or v2, 'erspan_dir'
for specifying direction of the mirrored traffic, and 'erspan_hwid'
for users to set ERSPAN engine ID within a system.

As for the manpage, the ERSPAN descriptions used to be under the GRE, IPIP,
SIT Type paragraph.  Since IP6GRE/IP6GRETAP also supports ERSPAN,
the patch removes the old one, creates a separate ERSPAN paragraph,
and adds an example.

Signed-off-by: William Tu <u9012063@gmail.com>
---
change in v3:
  - change erspan_dir 0/1 to "in[gress]/e[gress]"
  - update manpage
change in v2:
  - fix typo ETH_P_ERSPAN2
  - fix space and indent
---
 include/uapi/linux/if_ether.h  |  1 +
 include/uapi/linux/if_tunnel.h |  3 ++
 ip/link_gre.c                  | 68 +++++++++++++++++++++++++++++--
 ip/link_gre6.c                 | 69 +++++++++++++++++++++++++++++--
 man/man8/ip-link.8.in          | 92 ++++++++++++++++++++++++++++++++++++------
 5 files changed, 214 insertions(+), 19 deletions(-)

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 2eb529a90250..133567bf2e04 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -47,6 +47,7 @@
 #define ETH_P_PUP	0x0200		/* Xerox PUP packet		*/
 #define ETH_P_PUPAT	0x0201		/* Xerox PUP Addr Trans packet	*/
 #define ETH_P_TSN	0x22F0		/* TSN (IEEE 1722) packet	*/
+#define ETH_P_ERSPAN2	0x22EB		/* ERSPAN version 2 (type III)	*/
 #define ETH_P_IP	0x0800		/* Internet Protocol packet	*/
 #define ETH_P_X25	0x0805		/* CCITT X.25			*/
 #define ETH_P_ARP	0x0806		/* Address Resolution packet	*/
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 38cdf90692f8..ecdc76669cfd 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -137,6 +137,9 @@ enum {
 	IFLA_GRE_IGNORE_DF,
 	IFLA_GRE_FWMARK,
 	IFLA_GRE_ERSPAN_INDEX,
+	IFLA_GRE_ERSPAN_VER,
+	IFLA_GRE_ERSPAN_DIR,
+	IFLA_GRE_ERSPAN_HWID,
 	__IFLA_GRE_MAX,
 };
 
diff --git a/ip/link_gre.c b/ip/link_gre.c
index 43cb1af6196a..27c03121f7e3 100644
--- a/ip/link_gre.c
+++ b/ip/link_gre.c
@@ -98,6 +98,9 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv,
 	__u8 ignore_df = 0;
 	__u32 fwmark = 0;
 	__u32 erspan_idx = 0;
+	__u8 erspan_ver = 0;
+	__u8 erspan_dir = 0;
+	__u16 erspan_hwid = 0;
 
 	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
 		if (rtnl_talk(&rth, &req.n, &answer) < 0) {
@@ -179,6 +182,15 @@ get_failed:
 		if (greinfo[IFLA_GRE_ERSPAN_INDEX])
 			erspan_idx = rta_getattr_u32(greinfo[IFLA_GRE_ERSPAN_INDEX]);
 
+		if (greinfo[IFLA_GRE_ERSPAN_VER])
+			erspan_ver = rta_getattr_u8(greinfo[IFLA_GRE_ERSPAN_VER]);
+
+		if (greinfo[IFLA_GRE_ERSPAN_DIR])
+			erspan_dir = rta_getattr_u8(greinfo[IFLA_GRE_ERSPAN_DIR]);
+
+		if (greinfo[IFLA_GRE_ERSPAN_HWID])
+			erspan_hwid = rta_getattr_u16(greinfo[IFLA_GRE_ERSPAN_HWID]);
+
 		free(answer);
 	}
 
@@ -343,6 +355,26 @@ get_failed:
 				invarg("invalid erspan index\n", *argv);
 			if (erspan_idx & ~((1<<20) - 1) || erspan_idx == 0)
 				invarg("erspan index must be > 0 and <= 20-bit\n", *argv);
+		} else if (strcmp(*argv, "erspan_ver") == 0) {
+			NEXT_ARG();
+			if (get_u8(&erspan_ver, *argv, 0))
+				invarg("invalid erspan version\n", *argv);
+			if (erspan_ver != 1 && erspan_ver != 2)
+				invarg("erspan version must be 1 or 2\n", *argv);
+		} else if (strcmp(*argv, "erspan_dir") == 0) {
+			NEXT_ARG();
+			if (strcmp(*argv, "ingress") == 0 ||
+			    strcmp(*argv, "in") == 0)
+				erspan_dir = 0;
+			else if (strcmp(*argv, "egress") == 0 ||
+				 strcmp(*argv, "e") == 0)
+				erspan_dir = 1;
+			else
+				invarg("Invalid erspan direction.", *argv);
+		} else if (strcmp(*argv, "erspan_hwid") == 0) {
+			NEXT_ARG();
+			if (get_u16(&erspan_hwid, *argv, 0))
+				invarg("invalid erspan hwid\n", *argv);
 		} else
 			usage();
 		argc--; argv++;
@@ -374,8 +406,15 @@ get_failed:
 		addattr_l(n, 1024, IFLA_GRE_TTL, &ttl, 1);
 		addattr_l(n, 1024, IFLA_GRE_TOS, &tos, 1);
 		addattr32(n, 1024, IFLA_GRE_FWMARK, fwmark);
-		if (erspan_idx != 0)
-			addattr32(n, 1024, IFLA_GRE_ERSPAN_INDEX, erspan_idx);
+		if (erspan_ver) {
+			addattr8(n, 1024, IFLA_GRE_ERSPAN_VER, erspan_ver);
+			if (erspan_ver == 1 && erspan_idx != 0) {
+				addattr32(n, 1024, IFLA_GRE_ERSPAN_INDEX, erspan_idx);
+			} else if (erspan_ver == 2) {
+				addattr8(n, 1024, IFLA_GRE_ERSPAN_DIR, erspan_dir);
+				addattr16(n, 1024, IFLA_GRE_ERSPAN_HWID, erspan_hwid);
+			}
+		}
 	} else {
 		addattr_l(n, 1024, IFLA_GRE_COLLECT_METADATA, NULL, 0);
 	}
@@ -514,7 +553,30 @@ static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
 	if (tb[IFLA_GRE_ERSPAN_INDEX]) {
 		__u32 erspan_idx = rta_getattr_u32(tb[IFLA_GRE_ERSPAN_INDEX]);
 
-		fprintf(f, "erspan_index %u ", erspan_idx);
+		print_uint(PRINT_ANY, "erspan_index", "erspan_index %u ", erspan_idx);
+	}
+
+	if (tb[IFLA_GRE_ERSPAN_VER]) {
+		__u8 erspan_ver = rta_getattr_u8(tb[IFLA_GRE_ERSPAN_VER]);
+
+		print_uint(PRINT_ANY, "erspan_ver", "erspan_ver %u ", erspan_ver);
+	}
+
+	if (tb[IFLA_GRE_ERSPAN_DIR]) {
+		__u8 erspan_dir = rta_getattr_u8(tb[IFLA_GRE_ERSPAN_DIR]);
+
+		if (erspan_dir == 0)
+			print_string(PRINT_ANY, "erspan_dir",
+				     "erspan_dir ingress ", NULL);
+		else
+			print_string(PRINT_ANY, "erspan_dir",
+				     "erspan_dir egress ", NULL);
+	}
+
+	if (tb[IFLA_GRE_ERSPAN_HWID]) {
+		__u16 erspan_hwid = rta_getattr_u16(tb[IFLA_GRE_ERSPAN_HWID]);
+
+		print_hex(PRINT_ANY, "erspan_hwid", "erspan_hwid 0x%x ", erspan_hwid);
 	}
 
 	if (tb[IFLA_GRE_ENCAP_TYPE] &&
diff --git a/ip/link_gre6.c b/ip/link_gre6.c
index 2cb46ca116d0..de6a38d50cd1 100644
--- a/ip/link_gre6.c
+++ b/ip/link_gre6.c
@@ -109,6 +109,9 @@ static int gre_parse_opt(struct link_util *lu, int argc, char **argv,
 	int len;
 	__u32 fwmark = 0;
 	__u32 erspan_idx = 0;
+	__u8 erspan_ver = 0;
+	__u8 erspan_dir = 0;
+	__u16 erspan_hwid = 0;
 
 	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
 		if (rtnl_talk(&rth, &req.n, &answer) < 0) {
@@ -191,6 +194,15 @@ get_failed:
 		if (greinfo[IFLA_GRE_ERSPAN_INDEX])
 			erspan_idx = rta_getattr_u32(greinfo[IFLA_GRE_ERSPAN_INDEX]);
 
+		if (greinfo[IFLA_GRE_ERSPAN_VER])
+			erspan_ver = rta_getattr_u8(greinfo[IFLA_GRE_ERSPAN_VER]);
+
+		if (greinfo[IFLA_GRE_ERSPAN_DIR])
+			erspan_dir = rta_getattr_u8(greinfo[IFLA_GRE_ERSPAN_DIR]);
+
+		if (greinfo[IFLA_GRE_ERSPAN_HWID])
+			erspan_hwid = rta_getattr_u16(greinfo[IFLA_GRE_ERSPAN_HWID]);
+
 		free(answer);
 	}
 
@@ -389,6 +401,26 @@ get_failed:
 				invarg("invalid erspan index\n", *argv);
 			if (erspan_idx & ~((1<<20) - 1) || erspan_idx == 0)
 				invarg("erspan index must be > 0 and <= 20-bit\n", *argv);
+		} else if (strcmp(*argv, "erspan_ver") == 0) {
+			NEXT_ARG();
+			if (get_u8(&erspan_ver, *argv, 0))
+				invarg("invalid erspan version\n", *argv);
+			if (erspan_ver != 1 && erspan_ver != 2)
+				invarg("erspan version must be 1 or 2\n", *argv);
+		} else if (strcmp(*argv, "erspan_dir") == 0) {
+			NEXT_ARG();
+			if (strcmp(*argv, "ingress") == 0 ||
+			    strcmp(*argv, "in") == 0)
+				erspan_dir = 0;
+			else if (strcmp(*argv, "egress") == 0 ||
+				 strcmp(*argv, "e") == 0)
+				erspan_dir = 1;
+			else
+				invarg("Invalid erspan direction.", *argv);
+		} else if (strcmp(*argv, "erspan_hwid") == 0) {
+			NEXT_ARG();
+			if (get_u16(&erspan_hwid, *argv, 0))
+				invarg("invalid erspan hwid\n", *argv);
 		} else
 			usage();
 		argc--; argv++;
@@ -408,9 +440,15 @@ get_failed:
 		addattr_l(n, 1024, IFLA_GRE_FLOWINFO, &flowinfo, 4);
 		addattr32(n, 1024, IFLA_GRE_FLAGS, flags);
 		addattr32(n, 1024, IFLA_GRE_FWMARK, fwmark);
-		if (erspan_idx != 0)
-			addattr32(n, 1024, IFLA_GRE_ERSPAN_INDEX, erspan_idx);
-
+		if (erspan_ver) {
+			addattr8(n, 1024, IFLA_GRE_ERSPAN_VER, erspan_ver);
+			if (erspan_ver == 1 && erspan_idx != 0) {
+				addattr32(n, 1024, IFLA_GRE_ERSPAN_INDEX, erspan_idx);
+			} else {
+				addattr8(n, 1024, IFLA_GRE_ERSPAN_DIR, erspan_dir);
+				addattr16(n, 1024, IFLA_GRE_ERSPAN_HWID, erspan_hwid);
+			}
+		}
 		addattr16(n, 1024, IFLA_GRE_ENCAP_TYPE, encaptype);
 		addattr16(n, 1024, IFLA_GRE_ENCAP_FLAGS, encapflags);
 		addattr16(n, 1024, IFLA_GRE_ENCAP_SPORT, htons(encapsport));
@@ -587,7 +625,30 @@ static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
 
 	if (tb[IFLA_GRE_ERSPAN_INDEX]) {
 		__u32 erspan_idx = rta_getattr_u32(tb[IFLA_GRE_ERSPAN_INDEX]);
-		fprintf(f, "erspan_index %u ", erspan_idx);
+		print_uint(PRINT_ANY, "erspan_index", "erspan_index %u ", erspan_idx);
+	}
+
+	if (tb[IFLA_GRE_ERSPAN_VER]) {
+		__u8 erspan_ver = rta_getattr_u8(tb[IFLA_GRE_ERSPAN_VER]);
+
+		print_uint(PRINT_ANY, "erspan_ver", "erspan_ver %u ", erspan_ver);
+	}
+
+	if (tb[IFLA_GRE_ERSPAN_DIR]) {
+		__u8 erspan_dir = rta_getattr_u8(tb[IFLA_GRE_ERSPAN_DIR]);
+
+		if (erspan_dir == 0)
+			print_string(PRINT_ANY, "erspan_dir",
+				     "erspan_dir ingress ", NULL);
+		else
+			print_string(PRINT_ANY, "erspan_dir",
+				     "erspan_dir egress ", NULL);
+	}
+
+	if (tb[IFLA_GRE_ERSPAN_HWID]) {
+		__u16 erspan_hwid = rta_getattr_u16(tb[IFLA_GRE_ERSPAN_HWID]);
+
+		print_hex(PRINT_ANY, "erspan_hwid", "erspan_hwid 0x%x ", erspan_hwid);
 	}
 
 	if (tb[IFLA_GRE_ENCAP_TYPE] &&
diff --git a/man/man8/ip-link.8.in b/man/man8/ip-link.8.in
index 9e9a5f0d2cef..2b051ed7b5a0 100644
--- a/man/man8/ip-link.8.in
+++ b/man/man8/ip-link.8.in
@@ -665,13 +665,13 @@ keyword.
 .in -8
 
 .TP
-GRE, IPIP, SIT, ERSPAN Type Support
+GRE, IPIP, SIT Type Support
 For a link of types
-.I GRE/IPIP/SIT/ERSPAN
+.I GRE/IPIP/SIT
 the following additional arguments are supported:
 
 .BI "ip link add " DEVICE
-.BR type " { " gre " | " ipip " | " sit " | " erspan " }"
+.BR type " { " gre " | " ipip " | " sit " }"
 .BI " remote " ADDR " local " ADDR
 [
 .BR encap " { " fou " | " gue " | " none " }"
@@ -685,8 +685,6 @@ the following additional arguments are supported:
 .I " [no]encap-remcsum "
 ] [
 .I " mode " { ip6ip | ipip | mplsip | any } "
-] [
-.BR erspan " \fIIDX "
 ]
 
 .in +8
@@ -731,13 +729,6 @@ MPLS-Over-IPv4, "any" indicates IPv6, IPv4 or MPLS Over IPv4. Supported for
 SIT where the default is "ip6ip" and IPIP where the default is "ipip".
 IPv6-Over-IPv4 is not supported for IPIP.
 
-.sp
-.BR erspan " \fIIDX "
-- specifies the ERSPAN index field.
-.IR IDX
-indicates a 20 bit index/port number associated with the ERSPAN
-traffic's source port and direction.
-
 .in -8
 
 .TP
@@ -883,6 +874,76 @@ the following additional arguments are supported:
 - specifies the mode (datagram or connected) to use.
 
 .TP
+ERSPAN Type Support
+For a link of type
+.I ERSPAN/IP6ERSPAN
+the following additional arguments are supported:
+
+.BI "ip link add " DEVICE
+.BR type " { " erspan " | " ip6erspan " }"
+.BI remote " ADDR " local " ADDR " seq
+.RB key
+.I KEY
+.BR erspan_ver " \fIversion "
+[
+.BR erspan " \fIIDX "
+] [
+.BR erspan_dir " { " \fIin[gress] " | " \fIe[gress] " }"
+] [
+.BR erspan_hwid " \fIhwid "
+] [
+.RB external
+]
+
+.in +8
+.sp
+.BI  remote " ADDR "
+- specifies the remote address of the tunnel.
+
+.sp
+.BI  local " ADDR "
+- specifies the fixed local address for tunneled packets.
+It must be an address on another interface on this host.
+
+.sp
+.BR erspan_ver " \fIversion "
+- specifies the ERSPAN version number.
+.IR version
+indicates the ERSPAN version to be created: 1 for version 1 (type II)
+or 2 for version 2 (type III).
+
+.sp
+.BR erspan " \fIIDX "
+- specifies the ERSPAN v1 index field.
+.IR IDX
+indicates a 20 bit index/port number associated with the ERSPAN
+traffic's source port and direction.
+
+.sp
+.BR erspan_dir " { " \fIin[gress] " | " \fIe[gress] " }"
+- specifies the ERSPAN v2 mirrored traffic's direction.
+
+.sp
+.BR erspan_hwid " \fIhwid "
+- a unique identifier of an ERSPAN v2 engine within a system.
+.IR hwid
+is a 6-bit value for users to configure.
+
+.sp
+.BR external
+- make this tunnel externally controlled (or not, which is the default).
+In the kernel, this is referred to as collect metadata mode.  This flag is
+mutually exclusive with the
+.BR remote ,
+.BR local ,
+.BR erspan_ver ,
+.BR erspan ,
+.BR erspan_dir " and " erspan_hwid
+options.
+
+.in -8
+
+.TP
 GENEVE Type Support
 For a link of type
 .I GENEVE
@@ -2062,6 +2123,13 @@ ip link add link wpan0 lowpan0 type lowpan
 Creates a 6LoWPAN interface named lowpan0 on the underlying
 IEEE 802.15.4 device wpan0.
 .RE
+.PP
+ip link add dev ip6erspan11 type ip6erspan seq key 102
+local fc00:100::2 remote fc00:100::1
+erspan_ver 2 erspan_dir ingress erspan_hwid 17
+.RS 4
+Creates an IP6ERSPAN version 2 interface named ip6erspan11.
+.RE
 
 .SH SEE ALSO
 .br
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH bpf 03/11] bpf: Add write access to tcp_sock and sock fields
From: Alexei Starovoitov @ 2017-12-20  1:10 UTC (permalink / raw)
  To: Lawrence Brakmo, netdev; +Cc: Kernel Team, Blake Matheny, Daniel Borkmann
In-Reply-To: <20171219062200.372711-4-brakmo@fb.com>

On 12/18/17 10:21 PM, Lawrence Brakmo wrote:
> +#define SOCK_OPS_SET_FIELD(FIELD_NAME, OBJ)				      \
> +	do {								      \
> +		int reg = BPF_REG_9;					      \
> +		BUILD_BUG_ON(FIELD_SIZEOF(OBJ, FIELD_NAME) >		      \
> +			     FIELD_SIZEOF(struct bpf_sock_ops, FIELD_NAME));  \
> +		while (si->dst_reg == reg || si->src_reg == reg)	      \
> +			reg--;						      \
> +		*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg,		      \
> +				      offsetof(struct bpf_sock_ops_kern,      \
> +					       temp));			      \
> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
> +						struct bpf_sock_ops_kern,     \
> +						is_fullsock),		      \
> +				      reg, si->dst_reg,			      \
> +				      offsetof(struct bpf_sock_ops_kern,      \
> +					       is_fullsock));		      \
> +		*insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2);		      \
> +		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(			      \
> +						struct bpf_sock_ops_kern, sk),\
> +				      reg, si->dst_reg,			      \
> +				      offsetof(struct bpf_sock_ops_kern, sk));\
> +		*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, FIELD_NAME),      \
> +				      reg, si->src_reg,			      \
> +				      offsetof(OBJ, FIELD_NAME));	      \
> +		*insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg,		      \
> +				      offsetof(struct bpf_sock_ops_kern,      \
> +					       temp));			      \
> +	} while (0)

that's neat. I like it.
I guess the prog can check is_fullsock on its own to see whether writes
will fail or not, so JEQ above is ok.
Only while() loop looks a bit scary.
Maybe replace it with two 'if' statements?
if (si->dst_reg == reg || si->src_reg == reg)
   reg --;
if (si->dst_reg == reg || si->src_reg == reg)
   reg --;
so it's clear that tmp reg will be reg_7, 8 or 9.

^ permalink raw reply

* Re: [PATCH net-next] netdevsim: correctly check return value of debugfs_create_dir
From: Jakub Kicinski @ 2017-12-20  1:18 UTC (permalink / raw)
  To: Prashant Bhole; +Cc: 'David Miller', netdev
In-Reply-To: <024e01d3792d$291ef420$7b5cdc60$@lab.ntt.co.jp>

On Wed, 20 Dec 2017 09:54:59 +0900, Prashant Bhole wrote:
> > Ah, I would just error out in case we can't create any of the  
> > sub-directories as well.  
> 
> Does that mean fatal error if we can't create any of the subdirectories?

Yes.

^ permalink raw reply

* Re: [PATCH v2,net-next] ip6_gre: fix a pontential issue in ip6erspan_rcv
From: Haishuang Yan @ 2017-12-20  1:27 UTC (permalink / raw)
  To: David Miller; +Cc: kuznet, yoshfuji, netdev, linux-kernel, u9012063
In-Reply-To: <20171219.103459.1958757813714459905.davem@davemloft.net>



> On 2017年12月19日, at 下午11:34, David Miller <davem@davemloft.net> wrote:
> 
> From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
> Date: Sat, 16 Dec 2017 10:25:25 +0800
> 
>> pskb_may_pull() can change skb->data, so we need to load ipv6h/ershdr at
>> the right place.
>> 
>> Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
>> Acked-by: William Tu <u9012063@gmail.com>
>> Cc: William Tu <u9012063@gmail.com>
>> Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
> 
> This patch does not apply:
> 
>> +	ipv6h = ipv6_hdr(skb);
>> +	ershdr = (struct erspan_base_hdr *)skb->data;
>> 	ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET;
>> 	tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
>> 	pkt_md = (struct erspan_metadata *)(ershdr + 1);
> 
> There is not "pkt_md = ..." assignment in net-next on this line.
> 

Okay, I will fix it and resubmit another commit, thanks.

^ permalink raw reply

* Re: [PATCH v3 iproute2 net-next] erspan: add erspan version II support
From: David Ahern @ 2017-12-20  1:28 UTC (permalink / raw)
  To: William Tu, netdev
In-Reply-To: <1513732121-115044-1-git-send-email-u9012063@gmail.com>

Hi William:

On 12/19/17 6:08 PM, William Tu wrote:
> @@ -343,6 +355,26 @@ get_failed:
>  				invarg("invalid erspan index\n", *argv);
>  			if (erspan_idx & ~((1<<20) - 1) || erspan_idx == 0)
>  				invarg("erspan index must be > 0 and <= 20-bit\n", *argv);
> +		} else if (strcmp(*argv, "erspan_ver") == 0) {
> +			NEXT_ARG();
> +			if (get_u8(&erspan_ver, *argv, 0))
> +				invarg("invalid erspan version\n", *argv);
> +			if (erspan_ver != 1 && erspan_ver != 2)
> +				invarg("erspan version must be 1 or 2\n", *argv);
> +		} else if (strcmp(*argv, "erspan_dir") == 0) {
> +			NEXT_ARG();
> +			if (strcmp(*argv, "ingress") == 0 ||
> +			    strcmp(*argv, "in") == 0)
> +				erspan_dir = 0;
> +			else if (strcmp(*argv, "egress") == 0 ||
> +				 strcmp(*argv, "e") == 0)

iproute2 has a matches() function that should be used -- it basically
allows whatever shorthand notation matches -- in this case e, eg, egr,
egres, egress all match. Check out ip/iplink.c and search for matches.

^ permalink raw reply

* Re: [PATCH v2,net-next 1/2] ip_gre: fix potential memory leak in erspan_rcv
From: Haishuang Yan @ 2017-12-20  1:33 UTC (permalink / raw)
  To: David Miller; +Cc: kuznet, yoshfuji, netdev, linux-kernel, u9012063
In-Reply-To: <20171219.103633.721139612524381957.davem@davemloft.net>



> On 2017年12月19日, at 下午11:36, David Miller <davem@davemloft.net> wrote:
> 
> From: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
> Date: Sat, 16 Dec 2017 10:48:38 +0800
> 
>> If md is NULL, tun_dst must be freed, otherwise it will cause memory
>> leak.
>> 
>> Fixes: 1a66a836da6 ("gre: add collect_md mode to ERSPAN tunnel")
>> Cc: William Tu <u9012063@gmail.com>
>> Signed-off-by: Haishuang Yan <yanhaishuang@cmss.chinamobile.com>
>> 
>> Change since v2:
>>  * Rebase on latest master branch.
>>  * Correct wrong fix information.
> 
> Please do not put a changelog after the fixes and signoff tags, those tags must
> appear last in the commit message.
> 
> Thank you.
> 

Okay, I will resubmit another commit, thanks.

^ permalink raw reply

* Re: [PATCH bpf 11/11] bpf: add selftest for tcpbpf
From: Alexei Starovoitov @ 2017-12-20  1:34 UTC (permalink / raw)
  To: Lawrence Brakmo, netdev; +Cc: Kernel Team, Blake Matheny, Daniel Borkmann
In-Reply-To: <20171219062200.372711-12-brakmo@fb.com>

On 12/18/17 10:22 PM, Lawrence Brakmo wrote:
> -	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o
> +	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
> +	test_tcpbpf_kern.o

it won't apply. please base patches on bpf-next tree

> +#!/usr/local/bin/python
> +#
> +# Copyright (c) 2017 Facebook
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of version 2 of the GNU General Public
> +# License as published by the Free Software Foundation.

the license should be in SPDX format.

> +++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
> @@ -0,0 +1,133 @@
> +/* Copyright (c) 2017 Facebook
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + */

same here.

> +		case BPF_SOCK_OPS_STATE_CB:
> +			if (skops->args[1] == 7) {
> +				__u32 key = 0;
> +				struct globals g, *gp;
> +
> +				gp = bpf_map_lookup_elem(&global_map, &key);
> +				if (gp == NULL) {
> +				} else {
> +					g = *gp;
> +					g.total_retrans = skops->total_retrans;
> +					g.data_segs_in = skops->data_segs_in;

you can reduce indent by doing
if (!gp)
   break;
g = *gp;
g.total_retrans = skops->total_retrans;

> +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
> @@ -0,0 +1,119 @@
> +/* Copyright (c) 2017 Facebook
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + */
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <errno.h>
> +#include <signal.h>
> +#include <string.h>
> +#include <assert.h>
> +#include <linux/perf_event.h>
> +#include <linux/ptrace.h>
> +#include <linux/bpf.h>
> +#include <sys/ioctl.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <bpf/bpf.h>
> +#include <bpf/libbpf.h>
> +//#include "bpf_load.h"

please remove left over comments.

^ permalink raw reply

* RE: [PATCH net-next] netdevsim: correctly check return value of debugfs_create_dir
From: Prashant Bhole @ 2017-12-20  1:40 UTC (permalink / raw)
  To: 'Jakub Kicinski'; +Cc: 'David Miller', netdev
In-Reply-To: <20171219171806.15fe3765@cakuba.netronome.com>

> From: Jakub Kicinski [mailto:jakub.kicinski@netronome.com]
> 
> On Wed, 20 Dec 2017 09:54:59 +0900, Prashant Bhole wrote:
> > > Ah, I would just error out in case we can't create any of the
> > > sub-directories as well.
> >
> > Does that mean fatal error if we can't create any of the subdirectories?
> 
> Yes.

Ok. In this case there is no need for a condition check before creating the
files. I will submit v2.

-Prashant

^ permalink raw reply

* Re: [v2 PATCH -tip 1/6] net: tcp: Add trace events for TCP congestion window tracing
From: kbuild test robot @ 2017-12-20  1:44 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: kbuild-all, Ingo Molnar, Ian McDonald, Vlad Yasevich,
	Stephen Hemminger, Steven Rostedt, Peter Zijlstra,
	Thomas Gleixner, LKML, H . Peter Anvin, Gerrit Renker,
	David S . Miller, Neil Horman, dccp, netdev, linux-sctp,
	Stephen Rothwell, mhiramat
In-Reply-To: <151358467535.28850.8937168919346099524.stgit@devbox>

[-- Attachment #1: Type: text/plain, Size: 11009 bytes --]

Hi Masami,

I love your patch! Yet something to improve:

[auto build test ERROR on net/master]
[also build test ERROR on v4.15-rc4 next-20171219]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Masami-Hiramatsu/net-tcp-sctp-dccp-Replace-jprobe-usage-with-trace-events/20171220-081035
config: sh-allmodconfig (attached as .config)
compiler: sh4-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=sh 

All error/warnings (new ones prefixed by >>):

   In file included from include/trace/events/udp.h:9:0,
                    from net//core/net-traces.c:35:
>> include/trace/events/tcp.h:37:11: error: expected ')' before 'const'
     TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
              ^
   include/linux/tracepoint.h:105:27: note: in definition of macro 'TP_PROTO'
    #define TP_PROTO(args...) args
                              ^~~~
>> include/linux/tracepoint.h:237:20: error: redefinition of '__tpstrtab_tcp_retransmit_skb'
     static const char __tpstrtab_##name[]     \
                       ^
>> include/linux/tracepoint.h:247:2: note: in expansion of macro 'DEFINE_TRACE_FN'
     DEFINE_TRACE_FN(name, NULL, NULL);
     ^~~~~~~~~~~~~~~
>> include/trace/define_trace.h:51:2: note: in expansion of macro 'DEFINE_TRACE'
     DEFINE_TRACE(name)
     ^~~~~~~~~~~~
>> include/trace/events/tcp.h:90:1: note: in expansion of macro 'DEFINE_EVENT'
    DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
    ^~~~~~~~~~~~
   In file included from include/trace/events/tcp.h:10:0,
                    from net//core/net-traces.c:34:
   include/linux/tracepoint.h:237:20: note: previous definition of '__tpstrtab_tcp_retransmit_skb' was here
     static const char __tpstrtab_##name[]     \
                       ^
>> include/linux/tracepoint.h:247:2: note: in expansion of macro 'DEFINE_TRACE_FN'
     DEFINE_TRACE_FN(name, NULL, NULL);
     ^~~~~~~~~~~~~~~
>> include/trace/define_trace.h:51:2: note: in expansion of macro 'DEFINE_TRACE'
     DEFINE_TRACE(name)
     ^~~~~~~~~~~~
>> include/trace/events/tcp.h:90:1: note: in expansion of macro 'DEFINE_EVENT'
    DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
    ^~~~~~~~~~~~
   In file included from include/trace/events/udp.h:9:0,
                    from net//core/net-traces.c:35:
   include/linux/tracepoint.h:239:20: error: redefinition of '__tracepoint_tcp_retransmit_skb'
     struct tracepoint __tracepoint_##name     \
                       ^
>> include/linux/tracepoint.h:247:2: note: in expansion of macro 'DEFINE_TRACE_FN'
     DEFINE_TRACE_FN(name, NULL, NULL);
     ^~~~~~~~~~~~~~~
>> include/trace/define_trace.h:51:2: note: in expansion of macro 'DEFINE_TRACE'
     DEFINE_TRACE(name)
     ^~~~~~~~~~~~
>> include/trace/events/tcp.h:90:1: note: in expansion of macro 'DEFINE_EVENT'
    DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
    ^~~~~~~~~~~~
   In file included from include/trace/events/tcp.h:10:0,
                    from net//core/net-traces.c:34:
   include/linux/tracepoint.h:239:20: note: previous definition of '__tracepoint_tcp_retransmit_skb' was here
     struct tracepoint __tracepoint_##name     \
                       ^
>> include/linux/tracepoint.h:247:2: note: in expansion of macro 'DEFINE_TRACE_FN'
     DEFINE_TRACE_FN(name, NULL, NULL);
     ^~~~~~~~~~~~~~~
>> include/trace/define_trace.h:51:2: note: in expansion of macro 'DEFINE_TRACE'
     DEFINE_TRACE(name)
     ^~~~~~~~~~~~
>> include/trace/events/tcp.h:90:1: note: in expansion of macro 'DEFINE_EVENT'
    DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
    ^~~~~~~~~~~~
   In file included from include/trace/events/udp.h:9:0,
                    from net//core/net-traces.c:35:
>> include/linux/tracepoint.h:242:35: error: redefinition of '__tracepoint_ptr_tcp_retransmit_skb'
     static struct tracepoint * const __tracepoint_ptr_##name __used  \
                                      ^
>> include/linux/tracepoint.h:247:2: note: in expansion of macro 'DEFINE_TRACE_FN'
     DEFINE_TRACE_FN(name, NULL, NULL);
     ^~~~~~~~~~~~~~~
>> include/trace/define_trace.h:51:2: note: in expansion of macro 'DEFINE_TRACE'
     DEFINE_TRACE(name)
     ^~~~~~~~~~~~
>> include/trace/events/tcp.h:90:1: note: in expansion of macro 'DEFINE_EVENT'
    DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
    ^~~~~~~~~~~~
   In file included from include/trace/events/tcp.h:10:0,
                    from net//core/net-traces.c:34:
   include/linux/tracepoint.h:242:35: note: previous definition of '__tracepoint_ptr_tcp_retransmit_skb' was here
     static struct tracepoint * const __tracepoint_ptr_##name __used  \
                                      ^
>> include/linux/tracepoint.h:247:2: note: in expansion of macro 'DEFINE_TRACE_FN'
     DEFINE_TRACE_FN(name, NULL, NULL);
     ^~~~~~~~~~~~~~~
>> include/trace/define_trace.h:51:2: note: in expansion of macro 'DEFINE_TRACE'
     DEFINE_TRACE(name)
     ^~~~~~~~~~~~

vim +37 include/trace/events/tcp.h

e086101b Cong Wang   2017-10-13  12  
e8fce239 Song Liu    2017-10-23  13  #define tcp_state_name(state)	{ state, #state }
e8fce239 Song Liu    2017-10-23  14  #define show_tcp_state_name(val)			\
e8fce239 Song Liu    2017-10-23  15  	__print_symbolic(val,				\
e8fce239 Song Liu    2017-10-23  16  		tcp_state_name(TCP_ESTABLISHED),	\
e8fce239 Song Liu    2017-10-23  17  		tcp_state_name(TCP_SYN_SENT),		\
e8fce239 Song Liu    2017-10-23  18  		tcp_state_name(TCP_SYN_RECV),		\
e8fce239 Song Liu    2017-10-23  19  		tcp_state_name(TCP_FIN_WAIT1),		\
e8fce239 Song Liu    2017-10-23  20  		tcp_state_name(TCP_FIN_WAIT2),		\
e8fce239 Song Liu    2017-10-23  21  		tcp_state_name(TCP_TIME_WAIT),		\
e8fce239 Song Liu    2017-10-23  22  		tcp_state_name(TCP_CLOSE),		\
e8fce239 Song Liu    2017-10-23  23  		tcp_state_name(TCP_CLOSE_WAIT),		\
e8fce239 Song Liu    2017-10-23  24  		tcp_state_name(TCP_LAST_ACK),		\
e8fce239 Song Liu    2017-10-23  25  		tcp_state_name(TCP_LISTEN),		\
e8fce239 Song Liu    2017-10-23  26  		tcp_state_name(TCP_CLOSING),		\
e8fce239 Song Liu    2017-10-23  27  		tcp_state_name(TCP_NEW_SYN_RECV))
e8fce239 Song Liu    2017-10-23  28  
f6e37b25 Song Liu    2017-10-23  29  /*
f6e37b25 Song Liu    2017-10-23  30   * tcp event with arguments sk and skb
f6e37b25 Song Liu    2017-10-23  31   *
f6e37b25 Song Liu    2017-10-23  32   * Note: this class requires a valid sk pointer; while skb pointer could
f6e37b25 Song Liu    2017-10-23  33   *       be NULL.
f6e37b25 Song Liu    2017-10-23  34   */
f6e37b25 Song Liu    2017-10-23 @35  DECLARE_EVENT_CLASS(tcp_event_sk_skb,
e086101b Cong Wang   2017-10-13  36  
7344e29f Song Liu    2017-10-23 @37  	TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
e086101b Cong Wang   2017-10-13  38  
e086101b Cong Wang   2017-10-13  39  	TP_ARGS(sk, skb),
e086101b Cong Wang   2017-10-13  40  
e086101b Cong Wang   2017-10-13  41  	TP_STRUCT__entry(
7344e29f Song Liu    2017-10-23  42  		__field(const void *, skbaddr)
7344e29f Song Liu    2017-10-23  43  		__field(const void *, skaddr)
e086101b Cong Wang   2017-10-13  44  		__field(__u16, sport)
e086101b Cong Wang   2017-10-13  45  		__field(__u16, dport)
e086101b Cong Wang   2017-10-13  46  		__array(__u8, saddr, 4)
e086101b Cong Wang   2017-10-13  47  		__array(__u8, daddr, 4)
e086101b Cong Wang   2017-10-13  48  		__array(__u8, saddr_v6, 16)
e086101b Cong Wang   2017-10-13  49  		__array(__u8, daddr_v6, 16)
e086101b Cong Wang   2017-10-13  50  	),
e086101b Cong Wang   2017-10-13  51  
e086101b Cong Wang   2017-10-13  52  	TP_fast_assign(
e086101b Cong Wang   2017-10-13  53  		struct inet_sock *inet = inet_sk(sk);
e086101b Cong Wang   2017-10-13  54  		struct in6_addr *pin6;
e086101b Cong Wang   2017-10-13  55  		__be32 *p32;
e086101b Cong Wang   2017-10-13  56  
e086101b Cong Wang   2017-10-13  57  		__entry->skbaddr = skb;
e086101b Cong Wang   2017-10-13  58  		__entry->skaddr = sk;
e086101b Cong Wang   2017-10-13  59  
e086101b Cong Wang   2017-10-13  60  		__entry->sport = ntohs(inet->inet_sport);
e086101b Cong Wang   2017-10-13  61  		__entry->dport = ntohs(inet->inet_dport);
e086101b Cong Wang   2017-10-13  62  
e086101b Cong Wang   2017-10-13  63  		p32 = (__be32 *) __entry->saddr;
e086101b Cong Wang   2017-10-13  64  		*p32 = inet->inet_saddr;
e086101b Cong Wang   2017-10-13  65  
e086101b Cong Wang   2017-10-13  66  		p32 = (__be32 *) __entry->daddr;
e086101b Cong Wang   2017-10-13  67  		*p32 =  inet->inet_daddr;
e086101b Cong Wang   2017-10-13  68  
89005678 David Ahern 2017-10-18  69  #if IS_ENABLED(CONFIG_IPV6)
89005678 David Ahern 2017-10-18  70  		if (sk->sk_family == AF_INET6) {
e086101b Cong Wang   2017-10-13  71  			pin6 = (struct in6_addr *)__entry->saddr_v6;
386fd5da David Ahern 2017-10-16  72  			*pin6 = sk->sk_v6_rcv_saddr;
e086101b Cong Wang   2017-10-13  73  			pin6 = (struct in6_addr *)__entry->daddr_v6;
386fd5da David Ahern 2017-10-16  74  			*pin6 = sk->sk_v6_daddr;
89005678 David Ahern 2017-10-18  75  		} else
89005678 David Ahern 2017-10-18  76  #endif
89005678 David Ahern 2017-10-18  77  		{
e086101b Cong Wang   2017-10-13  78  			pin6 = (struct in6_addr *)__entry->saddr_v6;
e086101b Cong Wang   2017-10-13  79  			ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
e086101b Cong Wang   2017-10-13  80  			pin6 = (struct in6_addr *)__entry->daddr_v6;
e086101b Cong Wang   2017-10-13  81  			ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
e086101b Cong Wang   2017-10-13  82  		}
e086101b Cong Wang   2017-10-13  83  	),
e086101b Cong Wang   2017-10-13  84  
fb6ff75e David Ahern 2017-10-16  85  	TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
e086101b Cong Wang   2017-10-13  86  		  __entry->sport, __entry->dport, __entry->saddr, __entry->daddr,
e086101b Cong Wang   2017-10-13  87  		  __entry->saddr_v6, __entry->daddr_v6)
e086101b Cong Wang   2017-10-13  88  );
e086101b Cong Wang   2017-10-13  89  
f6e37b25 Song Liu    2017-10-23 @90  DEFINE_EVENT(tcp_event_sk_skb, tcp_retransmit_skb,
f6e37b25 Song Liu    2017-10-23  91  
7344e29f Song Liu    2017-10-23  92  	TP_PROTO(const struct sock *sk, const struct sk_buff *skb),
f6e37b25 Song Liu    2017-10-23  93  
f6e37b25 Song Liu    2017-10-23  94  	TP_ARGS(sk, skb)
f6e37b25 Song Liu    2017-10-23  95  );
f6e37b25 Song Liu    2017-10-23  96  

:::::: The code at line 37 was first introduced by commit
:::::: 7344e29f285a94b965075599731811c352f3ab40 tcp: mark trace event arguments sk and skb as const

:::::: TO: Song Liu <songliubraving@fb.com>
:::::: CC: David S. Miller <davem@davemloft.net>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 47602 bytes --]

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox