Netdev List
 help / color / mirror / Atom feed
* [PATCH v3 next-queue 06/10] ixgbe: restore offloaded SAs after a reset
From: Shannon Nelson @ 2017-12-19 23:59 UTC (permalink / raw)
  To: intel-wired-lan, jeffrey.t.kirsher
  Cc: steffen.klassert, sowmini.varadhan, netdev
In-Reply-To: <1513728002-7643-1-git-send-email-shannon.nelson@oracle.com>

On a chip reset most of the table contents are lost, so must be
restored.  This scans the driver's ipsec tables and restores both
the filled and empty table slots to their pre-reset values.

v2: during restore, clean the tables before restarting

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h       |  2 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 41 ++++++++++++++++++++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  |  1 +
 3 files changed, 44 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 8f41508..af690c2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -1010,8 +1010,10 @@ s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
 #ifdef CONFIG_XFRM_OFFLOAD
 void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter);
 void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter);
+void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter);
 #else
 static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) { };
 static inline void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter) { };
+static inline void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) { };
 #endif /* CONFIG_XFRM_OFFLOAD */
 #endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 96971ce..9cf120d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -299,6 +299,47 @@ static void ixgbe_ipsec_start_engine(struct ixgbe_adapter *adapter)
 }
 
 /**
+ * ixgbe_ipsec_restore - restore the ipsec HW settings after a reset
+ * @adapter: board private structure
+ **/
+void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct ixgbe_hw *hw = &adapter->hw;
+	int i;
+
+	if (!(adapter->flags2 & IXGBE_FLAG2_IPSEC_ENABLED))
+		return;
+
+	/* clean up and restart the engine */
+	ixgbe_ipsec_stop_engine(adapter);
+	ixgbe_ipsec_clear_hw_tables(adapter);
+	ixgbe_ipsec_start_engine(adapter);
+
+	/* reload the IP addrs */
+	for (i = 0; i < IXGBE_IPSEC_MAX_RX_IP_COUNT; i++) {
+		struct rx_ip_sa *ipsa = &ipsec->ip_tbl[i];
+
+		if (ipsa->used)
+			ixgbe_ipsec_set_rx_ip(hw, i, ipsa->ipaddr);
+	}
+
+	/* reload the Rx and Tx keys */
+	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+		struct rx_sa *rsa = &ipsec->rx_tbl[i];
+		struct tx_sa *tsa = &ipsec->tx_tbl[i];
+
+		if (rsa->used)
+			ixgbe_ipsec_set_rx_sa(hw, i, rsa->xs->id.spi,
+					      rsa->key, rsa->salt,
+					      rsa->mode, rsa->iptbl_ind);
+
+		if (tsa->used)
+			ixgbe_ipsec_set_tx_sa(hw, i, tsa->key, tsa->salt);
+	}
+}
+
+/**
  * ixgbe_ipsec_find_empty_idx - find the first unused security parameter index
  * @ipsec: pointer to ipsec struct
  * @rxtable: true if we need to look in the Rx table
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 2b3da0c..04e8b26 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -5347,6 +5347,7 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
 
 	ixgbe_set_rx_mode(adapter->netdev);
 	ixgbe_restore_vlan(adapter);
+	ixgbe_ipsec_restore(adapter);
 
 	switch (hw->mac.type) {
 	case ixgbe_mac_82599EB:
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 next-queue 03/10] ixgbe: add ipsec engine start and stop routines
From: Shannon Nelson @ 2017-12-19 23:59 UTC (permalink / raw)
  To: intel-wired-lan, jeffrey.t.kirsher
  Cc: steffen.klassert, sowmini.varadhan, netdev
In-Reply-To: <1513728002-7643-1-git-send-email-shannon.nelson@oracle.com>

Add in the code for running and stopping the hardware ipsec
encryption/decryption engine.  It is good to keep the engine
off when not in use in order to save on the power draw.

v2: add limiter to do-while loop waiting for paths to drain

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 142 +++++++++++++++++++++++++
 1 file changed, 142 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index bd7585f..85eaafc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -152,10 +152,152 @@ static void ixgbe_ipsec_clear_hw_tables(struct ixgbe_adapter *adapter)
 }
 
 /**
+ * ixgbe_ipsec_stop_data
+ * @adapter: board private structure
+ **/
+static void ixgbe_ipsec_stop_data(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	bool link = adapter->link_up;
+	u32 t_rdy, r_rdy;
+	u32 limit;
+	u32 reg;
+
+	/* halt data paths */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+	reg |= IXGBE_SECTXCTRL_TX_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, reg);
+
+	reg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+	reg |= IXGBE_SECRXCTRL_RX_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, reg);
+
+	IXGBE_WRITE_FLUSH(hw);
+
+	/* If the tx fifo doesn't have link, but still has data,
+	 * we can't clear the tx sec block.  Set the MAC loopback
+	 * before block clear
+	 */
+	if (!link) {
+		reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+		reg |= IXGBE_MACC_FLU;
+		IXGBE_WRITE_REG(hw, IXGBE_MACC, reg);
+
+		reg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+		reg |= IXGBE_HLREG0_LPBK;
+		IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg);
+
+		IXGBE_WRITE_FLUSH(hw);
+		mdelay(3);
+	}
+
+	/* wait for the paths to empty */
+	limit = 20;
+	do {
+		mdelay(10);
+		t_rdy = IXGBE_READ_REG(hw, IXGBE_SECTXSTAT) &
+			IXGBE_SECTXSTAT_SECTX_RDY;
+		r_rdy = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT) &
+			IXGBE_SECRXSTAT_SECRX_RDY;
+	} while (!t_rdy && !r_rdy && limit--);
+
+	/* undo loopback if we played with it earlier */
+	if (!link) {
+		reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+		reg &= ~IXGBE_MACC_FLU;
+		IXGBE_WRITE_REG(hw, IXGBE_MACC, reg);
+
+		reg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+		reg &= ~IXGBE_HLREG0_LPBK;
+		IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg);
+
+		IXGBE_WRITE_FLUSH(hw);
+	}
+}
+
+/**
+ * ixgbe_ipsec_stop_engine
+ * @adapter: board private structure
+ **/
+static void ixgbe_ipsec_stop_engine(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 reg;
+
+	ixgbe_ipsec_stop_data(adapter);
+
+	/* disable Rx and Tx SA lookup */
+	IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, 0);
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, 0);
+
+	/* disable the Rx and Tx engines and full packet store-n-forward */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+	reg |= IXGBE_SECTXCTRL_SECTX_DIS;
+	reg &= ~IXGBE_SECTXCTRL_STORE_FORWARD;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, reg);
+
+	reg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+	reg |= IXGBE_SECRXCTRL_SECRX_DIS;
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, reg);
+
+	/* restore the "tx security buffer almost full threshold" to 0x250 */
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXBUFFAF, 0x250);
+
+	/* Set minimum IFG between packets back to the default 0x1 */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+	reg = (reg & 0xfffffff0) | 0x1;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
+
+	/* final set for normal (no ipsec offload) processing */
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, IXGBE_SECTXCTRL_SECTX_DIS);
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, IXGBE_SECRXCTRL_SECRX_DIS);
+
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_start_engine
+ * @adapter: board private structure
+ *
+ * NOTE: this increases power consumption whether being used or not
+ **/
+static void ixgbe_ipsec_start_engine(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 reg;
+
+	ixgbe_ipsec_stop_data(adapter);
+
+	/* Set minimum IFG between packets to 3 */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+	reg = (reg & 0xfffffff0) | 0x3;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
+
+	/* Set "tx security buffer almost full threshold" to 0x15 so that the
+	 * almost full indication is generated only after buffer contains at
+	 * least an entire jumbo packet.
+	 */
+	reg = IXGBE_READ_REG(hw, IXGBE_SECTXBUFFAF);
+	reg = (reg & 0xfffffc00) | 0x15;
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXBUFFAF, reg);
+
+	/* restart the data paths by clearing the DISABLE bits */
+	IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, 0);
+	IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, IXGBE_SECTXCTRL_STORE_FORWARD);
+
+	/* enable Rx and Tx SA lookup */
+	IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, IXGBE_RXTXIDX_IPS_EN);
+	IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, IXGBE_RXTXIDX_IPS_EN);
+
+	IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
  * ixgbe_init_ipsec_offload - initialize security registers for IPSec operation
  * @adapter: board private structure
  **/
 void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter)
 {
 	ixgbe_ipsec_clear_hw_tables(adapter);
+	ixgbe_ipsec_stop_engine(adapter);
 }
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 next-queue 01/10] ixgbe: clean up ipsec defines
From: Shannon Nelson @ 2017-12-19 23:59 UTC (permalink / raw)
  To: intel-wired-lan, jeffrey.t.kirsher
  Cc: steffen.klassert, sowmini.varadhan, netdev
In-Reply-To: <1513728002-7643-1-git-send-email-shannon.nelson@oracle.com>

Clean up the ipsec/macsec descriptor bit definitions to match the rest
of the defines and file organization.  Also recognise the bit-definition
overlap in the error mask macro.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index ffa0ee5..3df0763 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -2321,11 +2321,6 @@ enum {
 #define IXGBE_TXD_CMD_VLE    0x40000000 /* Add VLAN tag */
 #define IXGBE_TXD_STAT_DD    0x00000001 /* Descriptor Done */
 
-#define IXGBE_RXDADV_IPSEC_STATUS_SECP                  0x00020000
-#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL       0x08000000
-#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_LENGTH         0x10000000
-#define IXGBE_RXDADV_IPSEC_ERROR_AUTH_FAILED            0x18000000
-#define IXGBE_RXDADV_IPSEC_ERROR_BIT_MASK               0x18000000
 /* Multiple Transmit Queue Command Register */
 #define IXGBE_MTQC_RT_ENA       0x1 /* DCB Enable */
 #define IXGBE_MTQC_VT_ENA       0x2 /* VMDQ2 Enable */
@@ -2377,6 +2372,9 @@ enum {
 #define IXGBE_RXDADV_ERR_LE     0x02000000 /* Length Error */
 #define IXGBE_RXDADV_ERR_PE     0x08000000 /* Packet Error */
 #define IXGBE_RXDADV_ERR_OSE    0x10000000 /* Oversize Error */
+#define IXGBE_RXDADV_ERR_IPSEC_INV_PROTOCOL  0x08000000 /* overlap ERR_PE  */
+#define IXGBE_RXDADV_ERR_IPSEC_INV_LENGTH    0x10000000 /* overlap ERR_OSE */
+#define IXGBE_RXDADV_ERR_IPSEC_AUTH_FAILED   0x18000000
 #define IXGBE_RXDADV_ERR_USE    0x20000000 /* Undersize Error */
 #define IXGBE_RXDADV_ERR_TCPE   0x40000000 /* TCP/UDP Checksum Error */
 #define IXGBE_RXDADV_ERR_IPE    0x80000000 /* IP Checksum Error */
@@ -2398,6 +2396,7 @@ enum {
 #define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP 0x00000020 /* 10: Recv. FCP_RSP */
 #define IXGBE_RXDADV_STAT_FCSTAT_DDP    0x00000030 /* 11: Ctxt w/ DDP */
 #define IXGBE_RXDADV_STAT_TS		0x00010000 /* IEEE 1588 Time Stamp */
+#define IXGBE_RXDADV_STAT_SECP          0x00020000 /* IPsec/MACsec pkt found */
 
 /* PSRTYPE bit definitions */
 #define IXGBE_PSRTYPE_TCPHDR    0x00000010
@@ -2464,13 +2463,6 @@ enum {
 #define IXGBE_RXDADV_PKTTYPE_ETQF_MASK  0x00000070 /* ETQF has 8 indices */
 #define IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT 4          /* Right-shift 4 bits */
 
-/* Security Processing bit Indication */
-#define IXGBE_RXDADV_LNKSEC_STATUS_SECP         0x00020000
-#define IXGBE_RXDADV_LNKSEC_ERROR_NO_SA_MATCH   0x08000000
-#define IXGBE_RXDADV_LNKSEC_ERROR_REPLAY_ERROR  0x10000000
-#define IXGBE_RXDADV_LNKSEC_ERROR_BIT_MASK      0x18000000
-#define IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG       0x18000000
-
 /* Masks to determine if packets should be dropped due to frame errors */
 #define IXGBE_RXD_ERR_FRAME_ERR_MASK ( \
 				      IXGBE_RXD_ERR_CE | \
@@ -2484,6 +2476,8 @@ enum {
 				      IXGBE_RXDADV_ERR_LE | \
 				      IXGBE_RXDADV_ERR_PE | \
 				      IXGBE_RXDADV_ERR_OSE | \
+				      IXGBE_RXDADV_ERR_IPSEC_INV_PROTOCOL | \
+				      IXGBE_RXDADV_ERR_IPSEC_INV_LENGTH | \
 				      IXGBE_RXDADV_ERR_USE)
 
 /* Multicast bit mask */
@@ -2893,6 +2887,7 @@ struct ixgbe_adv_tx_context_desc {
 				 IXGBE_ADVTXD_POPTS_SHIFT)
 #define IXGBE_ADVTXD_POPTS_TXSM (IXGBE_TXD_POPTS_TXSM << \
 				 IXGBE_ADVTXD_POPTS_SHIFT)
+#define IXGBE_ADVTXD_POPTS_IPSEC     0x00000400 /* IPSec offload request */
 #define IXGBE_ADVTXD_POPTS_ISCO_1ST  0x00000000 /* 1st TSO of iSCSI PDU */
 #define IXGBE_ADVTXD_POPTS_ISCO_MDL  0x00000800 /* Middle TSO of iSCSI PDU */
 #define IXGBE_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */
@@ -2908,7 +2903,6 @@ struct ixgbe_adv_tx_context_desc {
 #define IXGBE_ADVTXD_TUCMD_L4T_SCTP  0x00001000  /* L4 Packet TYPE of SCTP */
 #define IXGBE_ADVTXD_TUCMD_L4T_RSV     0x00001800 /* RSV L4 Packet TYPE */
 #define IXGBE_ADVTXD_TUCMD_MKRREQ    0x00002000 /*Req requires Markers and CRC*/
-#define IXGBE_ADVTXD_POPTS_IPSEC      0x00000400 /* IPSec offload request */
 #define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */
 #define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000/* ESP Encrypt Enable */
 #define IXGBE_ADVTXT_TUCMD_FCOE      0x00008000       /* FCoE Frame Type */
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH v2] hv_netvsc: automatically name slave VF network device
From: Jakub Kicinski @ 2017-12-20  0:02 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev, Stephen Hemminger
In-Reply-To: <20171219235930.16916-1-sthemmin@microsoft.com>

On Tue, 19 Dec 2017 15:59:30 -0800, Stephen Hemminger wrote:
> Rename the VF device to ethX_vf based on the ethX as the
> synthetic device.  This eliminates the need for delay on setup,
> and the PCI (udev based) naming is not reproducible on Hyper-V
> anyway. The name of the VF does not matter since all control
> operations take place the primary device. It does make the
> user experience better to associate the names.
> 
> Based on feedback from all.systems.go talk.
> 
> Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>

I think you must have hit send before seeing my last reply on v1...

^ permalink raw reply

* [PATCH v3 next-queue 07/10] ixgbe: process the Rx ipsec offload
From: Shannon Nelson @ 2017-12-19 23:59 UTC (permalink / raw)
  To: intel-wired-lan, jeffrey.t.kirsher
  Cc: steffen.klassert, sowmini.varadhan, netdev
In-Reply-To: <1513728002-7643-1-git-send-email-shannon.nelson@oracle.com>

If the chip sees and decrypts an ipsec offload, set up the skb
sp pointer with the ralated SA info.  Since the chip is rude
enough to keep to itself the table index it used for the
decryption, we have to do our own table lookup, using the
hash for speed.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h       |  6 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 89 ++++++++++++++++++++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  |  3 +
 3 files changed, 98 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index af690c2..a094b23 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -1011,9 +1011,15 @@ s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
 void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter);
 void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter);
 void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter);
+void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
+		    union ixgbe_adv_rx_desc *rx_desc,
+		    struct sk_buff *skb);
 #else
 static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) { };
 static inline void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter) { };
 static inline void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) { };
+static inline void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
+				  union ixgbe_adv_rx_desc *rx_desc,
+				  struct sk_buff *skb) { };
 #endif /* CONFIG_XFRM_OFFLOAD */
 #endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 9cf120d..a9b8f5c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -374,6 +374,35 @@ static int ixgbe_ipsec_find_empty_idx(struct ixgbe_ipsec *ipsec, bool rxtable)
 }
 
 /**
+ * ixgbe_ipsec_find_rx_state - find the state that matches
+ * @ipsec: pointer to ipsec struct
+ * @daddr: inbound address to match
+ * @proto: protocol to match
+ * @spi: SPI to match
+ *
+ * Returns a pointer to the matching SA state information
+ **/
+static struct xfrm_state *ixgbe_ipsec_find_rx_state(struct ixgbe_ipsec *ipsec,
+						    __be32 daddr, u8 proto,
+						    __be32 spi)
+{
+	struct rx_sa *rsa;
+	struct xfrm_state *ret = NULL;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(ipsec->rx_sa_list, rsa, hlist, spi)
+		if (spi == rsa->xs->id.spi &&
+		    daddr == rsa->xs->id.daddr.a4 &&
+		    proto == rsa->xs->id.proto) {
+			ret = rsa->xs;
+			xfrm_state_hold(ret);
+			break;
+		}
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
  * ixgbe_ipsec_parse_proto_keys - find the key and salt based on the protocol
  * @xs: pointer to xfrm_state struct
  * @mykey: pointer to key array to populate
@@ -672,6 +701,66 @@ static const struct xfrmdev_ops ixgbe_xfrmdev_ops = {
 };
 
 /**
+ * ixgbe_ipsec_rx - decode ipsec bits from Rx descriptor
+ * @rx_ring: receiving ring
+ * @rx_desc: receive data descriptor
+ * @skb: current data packet
+ *
+ * Determine if there was an ipsec encapsulation noticed, and if so set up
+ * the resulting status for later in the receive stack.
+ **/
+void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
+		    union ixgbe_adv_rx_desc *rx_desc,
+		    struct sk_buff *skb)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(rx_ring->netdev);
+	u16 pkt_info = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info);
+	u16 ipsec_pkt_types = IXGBE_RXDADV_PKTTYPE_IPSEC_AH |
+				IXGBE_RXDADV_PKTTYPE_IPSEC_ESP;
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_offload *xo = NULL;
+	struct xfrm_state *xs = NULL;
+	struct iphdr *iph;
+	u8 *c_hdr;
+	__be32 spi;
+	u8 proto;
+
+	/* we can assume no vlan header in the way, b/c the
+	 * hw won't recognize the IPsec packet and anyway the
+	 * currently vlan device doesn't support xfrm offload.
+	 */
+	/* TODO: not supporting IPv6 yet */
+	iph = (struct iphdr *)(skb->data + ETH_HLEN);
+	c_hdr = (u8 *)iph + iph->ihl * 4;
+	switch (pkt_info & ipsec_pkt_types) {
+	case IXGBE_RXDADV_PKTTYPE_IPSEC_AH:
+		spi = ((struct ip_auth_hdr *)c_hdr)->spi;
+		proto = IPPROTO_AH;
+		break;
+	case IXGBE_RXDADV_PKTTYPE_IPSEC_ESP:
+		spi = ((struct ip_esp_hdr *)c_hdr)->spi;
+		proto = IPPROTO_ESP;
+		break;
+	default:
+		return;
+	}
+
+	xs = ixgbe_ipsec_find_rx_state(ipsec, iph->daddr, proto, spi);
+	if (unlikely(!xs))
+		return;
+
+	skb->sp = secpath_dup(skb->sp);
+	if (unlikely(!skb->sp))
+		return;
+
+	skb->sp->xvec[skb->sp->len++] = xs;
+	skb->sp->olen++;
+	xo = xfrm_offload(skb);
+	xo->flags = CRYPTO_DONE;
+	xo->status = CRYPTO_SUCCESS;
+}
+
+/**
  * ixgbe_init_ipsec_offload - initialize security registers for IPSec operation
  * @adapter: board private structure
  **/
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 04e8b26..0ee1e5e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1755,6 +1755,9 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
 
 	skb_record_rx_queue(skb, rx_ring->queue_index);
 
+	if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP))
+		ixgbe_ipsec_rx(rx_ring, rx_desc, skb);
+
 	skb->protocol = eth_type_trans(skb, dev);
 }
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 next-queue 08/10] ixgbe: process the Tx ipsec offload
From: Shannon Nelson @ 2017-12-20  0:00 UTC (permalink / raw)
  To: intel-wired-lan, jeffrey.t.kirsher
  Cc: steffen.klassert, sowmini.varadhan, netdev
In-Reply-To: <1513728002-7643-1-git-send-email-shannon.nelson@oracle.com>

If the skb has a security association referenced in the skb, then
set up the Tx descriptor with the ipsec offload bits.  While we're
here, we fix an oddly named field in the context descriptor struct.

v3: added ifdef CONFIG_XFRM_OFFLOAD check around call to ixgbe_ipsec_tx()

v2: use ihl != 5
    move the ixgbe_ipsec_tx() call to near the call to ixgbe_tso()
    drop the ipsec packet if the tx offload setup fails
    simplify the ixgbe_ipsec_tx() parameters by using 'first'
    leave out the ixgbe_tso() changes since we don't support TSO
       with ipsec yet.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h       | 10 +++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 79 ++++++++++++++++++++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c   |  4 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  | 26 +++++++--
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h  |  2 +-
 5 files changed, 112 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index a094b23..3d2b7bf 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -171,10 +171,11 @@ enum ixgbe_tx_flags {
 	IXGBE_TX_FLAGS_CC	= 0x08,
 	IXGBE_TX_FLAGS_IPV4	= 0x10,
 	IXGBE_TX_FLAGS_CSUM	= 0x20,
+	IXGBE_TX_FLAGS_IPSEC	= 0x40,
 
 	/* software defined flags */
-	IXGBE_TX_FLAGS_SW_VLAN	= 0x40,
-	IXGBE_TX_FLAGS_FCOE	= 0x80,
+	IXGBE_TX_FLAGS_SW_VLAN	= 0x80,
+	IXGBE_TX_FLAGS_FCOE	= 0x100,
 };
 
 /* VLAN info */
@@ -1014,6 +1015,8 @@ void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter);
 void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
 		    union ixgbe_adv_rx_desc *rx_desc,
 		    struct sk_buff *skb);
+int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, struct ixgbe_tx_buffer *first,
+		   struct ixgbe_ipsec_tx_data *itd);
 #else
 static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) { };
 static inline void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter) { };
@@ -1021,5 +1024,8 @@ static inline void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) { };
 static inline void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
 				  union ixgbe_adv_rx_desc *rx_desc,
 				  struct sk_buff *skb) { };
+static inline int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
+				 struct ixgbe_tx_buffer *first,
+				 struct ixgbe_ipsec_tx_data *itd) { return 0; };
 #endif /* CONFIG_XFRM_OFFLOAD */
 #endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index a9b8f5c..c2fd2ac 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -695,12 +695,91 @@ static void ixgbe_ipsec_del_sa(struct xfrm_state *xs)
 	}
 }
 
+/**
+ * ixgbe_ipsec_offload_ok - can this packet use the xfrm hw offload
+ * @skb: current data packet
+ * @xs: pointer to transformer state struct
+ **/
+static bool ixgbe_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
+{
+	if (xs->props.family == AF_INET) {
+		/* Offload with IPv4 options is not supported yet */
+		if (ip_hdr(skb)->ihl != 5)
+			return false;
+	} else {
+		/* Offload with IPv6 extension headers is not support yet */
+		if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
+			return false;
+	}
+
+	return true;
+}
+
 static const struct xfrmdev_ops ixgbe_xfrmdev_ops = {
 	.xdo_dev_state_add = ixgbe_ipsec_add_sa,
 	.xdo_dev_state_delete = ixgbe_ipsec_del_sa,
+	.xdo_dev_offload_ok = ixgbe_ipsec_offload_ok,
 };
 
 /**
+ * ixgbe_ipsec_tx - setup Tx flags for ipsec offload
+ * @tx_ring: outgoing context
+ * @first: current data packet
+ * @itd: ipsec Tx data for later use in building context descriptor
+ **/
+int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
+		   struct ixgbe_tx_buffer *first,
+		   struct ixgbe_ipsec_tx_data *itd)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev);
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_state *xs;
+	struct tx_sa *tsa;
+
+	if (!first->skb->sp->len) {
+		netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n",
+			   __func__, first->skb->sp->len);
+		return 0;
+	}
+
+	xs = xfrm_input_state(first->skb);
+	if (!xs) {
+		netdev_err(tx_ring->netdev, "%s: no xfrm_input_state() xs = %p\n",
+			   __func__, xs);
+		return 0;
+	}
+
+	itd->sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
+	if (itd->sa_idx > IXGBE_IPSEC_MAX_SA_COUNT) {
+		netdev_err(tx_ring->netdev, "%s: bad sa_idx=%d handle=%lu\n",
+			   __func__, itd->sa_idx, xs->xso.offload_handle);
+		return 0;
+	}
+
+	tsa = &ipsec->tx_tbl[itd->sa_idx];
+	if (!tsa->used) {
+		netdev_err(tx_ring->netdev, "%s: unused sa_idx=%d\n",
+			   __func__, itd->sa_idx);
+		return 0;
+	}
+
+	first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CC;
+
+	itd->flags = 0;
+	if (xs->id.proto == IPPROTO_ESP) {
+		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
+			      IXGBE_ADVTXD_TUCMD_L4T_TCP;
+		if (first->protocol == htons(ETH_P_IP))
+			itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4;
+		itd->trailer_len = xs->props.trailer_len;
+	}
+	if (tsa->encrypt)
+		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;
+
+	return 1;
+}
+
+/**
  * ixgbe_ipsec_rx - decode ipsec bits from Rx descriptor
  * @rx_ring: receiving ring
  * @rx_desc: receive data descriptor
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index f1bfae0..d7875b3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -1261,7 +1261,7 @@ void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter)
 }
 
 void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens,
-		       u32 fcoe_sof_eof, u32 type_tucmd, u32 mss_l4len_idx)
+		       u32 fceof_saidx, u32 type_tucmd, u32 mss_l4len_idx)
 {
 	struct ixgbe_adv_tx_context_desc *context_desc;
 	u16 i = tx_ring->next_to_use;
@@ -1275,7 +1275,7 @@ void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens,
 	type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
 
 	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
-	context_desc->seqnum_seed	= cpu_to_le32(fcoe_sof_eof);
+	context_desc->fceof_saidx	= cpu_to_le32(fceof_saidx);
 	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
 	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
 }
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 0ee1e5e..6b9a9b2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7756,10 +7756,12 @@ static inline bool ixgbe_ipv6_csum_is_sctp(struct sk_buff *skb)
 }
 
 static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
-			  struct ixgbe_tx_buffer *first)
+			  struct ixgbe_tx_buffer *first,
+			  struct ixgbe_ipsec_tx_data *itd)
 {
 	struct sk_buff *skb = first->skb;
 	u32 vlan_macip_lens = 0;
+	u32 fceof_saidx = 0;
 	u32 type_tucmd = 0;
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
@@ -7800,7 +7802,12 @@ static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
 	vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
 	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
 
-	ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd, 0);
+	if (first->tx_flags & IXGBE_TX_FLAGS_IPSEC) {
+		fceof_saidx |= itd->sa_idx;
+		type_tucmd |= itd->flags | itd->trailer_len;
+	}
+
+	ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, 0);
 }
 
 #define IXGBE_SET_FLAG(_input, _flag, _result) \
@@ -7843,11 +7850,16 @@ static void ixgbe_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc,
 					IXGBE_TX_FLAGS_CSUM,
 					IXGBE_ADVTXD_POPTS_TXSM);
 
-	/* enble IPv4 checksum for TSO */
+	/* enable IPv4 checksum for TSO */
 	olinfo_status |= IXGBE_SET_FLAG(tx_flags,
 					IXGBE_TX_FLAGS_IPV4,
 					IXGBE_ADVTXD_POPTS_IXSM);
 
+	/* enable IPsec */
+	olinfo_status |= IXGBE_SET_FLAG(tx_flags,
+					IXGBE_TX_FLAGS_IPSEC,
+					IXGBE_ADVTXD_POPTS_IPSEC);
+
 	/*
 	 * Check Context must be set if Tx switch is enabled, which it
 	 * always is for case where virtual functions are running
@@ -8306,6 +8318,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
 	u32 tx_flags = 0;
 	unsigned short f;
 	u16 count = TXD_USE_COUNT(skb_headlen(skb));
+	struct ixgbe_ipsec_tx_data ipsec_tx = { 0 };
 	__be16 protocol = skb->protocol;
 	u8 hdr_len = 0;
 
@@ -8410,11 +8423,16 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
 	}
 
 #endif /* IXGBE_FCOE */
+
+#ifdef CONFIG_XFRM_OFFLOAD
+	if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
+		goto out_drop;
+#endif
 	tso = ixgbe_tso(tx_ring, first, &hdr_len);
 	if (tso < 0)
 		goto out_drop;
 	else if (!tso)
-		ixgbe_tx_csum(tx_ring, first);
+		ixgbe_tx_csum(tx_ring, first, &ipsec_tx);
 
 	/* add the ATR filter if ATR is on */
 	if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state))
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 3df0763..0ac725fa 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -2856,7 +2856,7 @@ union ixgbe_adv_rx_desc {
 /* Context descriptors */
 struct ixgbe_adv_tx_context_desc {
 	__le32 vlan_macip_lens;
-	__le32 seqnum_seed;
+	__le32 fceof_saidx;
 	__le32 type_tucmd_mlhl;
 	__le32 mss_l4len_idx;
 };
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 next-queue 09/10] ixgbe: ipsec offload stats
From: Shannon Nelson @ 2017-12-20  0:00 UTC (permalink / raw)
  To: intel-wired-lan, jeffrey.t.kirsher
  Cc: steffen.klassert, sowmini.varadhan, netdev
In-Reply-To: <1513728002-7643-1-git-send-email-shannon.nelson@oracle.com>

Add a simple statistic to count the ipsec offloads.

v2: change per ring counter to adapter rx and tx counters
    move tx_ipsec count to the tx clean code

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h         | 2 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 2 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c   | 2 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    | 5 ++++-
 4 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 3d2b7bf..1dfe147 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -629,10 +629,12 @@ struct ixgbe_adapter {
 	int num_tx_queues;
 	u16 tx_itr_setting;
 	u16 tx_work_limit;
+	u64 tx_ipsec;
 
 	/* Rx fast path data */
 	int num_rx_queues;
 	u16 rx_itr_setting;
+	u64 rx_ipsec;
 
 	/* Port number used to identify VXLAN traffic */
 	__be16 vxlan_port;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index c3e7a81..bcf011e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -114,6 +114,8 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = {
 	{"tx_hwtstamp_timeouts", IXGBE_STAT(tx_hwtstamp_timeouts)},
 	{"tx_hwtstamp_skipped", IXGBE_STAT(tx_hwtstamp_skipped)},
 	{"rx_hwtstamp_cleared", IXGBE_STAT(rx_hwtstamp_cleared)},
+	{"tx_ipsec", IXGBE_STAT(tx_ipsec)},
+	{"rx_ipsec", IXGBE_STAT(rx_ipsec)},
 #ifdef IXGBE_FCOE
 	{"fcoe_bad_fccrc", IXGBE_STAT(stats.fccrc)},
 	{"rx_fcoe_dropped", IXGBE_STAT(stats.fcoerpdc)},
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index c2fd2ac..4b16466 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -837,6 +837,8 @@ void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
 	xo = xfrm_offload(skb);
 	xo->flags = CRYPTO_DONE;
 	xo->status = CRYPTO_SUCCESS;
+
+	adapter->rx_ipsec++;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 6b9a9b2..8c88d32 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1173,7 +1173,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 	struct ixgbe_adapter *adapter = q_vector->adapter;
 	struct ixgbe_tx_buffer *tx_buffer;
 	union ixgbe_adv_tx_desc *tx_desc;
-	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int total_bytes = 0, total_packets = 0, total_ipsec = 0;
 	unsigned int budget = q_vector->tx.work_limit;
 	unsigned int i = tx_ring->next_to_clean;
 
@@ -1204,6 +1204,8 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 		/* update the statistics for this packet */
 		total_bytes += tx_buffer->bytecount;
 		total_packets += tx_buffer->gso_segs;
+		if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_IPSEC)
+			total_ipsec++;
 
 		/* free the skb */
 		if (ring_is_xdp(tx_ring))
@@ -1266,6 +1268,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 	u64_stats_update_end(&tx_ring->syncp);
 	q_vector->tx.total_bytes += total_bytes;
 	q_vector->tx.total_packets += total_packets;
+	adapter->tx_ipsec += total_ipsec;
 
 	if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) {
 		/* schedule immediate reset if we believe we hung */
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 next-queue 00/10] ixgbe: Add ipsec offload
From: Shannon Nelson @ 2017-12-19 23:59 UTC (permalink / raw)
  To: intel-wired-lan, jeffrey.t.kirsher
  Cc: steffen.klassert, sowmini.varadhan, netdev

This is an implementation of the ipsec hardware offload feature for
the ixgbe driver and Intel's 10Gbe series NICs: x540, x550, 82599.
These patches apply to net-next v4.14 as well as Jeff Kirsher's next-queue
v4.15-rc1-206-ge47375b.

The ixgbe NICs support ipsec offload for 1024 Rx and 1024 Tx Security
Associations (SAs), using up to 128 inbound IP addresses, and using the
rfc4106(gcm(aes)) encryption.  This code does not yet support IPv6,
checksum offload, or TSO in conjunction with the ipsec offload - those
will be added in the future.

This code shows improvements in both packet throughput and CPU utilization.
For example, here are some quicky numbers that show the magnitude of the
performance gain on a single run of "iperf -c <dest>" with the ipsec
offload on both ends of a point-to-point connection:

	9.4 Gbps - normal case
	7.6 Gbps - ipsec with offload
	343 Mbps - ipsec no offload

To set up a similar test case, you first need to be sure you have a recent
version of iproute2 that supports the ipsec offload tag, probably something
from ip 4.12 or newer would be best.  I have a shell script that builds
up the appropriate commands for me, but here are the resulting commands
for all tcp traffic between 14.0.0.52 and 14.0.0.70:

For the left side (14.0.0.52):
  ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 proto tcp tmpl \
     proto esp src 14.0.0.52 dst 14.0.0.70 spi 0x07 mode transport reqid 0x07
  ip x p add dir in src 14.0.0.70/24 dst 14.0.0.52/24 proto tcp tmpl \
     proto esp dst 14.0.0.52 src 14.0.0.70 spi 0x07 mode transport reqid 0x07
  ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 spi 0x07 mode transport \
     reqid 0x07 replay-window 32 \
     aead 'rfc4106(gcm(aes))' 0x44434241343332312423222114131211f4f3f2f1 128 \
     sel src 14.0.0.52/24 dst 14.0.0.70/24 proto tcp offload dev eth4 dir out
  ip x s add proto esp dst 14.0.0.52 src 14.0.0.70 spi 0x07 mode transport \
     reqid 0x07 replay-window 32 \
     aead 'rfc4106(gcm(aes))' 0x44434241343332312423222114131211f4f3f2f1 128 \
     sel src 14.0.0.70/24 dst 14.0.0.52/24 proto tcp offload dev eth4 dir in
 
For the right side (14.0.0.70):
  ip x p add dir out src 14.0.0.70/24 dst 14.0.0.52/24 proto tcp tmpl \
     proto esp src 14.0.0.70 dst 14.0.0.52 spi 0x07 mode transport reqid 0x07
  ip x p add dir in src 14.0.0.52/24 dst 14.0.0.70/24 proto tcp tmpl \
     proto esp dst 14.0.0.70 src 14.0.0.52 spi 0x07 mode transport reqid 0x07
  ip x s add proto esp src 14.0.0.70 dst 14.0.0.52 spi 0x07 mode transport \
     reqid 0x07 replay-window 32 \
     aead 'rfc4106(gcm(aes))' 0x44434241343332312423222114131211f4f3f2f1 128 \
     sel src 14.0.0.70/24 dst 14.0.0.52/24 proto tcp offload dev eth4 dir out
  ip x s add proto esp dst 14.0.0.70 src 14.0.0.52 spi 0x07 mode transport \
     reqid 0x07 replay-window 32 \
     aead 'rfc4106(gcm(aes))' 0x44434241343332312423222114131211f4f3f2f1 128 \
     sel src 14.0.0.52/24 dst 14.0.0.70/24 proto tcp offload dev eth4 dir in

In both cases, the command "ip x s flush ; ip x p flush" will clean
it all out and remove the offloads.

Lastly, thanks to Alex Duyck for his early comments.

Please see the individual patches for specific update info.

v3: fixes after comments from those wonderfully pesky kbuild robots
v2: fixes after comments from Alex

Shannon Nelson (10):
  ixgbe: clean up ipsec defines
  ixgbe: add ipsec register access routines
  ixgbe: add ipsec engine start and stop routines
  ixgbe: add ipsec data structures
  ixgbe: add ipsec offload add and remove SA
  ixgbe: restore offloaded SAs after a reset
  ixgbe: process the Rx ipsec offload
  ixgbe: process the Tx ipsec offload
  ixgbe: ipsec offload stats
  ixgbe: register ipsec offload with the xfrm subsystem

 drivers/net/ethernet/intel/ixgbe/Makefile        |   1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe.h         |  33 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |   2 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c   | 923 +++++++++++++++++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h   |  92 +++
 drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c     |   4 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |  39 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h    |  22 +-
 8 files changed, 1093 insertions(+), 23 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
 create mode 100644 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h

-- 
2.7.4

^ permalink raw reply

* [PATCH v3 next-queue 04/10] ixgbe: add ipsec data structures
From: Shannon Nelson @ 2017-12-19 23:59 UTC (permalink / raw)
  To: intel-wired-lan, jeffrey.t.kirsher
  Cc: steffen.klassert, sowmini.varadhan, netdev
In-Reply-To: <1513728002-7643-1-git-send-email-shannon.nelson@oracle.com>

Set up the data structures to be used by the ipsec offload.

v2: ipaddr[] becomes __be32
    increase the hash table size

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h       |  5 ++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h | 40 ++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 1e11462..9487750 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -622,6 +622,7 @@ struct ixgbe_adapter {
 #define IXGBE_FLAG2_EEE_CAPABLE			BIT(14)
 #define IXGBE_FLAG2_EEE_ENABLED			BIT(15)
 #define IXGBE_FLAG2_RX_LEGACY			BIT(16)
+#define IXGBE_FLAG2_IPSEC_ENABLED		BIT(17)
 
 	/* Tx fast path data */
 	int num_tx_queues;
@@ -772,6 +773,10 @@ struct ixgbe_adapter {
 
 #define IXGBE_RSS_KEY_SIZE     40  /* size of RSS Hash Key in bytes */
 	u32 *rss_key;
+
+#ifdef CONFIG_XFRM
+	struct ixgbe_ipsec *ipsec;
+#endif /* CONFIG_XFRM */
 };
 
 static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
index 8fe8289..da3ce78 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
@@ -50,4 +50,44 @@ enum ixgbe_ipsec_tbl_sel {
 #define IXGBE_RXMOD_DECRYPT		0x00000008
 #define IXGBE_RXMOD_IPV6		0x00000010
 
+struct rx_sa {
+	struct hlist_node hlist;
+	struct xfrm_state *xs;
+	__be32 ipaddr[4];
+	u32 key[4];
+	u32 salt;
+	u32 mode;
+	u8  iptbl_ind;
+	bool used;
+	bool decrypt;
+};
+
+struct rx_ip_sa {
+	__be32 ipaddr[4];
+	u32 ref_cnt;
+	bool used;
+};
+
+struct tx_sa {
+	struct xfrm_state *xs;
+	u32 key[4];
+	u32 salt;
+	bool encrypt;
+	bool used;
+};
+
+struct ixgbe_ipsec_tx_data {
+	u32 flags;
+	u16 trailer_len;
+	u16 sa_idx;
+};
+
+struct ixgbe_ipsec {
+	u16 num_rx_sa;
+	u16 num_tx_sa;
+	struct rx_ip_sa *ip_tbl;
+	struct rx_sa *rx_tbl;
+	struct tx_sa *tx_tbl;
+	DECLARE_HASHTABLE(rx_sa_list, 10);
+};
 #endif /* _IXGBE_IPSEC_H_ */
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 next-queue 05/10] ixgbe: add ipsec offload add and remove SA
From: Shannon Nelson @ 2017-12-19 23:59 UTC (permalink / raw)
  To: intel-wired-lan, jeffrey.t.kirsher
  Cc: steffen.klassert, sowmini.varadhan, netdev
In-Reply-To: <1513728002-7643-1-git-send-email-shannon.nelson@oracle.com>

Add the functions for setting up and removing offloaded SAs (Security
Associations) with the x540 hardware.  We set up the callback structure
but we don't yet set the hardware feature bit to be sure the XFRM service
won't actually try to use us for an offload yet.

The software tables are made up to mimic the hardware tables to make it
easier to track what's in the hardware, and the SA table index is used
for the XFRM offload handle.  However, there is a hashing field in the
Rx SA tracking that will be used to facilitate faster table searches in
the Rx fast path.

v2: fix use of num_rx_sa that should be num_tx_sa
    change aes_gcm_name to a const array
    tighten up the key parsing code
    add another label to the init error handling
    move table deletion to a separate function

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h       |   2 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 394 ++++++++++++++++++++++++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  |   1 +
 3 files changed, 396 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 9487750..8f41508 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -1009,7 +1009,9 @@ s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
 		       u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm);
 #ifdef CONFIG_XFRM_OFFLOAD
 void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter);
+void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter);
 #else
 static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) { };
+static inline void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter) { };
 #endif /* CONFIG_XFRM_OFFLOAD */
 #endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 85eaafc..96971ce 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -26,6 +26,8 @@
  ******************************************************************************/
 
 #include "ixgbe.h"
+#include <net/xfrm.h>
+#include <crypto/aead.h>
 
 /**
  * ixgbe_ipsec_set_tx_sa - set the Tx SA registers
@@ -131,6 +133,7 @@ static void ixgbe_ipsec_set_rx_ip(struct ixgbe_hw *hw, u16 idx, __be32 addr[])
  **/
 static void ixgbe_ipsec_clear_hw_tables(struct ixgbe_adapter *adapter)
 {
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 buf[4] = {0, 0, 0, 0};
 	u16 idx;
@@ -149,6 +152,9 @@ static void ixgbe_ipsec_clear_hw_tables(struct ixgbe_adapter *adapter)
 		ixgbe_ipsec_set_tx_sa(hw, idx, buf, 0);
 		ixgbe_ipsec_set_rx_sa(hw, idx, 0, buf, 0, 0, 0);
 	}
+
+	ipsec->num_rx_sa = 0;
+	ipsec->num_tx_sa = 0;
 }
 
 /**
@@ -293,11 +299,397 @@ static void ixgbe_ipsec_start_engine(struct ixgbe_adapter *adapter)
 }
 
 /**
+ * ixgbe_ipsec_find_empty_idx - find the first unused security parameter index
+ * @ipsec: pointer to ipsec struct
+ * @rxtable: true if we need to look in the Rx table
+ *
+ * Returns the first unused index in either the Rx or Tx SA table
+ **/
+static int ixgbe_ipsec_find_empty_idx(struct ixgbe_ipsec *ipsec, bool rxtable)
+{
+	u32 i;
+
+	if (rxtable) {
+		if (ipsec->num_rx_sa == IXGBE_IPSEC_MAX_SA_COUNT)
+			return -ENOSPC;
+
+		/* search rx sa table */
+		for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+			if (!ipsec->rx_tbl[i].used)
+				return i;
+		}
+	} else {
+		if (ipsec->num_tx_sa == IXGBE_IPSEC_MAX_SA_COUNT)
+			return -ENOSPC;
+
+		/* search tx sa table */
+		for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+			if (!ipsec->tx_tbl[i].used)
+				return i;
+		}
+	}
+
+	return -ENOSPC;
+}
+
+/**
+ * ixgbe_ipsec_parse_proto_keys - find the key and salt based on the protocol
+ * @xs: pointer to xfrm_state struct
+ * @mykey: pointer to key array to populate
+ * @mysalt: pointer to salt value to populate
+ *
+ * This copies the protocol keys and salt to our own data tables.  The
+ * 82599 family only supports the one algorithm.
+ **/
+static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs,
+					u32 *mykey, u32 *mysalt)
+{
+	struct net_device *dev = xs->xso.dev;
+	unsigned char *key_data;
+	char *alg_name = NULL;
+	const char aes_gcm_name[] = "rfc4106(gcm(aes))";
+	int key_len;
+
+	if (xs->aead) {
+		key_data = &xs->aead->alg_key[0];
+		key_len = xs->aead->alg_key_len;
+		alg_name = xs->aead->alg_name;
+	} else {
+		netdev_err(dev, "Unsupported IPsec algorithm\n");
+		return -EINVAL;
+	}
+
+	if (strcmp(alg_name, aes_gcm_name)) {
+		netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n",
+			   aes_gcm_name);
+		return -EINVAL;
+	}
+
+	/* The key bytes come down in a bigendian array of bytes, so
+	 * we don't need to do any byteswapping.
+	 * 160 accounts for 16 byte key and 4 byte salt
+	 */
+	if (key_len == 160) {
+		*mysalt = ((u32 *)key_data)[4];
+	} else if (key_len != 128) {
+		netdev_err(dev, "IPsec hw offload only supports keys up to 128 bits with a 32 bit salt\n");
+		return -EINVAL;
+	} else {
+		netdev_info(dev, "IPsec hw offload parameters missing 32 bit salt value\n");
+		*mysalt = 0;
+	}
+	memcpy(mykey, key_data, 16);
+
+	return 0;
+}
+
+/**
+ * ixgbe_ipsec_add_sa - program device with a security association
+ * @xs: pointer to transformer state struct
+ **/
+static int ixgbe_ipsec_add_sa(struct xfrm_state *xs)
+{
+	struct net_device *dev = xs->xso.dev;
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct ixgbe_hw *hw = &adapter->hw;
+	int checked, match, first;
+	u16 sa_idx;
+	int ret;
+	int i;
+
+	if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) {
+		netdev_err(dev, "Unsupported protocol 0x%04x for ipsec offload\n",
+			   xs->id.proto);
+		return -EINVAL;
+	}
+
+	if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) {
+		struct rx_sa rsa;
+
+		if (xs->calg) {
+			netdev_err(dev, "Compression offload not supported\n");
+			return -EINVAL;
+		}
+
+		/* find the first unused index */
+		ret = ixgbe_ipsec_find_empty_idx(ipsec, true);
+		if (ret < 0) {
+			netdev_err(dev, "No space for SA in Rx table!\n");
+			return ret;
+		}
+		sa_idx = (u16)ret;
+
+		memset(&rsa, 0, sizeof(rsa));
+		rsa.used = true;
+		rsa.xs = xs;
+
+		if (rsa.xs->id.proto & IPPROTO_ESP)
+			rsa.decrypt = xs->ealg || xs->aead;
+
+		/* get the key and salt */
+		ret = ixgbe_ipsec_parse_proto_keys(xs, rsa.key, &rsa.salt);
+		if (ret) {
+			netdev_err(dev, "Failed to get key data for Rx SA table\n");
+			return ret;
+		}
+
+		/* get ip for rx sa table */
+		if (xs->xso.flags & XFRM_OFFLOAD_IPV6)
+			memcpy(rsa.ipaddr, &xs->id.daddr.a6, 16);
+		else
+			memcpy(&rsa.ipaddr[3], &xs->id.daddr.a4, 4);
+
+		/* The HW does not have a 1:1 mapping from keys to IP addrs, so
+		 * check for a matching IP addr entry in the table.  If the addr
+		 * already exists, use it; else find an unused slot and add the
+		 * addr.  If one does not exist and there are no unused table
+		 * entries, fail the request.
+		 */
+
+		/* Find an existing match or first not used, and stop looking
+		 * after we've checked all we know we have.
+		 */
+		checked = 0;
+		match = -1;
+		first = -1;
+		for (i = 0;
+		     i < IXGBE_IPSEC_MAX_RX_IP_COUNT &&
+		     (checked < ipsec->num_rx_sa || first < 0);
+		     i++) {
+			if (ipsec->ip_tbl[i].used) {
+				if (!memcmp(ipsec->ip_tbl[i].ipaddr,
+					    rsa.ipaddr, sizeof(rsa.ipaddr))) {
+					match = i;
+					break;
+				}
+				checked++;
+			} else if (first < 0) {
+				first = i;  /* track the first empty seen */
+			}
+		}
+
+		if (ipsec->num_rx_sa == 0)
+			first = 0;
+
+		if (match >= 0) {
+			/* addrs are the same, we should use this one */
+			rsa.iptbl_ind = match;
+			ipsec->ip_tbl[match].ref_cnt++;
+
+		} else if (first >= 0) {
+			/* no matches, but here's an empty slot */
+			rsa.iptbl_ind = first;
+
+			memcpy(ipsec->ip_tbl[first].ipaddr,
+			       rsa.ipaddr, sizeof(rsa.ipaddr));
+			ipsec->ip_tbl[first].ref_cnt = 1;
+			ipsec->ip_tbl[first].used = true;
+
+			ixgbe_ipsec_set_rx_ip(hw, rsa.iptbl_ind, rsa.ipaddr);
+
+		} else {
+			/* no match and no empty slot */
+			netdev_err(dev, "No space for SA in Rx IP SA table\n");
+			memset(&rsa, 0, sizeof(rsa));
+			return -ENOSPC;
+		}
+
+		rsa.mode = IXGBE_RXMOD_VALID;
+		if (rsa.xs->id.proto & IPPROTO_ESP)
+			rsa.mode |= IXGBE_RXMOD_PROTO_ESP;
+		if (rsa.decrypt)
+			rsa.mode |= IXGBE_RXMOD_DECRYPT;
+		if (rsa.xs->xso.flags & XFRM_OFFLOAD_IPV6)
+			rsa.mode |= IXGBE_RXMOD_IPV6;
+
+		/* the preparations worked, so save the info */
+		memcpy(&ipsec->rx_tbl[sa_idx], &rsa, sizeof(rsa));
+
+		ixgbe_ipsec_set_rx_sa(hw, sa_idx, rsa.xs->id.spi, rsa.key,
+				      rsa.salt, rsa.mode, rsa.iptbl_ind);
+		xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_RX_INDEX;
+
+		ipsec->num_rx_sa++;
+
+		/* hash the new entry for faster search in Rx path */
+		hash_add_rcu(ipsec->rx_sa_list, &ipsec->rx_tbl[sa_idx].hlist,
+			     rsa.xs->id.spi);
+	} else {
+		struct tx_sa tsa;
+
+		/* find the first unused index */
+		ret = ixgbe_ipsec_find_empty_idx(ipsec, false);
+		if (ret < 0) {
+			netdev_err(dev, "No space for SA in Tx table\n");
+			return ret;
+		}
+		sa_idx = (u16)ret;
+
+		memset(&tsa, 0, sizeof(tsa));
+		tsa.used = true;
+		tsa.xs = xs;
+
+		if (xs->id.proto & IPPROTO_ESP)
+			tsa.encrypt = xs->ealg || xs->aead;
+
+		ret = ixgbe_ipsec_parse_proto_keys(xs, tsa.key, &tsa.salt);
+		if (ret) {
+			netdev_err(dev, "Failed to get key data for Tx SA table\n");
+			memset(&tsa, 0, sizeof(tsa));
+			return ret;
+		}
+
+		/* the preparations worked, so save the info */
+		memcpy(&ipsec->tx_tbl[sa_idx], &tsa, sizeof(tsa));
+
+		ixgbe_ipsec_set_tx_sa(hw, sa_idx, tsa.key, tsa.salt);
+
+		xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_TX_INDEX;
+
+		ipsec->num_tx_sa++;
+	}
+
+	/* enable the engine if not already warmed up */
+	if (!(adapter->flags2 & IXGBE_FLAG2_IPSEC_ENABLED)) {
+		ixgbe_ipsec_start_engine(adapter);
+		adapter->flags2 |= IXGBE_FLAG2_IPSEC_ENABLED;
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbe_ipsec_del_sa - clear out this specific SA
+ * @xs: pointer to transformer state struct
+ **/
+static void ixgbe_ipsec_del_sa(struct xfrm_state *xs)
+{
+	struct net_device *dev = xs->xso.dev;
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 zerobuf[4] = {0, 0, 0, 0};
+	u16 sa_idx;
+
+	if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) {
+		struct rx_sa *rsa;
+		u8 ipi;
+
+		sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_RX_INDEX;
+		rsa = &ipsec->rx_tbl[sa_idx];
+
+		if (!rsa->used) {
+			netdev_err(dev, "Invalid Rx SA selected sa_idx=%d offload_handle=%lu\n",
+				   sa_idx, xs->xso.offload_handle);
+			return;
+		}
+
+		ixgbe_ipsec_set_rx_sa(hw, sa_idx, 0, zerobuf, 0, 0, 0);
+		hash_del_rcu(&rsa->hlist);
+
+		/* if the IP table entry is referenced by only this SA,
+		 * i.e. ref_cnt is only 1, clear the IP table entry as well
+		 */
+		ipi = rsa->iptbl_ind;
+		if (ipsec->ip_tbl[ipi].ref_cnt > 0) {
+			ipsec->ip_tbl[ipi].ref_cnt--;
+
+			if (!ipsec->ip_tbl[ipi].ref_cnt) {
+				memset(&ipsec->ip_tbl[ipi], 0,
+				       sizeof(struct rx_ip_sa));
+				ixgbe_ipsec_set_rx_ip(hw, ipi, zerobuf);
+			}
+		}
+
+		memset(rsa, 0, sizeof(struct rx_sa));
+		ipsec->num_rx_sa--;
+	} else {
+		sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
+
+		if (!ipsec->tx_tbl[sa_idx].used) {
+			netdev_err(dev, "Invalid Tx SA selected sa_idx=%d offload_handle=%lu\n",
+				   sa_idx, xs->xso.offload_handle);
+			return;
+		}
+
+		ixgbe_ipsec_set_tx_sa(hw, sa_idx, zerobuf, 0);
+		memset(&ipsec->tx_tbl[sa_idx], 0, sizeof(struct tx_sa));
+		ipsec->num_tx_sa--;
+	}
+
+	/* if there are no SAs left, stop the engine to save energy */
+	if (ipsec->num_rx_sa == 0 && ipsec->num_tx_sa == 0) {
+		adapter->flags2 &= ~IXGBE_FLAG2_IPSEC_ENABLED;
+		ixgbe_ipsec_stop_engine(adapter);
+	}
+}
+
+static const struct xfrmdev_ops ixgbe_xfrmdev_ops = {
+	.xdo_dev_state_add = ixgbe_ipsec_add_sa,
+	.xdo_dev_state_delete = ixgbe_ipsec_del_sa,
+};
+
+/**
  * ixgbe_init_ipsec_offload - initialize security registers for IPSec operation
  * @adapter: board private structure
  **/
 void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter)
 {
-	ixgbe_ipsec_clear_hw_tables(adapter);
+	struct ixgbe_ipsec *ipsec;
+	size_t size;
+
+	ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
+	if (!ipsec)
+		goto err1;
+	hash_init(ipsec->rx_sa_list);
+
+	size = sizeof(struct rx_sa) * IXGBE_IPSEC_MAX_SA_COUNT;
+	ipsec->rx_tbl = kzalloc(size, GFP_KERNEL);
+	if (!ipsec->rx_tbl)
+		goto err2;
+
+	size = sizeof(struct tx_sa) * IXGBE_IPSEC_MAX_SA_COUNT;
+	ipsec->tx_tbl = kzalloc(size, GFP_KERNEL);
+	if (!ipsec->tx_tbl)
+		goto err2;
+
+	size = sizeof(struct rx_ip_sa) * IXGBE_IPSEC_MAX_RX_IP_COUNT;
+	ipsec->ip_tbl = kzalloc(size, GFP_KERNEL);
+	if (!ipsec->ip_tbl)
+		goto err2;
+
+	ipsec->num_rx_sa = 0;
+	ipsec->num_tx_sa = 0;
+
+	adapter->ipsec = ipsec;
 	ixgbe_ipsec_stop_engine(adapter);
+	ixgbe_ipsec_clear_hw_tables(adapter);
+
+	return;
+
+err2:
+	kfree(ipsec->ip_tbl);
+	kfree(ipsec->rx_tbl);
+	kfree(ipsec->tx_tbl);
+err1:
+	kfree(adapter->ipsec);
+	netdev_err(adapter->netdev, "Unable to allocate memory for SA tables");
+}
+
+/**
+ * ixgbe_stop_ipsec_offload - tear down the ipsec offload
+ * @adapter: board private structure
+ **/
+void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+
+	adapter->ipsec = NULL;
+	if (ipsec) {
+		kfree(ipsec->ip_tbl);
+		kfree(ipsec->rx_tbl);
+		kfree(ipsec->tx_tbl);
+		kfree(ipsec);
+	}
 }
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 51fb3cf..2b3da0c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10565,6 +10565,7 @@ static void ixgbe_remove(struct pci_dev *pdev)
 	if (netdev->reg_state == NETREG_REGISTERED)
 		unregister_netdev(netdev);
 
+	ixgbe_stop_ipsec_offload(adapter);
 	ixgbe_clear_interrupt_scheme(adapter);
 
 	ixgbe_release_hw_control(adapter);
-- 
2.7.4

^ permalink raw reply related

* [PATCH v2] hv_netvsc: automatically name slave VF network device
From: Stephen Hemminger @ 2017-12-19 23:59 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger

Rename the VF device to ethX_vf based on the ethX as the
synthetic device.  This eliminates the need for delay on setup,
and the PCI (udev based) naming is not reproducible on Hyper-V
anyway. The name of the VF does not matter since all control
operations take place the primary device. It does make the
user experience better to associate the names.

Based on feedback from all.systems.go talk.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
v2 - also handle case where synthetic device gets renamed
     and the case where rename causes clash with pre-existing device name

 drivers/net/hyperv/hyperv_net.h |  1 -
 drivers/net/hyperv/netvsc_drv.c | 84 +++++++++++++++++++++--------------------
 net/core/dev.c                  |  1 +
 3 files changed, 44 insertions(+), 42 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 0db3bd1ea06f..5013189123e6 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -762,7 +762,6 @@ struct net_device_context {
 	/* State to manage the associated VF interface. */
 	struct net_device __rcu *vf_netdev;
 	struct netvsc_vf_pcpu_stats __percpu *vf_stats;
-	struct delayed_work vf_takeover;
 
 	/* 1: allocated, serial number is valid. 0: not allocated */
 	u32 vf_alloc;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index c5584c2d440e..420e11fd3e85 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -49,7 +49,6 @@
 #define RING_SIZE_MIN		64
 
 #define LINKCHANGE_INT (2 * HZ)
-#define VF_TAKEOVER_INT (HZ / 10)
 
 static unsigned int ring_size __ro_after_init = 128;
 module_param(ring_size, uint, S_IRUGO);
@@ -1757,10 +1756,19 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb)
 	return RX_HANDLER_ANOTHER;
 }
 
+/* set the name of VF device based on upper device name */
+static int netvsc_vf_rename(const struct net_device *ndev,
+			    struct net_device *vf_netdev)
+{
+	char vf_name[IFNAMSIZ];
+
+	snprintf(vf_name, IFNAMSIZ, "%s_vf", ndev->name);
+	return dev_change_name(vf_netdev, vf_name);
+}
+
 static int netvsc_vf_join(struct net_device *vf_netdev,
 			  struct net_device *ndev)
 {
-	struct net_device_context *ndev_ctx = netdev_priv(ndev);
 	int ret;
 
 	ret = netdev_rx_handler_register(vf_netdev,
@@ -1783,23 +1791,12 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
 	/* set slave flag before open to prevent IPv6 addrconf */
 	vf_netdev->flags |= IFF_SLAVE;
 
-	schedule_delayed_work(&ndev_ctx->vf_takeover, VF_TAKEOVER_INT);
-
 	call_netdevice_notifiers(NETDEV_JOIN, vf_netdev);
 
-	netdev_info(vf_netdev, "joined to %s\n", ndev->name);
-	return 0;
-
-upper_link_failed:
-	netdev_rx_handler_unregister(vf_netdev);
-rx_handler_failed:
-	return ret;
-}
-
-static void __netvsc_vf_setup(struct net_device *ndev,
-			      struct net_device *vf_netdev)
-{
-	int ret;
+	ret = netvsc_vf_rename(ndev, vf_netdev);
+	if (ret != 0)
+		netdev_warn(vf_netdev,
+			    "can not rename device: (%d)\n", ret);
 
 	/* Align MTU of VF with master */
 	ret = dev_set_mtu(vf_netdev, ndev->mtu);
@@ -1807,34 +1804,21 @@ static void __netvsc_vf_setup(struct net_device *ndev,
 		netdev_warn(vf_netdev,
 			    "unable to change mtu to %u\n", ndev->mtu);
 
+	/* If upper device is already up, bring up slave */
 	if (netif_running(ndev)) {
 		ret = dev_open(vf_netdev);
 		if (ret)
 			netdev_warn(vf_netdev,
 				    "unable to open: %d\n", ret);
 	}
-}
 
-/* Setup VF as slave of the synthetic device.
- * Runs in workqueue to avoid recursion in netlink callbacks.
- */
-static void netvsc_vf_setup(struct work_struct *w)
-{
-	struct net_device_context *ndev_ctx
-		= container_of(w, struct net_device_context, vf_takeover.work);
-	struct net_device *ndev = hv_get_drvdata(ndev_ctx->device_ctx);
-	struct net_device *vf_netdev;
-
-	if (!rtnl_trylock()) {
-		schedule_delayed_work(&ndev_ctx->vf_takeover, 0);
-		return;
-	}
-
-	vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
-	if (vf_netdev)
-		__netvsc_vf_setup(ndev, vf_netdev);
+	netdev_info(vf_netdev, "joined to %s\n", ndev->name);
+	return 0;
 
-	rtnl_unlock();
+ upper_link_failed:
+	netdev_rx_handler_unregister(vf_netdev);
+ rx_handler_failed:
+	return ret;
 }
 
 static int netvsc_register_vf(struct net_device *vf_netdev)
@@ -1894,6 +1878,22 @@ static int netvsc_vf_changed(struct net_device *vf_netdev)
 	return NOTIFY_OK;
 }
 
+/* Synthetic device name changed, rename VF */
+static int netvsc_name_change(struct net_device *ndev)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(ndev);
+	struct net_device *vf_netdev = rtnl_dereference(ndev_ctx->vf_netdev);
+	int err;
+
+	if (vf_netdev) {
+		err = netvsc_vf_rename(ndev, vf_netdev);
+
+		return err ? notifier_from_errno(err) : NOTIFY_OK;
+	}
+
+	return NOTIFY_DONE;
+}
+
 static int netvsc_unregister_vf(struct net_device *vf_netdev)
 {
 	struct net_device *ndev;
@@ -1904,7 +1904,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
 		return NOTIFY_DONE;
 
 	net_device_ctx = netdev_priv(ndev);
-	cancel_delayed_work_sync(&net_device_ctx->vf_takeover);
 
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 
@@ -1947,7 +1946,6 @@ static int netvsc_probe(struct hv_device *dev,
 
 	spin_lock_init(&net_device_ctx->lock);
 	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
-	INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup);
 
 	net_device_ctx->vf_stats
 		= netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats);
@@ -2085,9 +2083,13 @@ static int netvsc_netdev_event(struct notifier_block *this,
 {
 	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
 
-	/* Skip our own events */
-	if (event_dev->netdev_ops == &device_ops)
+	/* Events on synthetic device? */
+	if (event_dev->netdev_ops == &device_ops) {
+		if (event == NETDEV_CHANGENAME)
+			return netvsc_name_change(event_dev);
+
 		return NOTIFY_DONE;
+	}
 
 	/* Avoid non-Ethernet type devices */
 	if (event_dev->type != ARPHRD_ETHER)
diff --git a/net/core/dev.c b/net/core/dev.c
index c7db39926769..f51358a90efa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1238,6 +1238,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
 
 	return err;
 }
+EXPORT_SYMBOL(dev_change_name);
 
 /**
  *	dev_set_alias - change ifalias of a device
-- 
2.11.0

^ permalink raw reply related

* Re: [RFC] hv_netvsc: automatically name slave VF network device
From: Jakub Kicinski @ 2017-12-19 23:53 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Samudrala, Sridhar, netdev, Stephen Hemminger
In-Reply-To: <20171219154405.5bb35eff@xeon-e3>

On Tue, 19 Dec 2017 15:44:05 -0800, Stephen Hemminger wrote:
> On Tue, 19 Dec 2017 15:20:57 -0800
> Jakub Kicinski <jakub.kicinski@netronome.com> wrote:
> 
> > On Tue, 19 Dec 2017 14:50:17 -0800, Stephen Hemminger wrote:  
> > > On Tue, 19 Dec 2017 14:44:37 -0800
> > > "Samudrala, Sridhar" <sridhar.samudrala@intel.com> wrote:
> > >     
> > > >  -static void __netvsc_vf_setup(struct net_device *ndev,      
> > > > > -			      struct net_device *vf_netdev)
> > > > > -{
> > > > > -	int ret;
> > > > > +	/* set the name of VF device based on upper device name */
> > > > > +	snprintf(vf_name, IFNAMSIZ, "%s_vf", ndev->name);
> > > > > +	ret = dev_change_name(vf_netdev, vf_name);
> > > > > +	if (ret != 0)
> > > > > +		netdev_warn(vf_netdev,
> > > > > +			    "can not rename device: (%d)\n", ret);        
> > > > 
> > > > It is possible that upper device name can change after this call.  I 
> > > > noticed this
> > > > when i tried this approach with virtio_net.
> > > > 
> > > > Also, what should happen if the upper device is unloaded? Should we rename
> > > > the VF name?      
> > > 
> > > Yes upper device can change name. So sure, netdevice could trap that
> > > in callback (it already has notifier) and rename VF. Will add that in V2.
> > > 
> > > If upper device is unloaded then it is already decoupled from the VF.
> > > There is no good value to change it back to. The orignal name probably
> > > has been reused by then.    
> > 
> > Both of those issues would be solved by just exposing phys_port_name
> > from the VF driver, and letting systemd do its thing independent of 
> > magic bonds.
> > 
> > Reluctance to do driver work aside :/  
> 
> The port name for Mellanox driver is already set in the driver as a numeric value.
> It indicates which port is used.
> This won't work.

You must be looking at representor ndos.  I don't think MLX NIC drivers
are implementing phys_port_name for normal netdevs at all today.

^ permalink raw reply

* Re: [PATCH net-next] qed*: Utilize FW 8.33.1.0
From: Jakub Kicinski @ 2017-12-19 23:46 UTC (permalink / raw)
  To: Tomer Tayar
  Cc: davem, netdev, linux-rdma, linux-scsi, Ariel Elior,
	Michal Kalderon, Yuval Bason, Ram Amrani, Manish Chopra,
	Chad Dupuis, Manish Rangankar
In-Reply-To: <1513692323-26684-1-git-send-email-Tomer.Tayar@cavium.com>

On Tue, 19 Dec 2017 16:05:23 +0200, Tomer Tayar wrote:
> Sorry for the very long patch.
> The firmware changes are spread all over w/o a good modularity.

Rings false.  Significant portion of this patch is just whitespace 
and comment changes.

^ permalink raw reply

* Re: [RFC] hv_netvsc: automatically name slave VF network device
From: Stephen Hemminger @ 2017-12-19 23:44 UTC (permalink / raw)
  To: Jakub Kicinski; +Cc: Samudrala, Sridhar, netdev, Stephen Hemminger
In-Reply-To: <20171219152057.1e19a0bc@cakuba.netronome.com>

On Tue, 19 Dec 2017 15:20:57 -0800
Jakub Kicinski <jakub.kicinski@netronome.com> wrote:

> On Tue, 19 Dec 2017 14:50:17 -0800, Stephen Hemminger wrote:
> > On Tue, 19 Dec 2017 14:44:37 -0800
> > "Samudrala, Sridhar" <sridhar.samudrala@intel.com> wrote:
> >   
> > >  -static void __netvsc_vf_setup(struct net_device *ndev,    
> > > > -			      struct net_device *vf_netdev)
> > > > -{
> > > > -	int ret;
> > > > +	/* set the name of VF device based on upper device name */
> > > > +	snprintf(vf_name, IFNAMSIZ, "%s_vf", ndev->name);
> > > > +	ret = dev_change_name(vf_netdev, vf_name);
> > > > +	if (ret != 0)
> > > > +		netdev_warn(vf_netdev,
> > > > +			    "can not rename device: (%d)\n", ret);      
> > > 
> > > It is possible that upper device name can change after this call.  I 
> > > noticed this
> > > when i tried this approach with virtio_net.
> > > 
> > > Also, what should happen if the upper device is unloaded? Should we rename
> > > the VF name?    
> > 
> > Yes upper device can change name. So sure, netdevice could trap that
> > in callback (it already has notifier) and rename VF. Will add that in V2.
> > 
> > If upper device is unloaded then it is already decoupled from the VF.
> > There is no good value to change it back to. The orignal name probably
> > has been reused by then.  
> 
> Both of those issues would be solved by just exposing phys_port_name
> from the VF driver, and letting systemd do its thing independent of 
> magic bonds.
> 
> Reluctance to do driver work aside :/

The port name for Mellanox driver is already set in the driver as a numeric value.
It indicates which port is used.
This won't work.

^ permalink raw reply

* Re: [PATCH 3/3] trace: print address if symbol not found
From: kbuild test robot @ 2017-12-19 23:39 UTC (permalink / raw)
  To: Tobin C. Harding
  Cc: kbuild-all, kernel-hardening, Tobin C. Harding, Steven Rostedt,
	Tycho Andersen, Linus Torvalds, Kees Cook, Andrew Morton,
	Daniel Borkmann, Masahiro Yamada, Alexei Starovoitov,
	linux-kernel, Network Development
In-Reply-To: <1513554812-13014-4-git-send-email-me@tobin.cc>

[-- Attachment #1: Type: text/plain, Size: 1762 bytes --]

Hi Tobin,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.15-rc4 next-20171219]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Tobin-C-Harding/kallsyms-don-t-leak-address/20171220-062707
config: i386-randconfig-s1-201751 (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   kernel/trace/trace_events_hist.c: In function 'hist_trigger_stacktrace_print':
>> kernel/trace/trace_events_hist.c:985:3: error: implicit declaration of function 'trace_sprint_symbol_addr' [-Werror=implicit-function-declaration]
      trace_sprint_symbol_addr(str, stacktrace_entries[i]);
      ^~~~~~~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +/trace_sprint_symbol_addr +985 kernel/trace/trace_events_hist.c

   971	
   972	static void hist_trigger_stacktrace_print(struct seq_file *m,
   973						  unsigned long *stacktrace_entries,
   974						  unsigned int max_entries)
   975	{
   976		char str[KSYM_SYMBOL_LEN];
   977		unsigned int spaces = 8;
   978		unsigned int i;
   979	
   980		for (i = 0; i < max_entries; i++) {
   981			if (stacktrace_entries[i] == ULONG_MAX)
   982				return;
   983	
   984			seq_printf(m, "%*c", 1 + spaces, ' ');
 > 985			trace_sprint_symbol_addr(str, stacktrace_entries[i]);
   986			seq_printf(m, "%s\n", str);
   987		}
   988	}
   989	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 31282 bytes --]

^ permalink raw reply

* Re: [pull request][for-next 00/11] Mellanox, mlx5 E-Switch updates 2017-12-19
From: Saeed Mahameed @ 2017-12-19 23:36 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner
  Cc: Saeed Mahameed, David S. Miller, Doug Ledford, Linux Netdev List,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA, Leon Romanovsky
In-Reply-To: <20171219224600.GJ6122-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>

On Tue, Dec 19, 2017 at 2:46 PM, Marcelo Ricardo Leitner
<marcelo.leitner-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
> On Tue, Dec 19, 2017 at 02:39:38PM -0800, Saeed Mahameed wrote:
>> On Tue, Dec 19, 2017 at 1:54 PM, Marcelo Ricardo Leitner
>> <marcelo.leitner-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
>> > On Tue, Dec 19, 2017 at 12:33:29PM -0800, Saeed Mahameed wrote:
>> >> Hi Dave and Doug,
>> >>
>> >> ==============
>> >> This series includes updates for mlx5 E-Switch infrastructures,
>> >> to be merged into net-next and rdma-next trees.
>> >>
>> >> Mark's patches provide E-Switch refactoring that generalize the mlx5
>> >> E-Switch vf representors interfaces and data structures. The serious is
>> >> mainly focused on moving ethernet (netdev) specific representors logic out
>> >> of E-Switch (eswitch.c) into mlx5e representor module (en_rep.c), which
>> >> provides better separation and allows future support for other types of vf
>> >> representors (e.g. RDMA).
>> >>
>> >> Gal's patches at the end of this serious, provide a simple syntax fix and
>> >> two other patches that handles vport ingress/egress ACL steering name
>> >> spaces to be aligned with the Firmware/Hardware specs.
>> >
>> > Patch 10 actually looks quite worrysome if a card would support only
>> > one ingress or egress, but if all of them support both, then it should
>> > be fine yes. Is that possible, to support only one direction?
>> >
>>
>> Hi Marcelo,
>>
>> Patch 10 is a function renaming patch that fixes a function name to
>> correspond with its behavior!
>>
>> Are you asking about all mlx5 cards or all cards in general ?
>> All patches in this series, and specifically patches 10,11 only
>> concern mlx5 eswitch specific implementation and mlx5 hardware spec,
>> they have nothing to do with other vendors cards.
>
> I was asking only bout Mellanox cards.
>
>>
>> All mlx5 cards do support both ingress and egress ACLs per vport (VF),
>> each has its own namespaces (separate pipeline), hence patch #11.
>
> Alright then.
>
>>
>> So i don't see why you worry so much :).
>
> Oh but because currently they are called like:
>
>                 if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
>                         err = init_egress_acl_root_ns(steering);
> ....
>
>                 if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
>                         err = init_ingress_acl_root_ns(steering);
> ....
>
> and it could cause bigger problems if only one kind were supported,
> like trying to setup egress while it's not supported by the card.
> But again, as it seems both are always supported, that's no issue.
>

Ok, now i see your point, and the driver is already ok for that
matter, even for the extreme non existing cases of when one of the
ACLs is not supported,
ACL users need to query for the ACL namespace before using it, e.g:

root_ns = mlx5_get_flow_vport_acl_namespace(dev,
MLX5_FLOW_NAMESPACE_ESW_EGRESS, vport->vport);
if (!root_ns)
           abort;

so if for some reason  MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support) is
false, then any following call to mlx5_get_flow_vport_acl_namespace
will return NULL, which means "not supported" and those cases are
already handled by the mlx5 eswitch driver.

Saeed.

> Thanks,
> Marcelo
>
>>
>> >   Marcelo
>> > --
>> > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
>> > the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
>> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH v3 ipsec-next 0/3] xfrm: offload api fixes
From: Shannon Nelson @ 2017-12-19 23:35 UTC (permalink / raw)
  To: steffen.klassert; +Cc: netdev

These are a couple of little fixes to the xfrm_offload API to make
life just a little easier for the poor driver developer.

Changes from v2:
 - fix up another kbuild robot complaint when CONFIG_XFRM_OFFLOAD is off
 - split out checks into a common function for register and feature check

Changes from v1:
 - removed netdev_err() notes  (Steffen)
 - fixed build when CONFIG_XFRM_OFFLOAD is off (kbuild robot)
 - split into multiple patches (me)


Shannon Nelson (3):
  xfrm: check for xdo_dev_state_free
  xfrm: check for xdo_dev_ops add and delete
  xfrm: wrap xfrmdev_ops with offload config

 include/linux/netdevice.h |  2 +-
 include/net/xfrm.h        |  3 ++-
 net/xfrm/xfrm_device.c    | 32 ++++++++++++++++++--------------
 3 files changed, 21 insertions(+), 16 deletions(-)

-- 
2.7.4

^ permalink raw reply

* [PATCH v3 ipsec-next 3/3] xfrm: wrap xfrmdev_ops with offload config
From: Shannon Nelson @ 2017-12-19 23:35 UTC (permalink / raw)
  To: steffen.klassert; +Cc: netdev
In-Reply-To: <1513726549-7065-1-git-send-email-shannon.nelson@oracle.com>

There's no reason to define netdev->xfrmdev_ops if
the offload facility is not CONFIG'd in.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2eaac7d..145d0de 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1697,7 +1697,7 @@ struct net_device {
 	const struct ndisc_ops *ndisc_ops;
 #endif
 
-#ifdef CONFIG_XFRM
+#ifdef CONFIG_XFRM_OFFLOAD
 	const struct xfrmdev_ops *xfrmdev_ops;
 #endif
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 ipsec-next 1/3] xfrm: check for xdo_dev_state_free
From: Shannon Nelson @ 2017-12-19 23:35 UTC (permalink / raw)
  To: steffen.klassert; +Cc: netdev
In-Reply-To: <1513726549-7065-1-git-send-email-shannon.nelson@oracle.com>

The current XFRM code assumes that we've implemented the
xdo_dev_state_free() callback, even if it is meaningless to the driver.
This patch adds a check for it before calling, as done in other APIs,
to prevent a NULL function pointer kernel crash.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 include/net/xfrm.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e015e16..dfabd04 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1891,7 +1891,8 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
 	 struct net_device *dev = xso->dev;
 
 	if (dev && dev->xfrmdev_ops) {
-		dev->xfrmdev_ops->xdo_dev_state_free(x);
+		if (dev->xfrmdev_ops->xdo_dev_state_free)
+			dev->xfrmdev_ops->xdo_dev_state_free(x);
 		xso->dev = NULL;
 		dev_put(dev);
 	}
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 ipsec-next 2/3] xfrm: check for xdo_dev_ops add and delete
From: Shannon Nelson @ 2017-12-19 23:35 UTC (permalink / raw)
  To: steffen.klassert; +Cc: netdev
In-Reply-To: <1513726549-7065-1-git-send-email-shannon.nelson@oracle.com>

This adds a check for the required add and delete functions up front
at registration time to be sure both are defined.

Since both the features check and the registration check are looking
at the same things, break out the check for both to call.

Lastly, for some reason the feature check was setting xfrmdev_ops to
NULL if the NETIF_F_HW_ESP bit was missing, which would probably
surprise the driver later if the driver turned its NETIF_F_HW_ESP bit
back on.  We shouldn't be messing with the driver's callback list, so
we stop doing that with this patch.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
---
 net/xfrm/xfrm_device.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 30e5746..d271de5 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -142,17 +142,31 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
 EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok);
 #endif
 
-static int xfrm_dev_register(struct net_device *dev)
+static int xfrm_api_check(struct net_device *dev)
 {
-	if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops)
-		return NOTIFY_BAD;
+#ifdef CONFIG_XFRM_OFFLOAD
 	if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) &&
 	    !(dev->features & NETIF_F_HW_ESP))
 		return NOTIFY_BAD;
 
+	if ((dev->features & NETIF_F_HW_ESP) &&
+	    (!(dev->xfrmdev_ops &&
+	       dev->xfrmdev_ops->xdo_dev_state_add &&
+	       dev->xfrmdev_ops->xdo_dev_state_delete)))
+		return NOTIFY_BAD;
+#else
+	if (dev->features & (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM))
+		return NOTIFY_BAD;
+#endif
+
 	return NOTIFY_DONE;
 }
 
+static int xfrm_dev_register(struct net_device *dev)
+{
+	return xfrm_api_check(dev);
+}
+
 static int xfrm_dev_unregister(struct net_device *dev)
 {
 	xfrm_policy_cache_flush();
@@ -161,16 +175,7 @@ static int xfrm_dev_unregister(struct net_device *dev)
 
 static int xfrm_dev_feat_change(struct net_device *dev)
 {
-	if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops)
-		return NOTIFY_BAD;
-	else if (!(dev->features & NETIF_F_HW_ESP))
-		dev->xfrmdev_ops = NULL;
-
-	if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) &&
-	    !(dev->features & NETIF_F_HW_ESP))
-		return NOTIFY_BAD;
-
-	return NOTIFY_DONE;
+	return xfrm_api_check(dev);
 }
 
 static int xfrm_dev_down(struct net_device *dev)
-- 
2.7.4

^ permalink raw reply related

* Re: [RFC] hv_netvsc: automatically name slave VF network device
From: Jakub Kicinski @ 2017-12-19 23:20 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Samudrala, Sridhar, netdev, Stephen Hemminger
In-Reply-To: <20171219145017.78b60959@xeon-e3>

On Tue, 19 Dec 2017 14:50:17 -0800, Stephen Hemminger wrote:
> On Tue, 19 Dec 2017 14:44:37 -0800
> "Samudrala, Sridhar" <sridhar.samudrala@intel.com> wrote:
> 
> >  -static void __netvsc_vf_setup(struct net_device *ndev,  
> > > -			      struct net_device *vf_netdev)
> > > -{
> > > -	int ret;
> > > +	/* set the name of VF device based on upper device name */
> > > +	snprintf(vf_name, IFNAMSIZ, "%s_vf", ndev->name);
> > > +	ret = dev_change_name(vf_netdev, vf_name);
> > > +	if (ret != 0)
> > > +		netdev_warn(vf_netdev,
> > > +			    "can not rename device: (%d)\n", ret);    
> > 
> > It is possible that upper device name can change after this call.  I 
> > noticed this
> > when i tried this approach with virtio_net.
> > 
> > Also, what should happen if the upper device is unloaded? Should we rename
> > the VF name?  
> 
> Yes upper device can change name. So sure, netdevice could trap that
> in callback (it already has notifier) and rename VF. Will add that in V2.
> 
> If upper device is unloaded then it is already decoupled from the VF.
> There is no good value to change it back to. The orignal name probably
> has been reused by then.

Both of those issues would be solved by just exposing phys_port_name
from the VF driver, and letting systemd do its thing independent of 
magic bonds.

Reluctance to do driver work aside :/

^ permalink raw reply

* Re: [PATCH 3/3] trace: print address if symbol not found
From: kbuild test robot @ 2017-12-19 23:19 UTC (permalink / raw)
  To: Tobin C. Harding
  Cc: kbuild-all, kernel-hardening, Tobin C. Harding, Steven Rostedt,
	Tycho Andersen, Linus Torvalds, Kees Cook, Andrew Morton,
	Daniel Borkmann, Masahiro Yamada, Alexei Starovoitov,
	linux-kernel, Network Development
In-Reply-To: <1513554812-13014-4-git-send-email-me@tobin.cc>

[-- Attachment #1: Type: text/plain, Size: 1801 bytes --]

Hi Tobin,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.15-rc4 next-20171219]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Tobin-C-Harding/kallsyms-don-t-leak-address/20171220-062707
config: i386-randconfig-x016-201751 (attached as .config)
compiler: gcc-7 (Debian 7.2.0-12) 7.2.1 20171025
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   kernel/trace/trace_events_hist.c: In function 'hist_trigger_stacktrace_print':
>> kernel/trace/trace_events_hist.c:985:3: error: implicit declaration of function 'trace_sprint_symbol_addr'; did you mean 'trace_sprint_symbol'? [-Werror=implicit-function-declaration]
      trace_sprint_symbol_addr(str, stacktrace_entries[i]);
      ^~~~~~~~~~~~~~~~~~~~~~~~
      trace_sprint_symbol
   cc1: some warnings being treated as errors

vim +985 kernel/trace/trace_events_hist.c

   971	
   972	static void hist_trigger_stacktrace_print(struct seq_file *m,
   973						  unsigned long *stacktrace_entries,
   974						  unsigned int max_entries)
   975	{
   976		char str[KSYM_SYMBOL_LEN];
   977		unsigned int spaces = 8;
   978		unsigned int i;
   979	
   980		for (i = 0; i < max_entries; i++) {
   981			if (stacktrace_entries[i] == ULONG_MAX)
   982				return;
   983	
   984			seq_printf(m, "%*c", 1 + spaces, ' ');
 > 985			trace_sprint_symbol_addr(str, stacktrace_entries[i]);
   986			seq_printf(m, "%s\n", str);
   987		}
   988	}
   989	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 29958 bytes --]

^ permalink raw reply

* Re: [PATCH v2 iproute2 net-next] erspan: add erspan version II support
From: William Tu @ 2017-12-19 23:15 UTC (permalink / raw)
  To: David Ahern; +Cc: Linux Kernel Network Developers
In-Reply-To: <d7283b25-e7a2-76e3-3946-137bc5baf416@gmail.com>

On Tue, Dec 19, 2017 at 2:17 PM, David Ahern <dsahern@gmail.com> wrote:
> On 12/15/17 6:06 PM, William Tu wrote:
>> @@ -343,6 +355,22 @@ get_failed:
>>                               invarg("invalid erspan index\n", *argv);
>>                       if (erspan_idx & ~((1<<20) - 1) || erspan_idx == 0)
>>                               invarg("erspan index must be > 0 and <= 20-bit\n", *argv);
>> +             } else if (strcmp(*argv, "erspan_ver") == 0) {
>> +                     NEXT_ARG();
>> +                     if (get_u8(&erspan_ver, *argv, 0))
>> +                             invarg("invalid erspan version\n", *argv);
>> +                     if (erspan_ver != 1 && erspan_ver != 2)
>> +                             invarg("erspan version must be 1 or 2\n", *argv);
>> +             } else if (strcmp(*argv, "erspan_dir") == 0) {
>> +                     NEXT_ARG();
>> +                     if (get_u8(&erspan_dir, *argv, 0))
>> +                             invarg("invalid erspan direction\n", *argv);
>> +                     if (erspan_dir != 0 && erspan_dir != 1)
>> +                             invarg("erspan direction must be 0(Ingress) or 1(Egress)\n", *argv);
>
> Why not allow ingress and egress as strings and using matches()?
> e.g.,  '... erspan_dir in[gress] ...' or '... erspan_dir e[gress] ...'
>
> Seems much more intuitive than remembering "0" = ingress and "1" = egress.
>
>> @@ -374,8 +402,15 @@ get_failed:
>>               addattr_l(n, 1024, IFLA_GRE_TTL, &ttl, 1);
>>               addattr_l(n, 1024, IFLA_GRE_TOS, &tos, 1);
>>               addattr32(n, 1024, IFLA_GRE_FWMARK, fwmark);
>> -             if (erspan_idx != 0)
>> -                     addattr32(n, 1024, IFLA_GRE_ERSPAN_INDEX, erspan_idx);
>> +             if (erspan_ver) {
>> +                     addattr8(n, 1024, IFLA_GRE_ERSPAN_VER, erspan_ver);
>> +                     if (erspan_ver == 1 && erspan_idx != 0) {
>> +                             addattr32(n, 1024, IFLA_GRE_ERSPAN_INDEX, erspan_idx);
>> +                     } else if (erspan_ver == 2) {
>> +                             addattr8(n, 1024, IFLA_GRE_ERSPAN_DIR, erspan_dir);
>> +                             addattr16(n, 1024, IFLA_GRE_ERSPAN_HWID, erspan_hwid);
>> +                     }
>> +             }
>>       } else {
>>               addattr_l(n, 1024, IFLA_GRE_COLLECT_METADATA, NULL, 0);
>>       }
>> @@ -514,7 +549,25 @@ static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
>>       if (tb[IFLA_GRE_ERSPAN_INDEX]) {
>>               __u32 erspan_idx = rta_getattr_u32(tb[IFLA_GRE_ERSPAN_INDEX]);
>>
>> -             fprintf(f, "erspan_index %u ", erspan_idx);
>> +             print_uint(PRINT_ANY, "erspan_index", "erspan_index %u", erspan_idx);
>> +     }
>> +
>> +     if (tb[IFLA_GRE_ERSPAN_VER]) {
>> +             __u8 erspan_ver = rta_getattr_u8(tb[IFLA_GRE_ERSPAN_VER]);
>> +
>> +             print_uint(PRINT_ANY, "erspan_ver", "erspan_ver %u", erspan_ver);
>> +     }
>> +
>> +     if (tb[IFLA_GRE_ERSPAN_DIR]) {
>> +             __u8 erspan_dir = rta_getattr_u8(tb[IFLA_GRE_ERSPAN_DIR]);
>> +
>> +             print_uint(PRINT_ANY, "erspan_dir", "erspan_dir %u", erspan_dir);
>
> similarly, here can convert the 0/1 to a human string.
>
> Same comments for the changes to link_gre6.c
>
Hi David,
Thanks for the feedback. I will convert to human string and resubmit.
William

^ permalink raw reply

* Re: [net-next: PATCH 0/8] Armada 7k/8k PP2 ACPI support
From: Marcin Wojtas @ 2017-12-19 23:13 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: David Miller, linux-kernel, linux-arm-kernel, netdev,
	Russell King - ARM Linux, Rafael J. Wysocki, Florian Fainelli,
	Antoine Ténart, Thomas Petazzoni, Gregory Clément,
	Ezequiel Garcia, nadavh, Neta Zur Hershkovits, Ard Biesheuvel,
	Grzegorz Jaszczyk, Tomasz Nowicki
In-Reply-To: <20171219204614.GA24156@lunn.ch>

Hi Andrew,

2017-12-19 21:46 GMT+01:00 Andrew Lunn <andrew@lunn.ch>:
>> Of course! v2 will not have such problem, I've been waiting however
>> for the feedback about the ACPI representation. Anyway, I'm strongly
>> leaning towards using _ADR/_CID objects in PHY's nodes for ACPI, so
>> maybe I'll just issue the v2 in order to push the discussion a bit
>> forward.
>
> Hi Marcin
>
> I know ~0 about ACPI. But what seems to be missing for me is
> documentation. You are defining a ABI here, which all future MDIO
> busses, PHYs, and to some extent Ethernet switches need to follow. So
> i would expect this to be documented somewhere.

As the code derives straight from of_mdio.c, there is absolutely no
deviation from Documentation/devicetree/bindings/net/mdio.txt.

>
> How does documentation work in the ACPI world?
>

ACPI has its own specification, the newest one is 6.2
http://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf

It is pretty likely, that if we establish a generic solution for the
MDIO bus, a short section about it may probably require adding to
above - for now I see none.

Best regards,
Marcin

^ permalink raw reply

* Re: [RFC PATCH] virtio_net: Extend virtio to use VF datapath when available
From: Stephen Hemminger @ 2017-12-19 22:53 UTC (permalink / raw)
  To: Samudrala, Sridhar
  Cc: David Miller, mst, netdev, virtualization, alexander.duyck,
	jesse.brandeburg
In-Reply-To: <ddef3208-3cf5-faa2-7572-a561a6b35e2f@intel.com>

On Tue, 19 Dec 2017 14:37:50 -0800
"Samudrala, Sridhar" <sridhar.samudrala@intel.com> wrote:

> On 12/19/2017 11:46 AM, Stephen Hemminger wrote:
> > On Tue, 19 Dec 2017 11:42:33 -0800
> > "Samudrala, Sridhar" <sridhar.samudrala@intel.com> wrote:
> >  
> >> On 12/19/2017 10:41 AM, Stephen Hemminger wrote:  
> >>> On Tue, 19 Dec 2017 13:21:17 -0500 (EST)
> >>> David Miller <davem@davemloft.net> wrote:
> >>>     
> >>>> From: Stephen Hemminger <stephen@networkplumber.org>
> >>>> Date: Tue, 19 Dec 2017 09:55:48 -0800
> >>>>     
> >>>>> could be 10ms, just enough to let udev do its renaming  
> >>>> Please, move to some kind of notification or event based handling of
> >>>> this problem.
> >>>>
> >>>> No delay is safe, what if userspace gets swapped out or whatever
> >>>> else might make userspace stall unexpectedly?
> >>>>     
> >>> The plan is to remove the delay and do the naming in the kernel.
> >>> This was suggested by Lennart since udev is only doing naming policy
> >>> because kernel names were not repeatable.
> >>>
> >>> This makes the VF show up as "ethN_vf" on Hyper-V which is user friendly.
> >>>
> >>> Patch is pending.  
> >> Do we really need to delay the setup until the name is changed?
> >> Can't we call dev_set_mtu() and dev_open() until dev_change_name() is done?
> >>
> >> Thanks
> >> Sridhar  
> > You can call dev_set_mtu, but when dev_open is done the device name
> > can not be changed by userspace.  
> I did a quick test to remove the delay and also the dev_open() call and 
> i don't see
> any issues with virtio taking over the VF datapath.
> Only the netdev_info() messages may show old device name.
> 
> Any specific scenario where we need to explicitly call  VF's dev_open() 
> in the VF setup process?
> I tried i40evf driver loaded after virtio_net  AND  virtio_net loading 
> after i40evf.
> 
> Thanks
> Sridhar

It happens with hotplug. It is possible on Hyper-V to hotplug SR-IOV on
and off while guest is running. If SR-IOV is disabled in host then the
VF device is removed (hotplug) and the inverse. If the master device is
up then the VF device should be brought up by the master device.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox