Netdev List

Netdev List
 help / color / mirror / Atom feed

* [net-next 02/13] ixgbevf: VF2VF TCP RSS
From: Jeff Kirsher @ 2018-08-28 21:35 UTC (permalink / raw)
  To: davem
  Cc: Sebastian Basierski, netdev, nhorman, sassmann, jogreene,
	Jeff Kirsher
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Sebastian Basierski <sebastianx.basierski@intel.com>

During VF2VF communication with RSS, the RSS type was wrongly recognized
and the RSS hash was not calculated as it should be. Packets were
distributed across various queues by accident.
This commit fixes that behaviour and ensures proper RSS type recognition.

Signed-off-by: Sebastian Basierski <sebastianx.basierski@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index d86446d202d5..15deac07fd92 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -3849,6 +3849,10 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring,
 		skb_checksum_help(skb);
 		goto no_csum;
 	}
+
+	if (first->protocol == htons(ETH_P_IP))
+		type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
+
 	/* update TX checksum flag */
 	first->tx_flags |= IXGBE_TX_FLAGS_CSUM;
 	vlan_macip_lens = skb_checksum_start_offset(skb) -
-- 
2.17.1

^ permalink raw reply related

* [net-next 01/13] ixgbe: firmware recovery mode
From: Jeff Kirsher @ 2018-08-28 21:35 UTC (permalink / raw)
  To: davem
  Cc: Sebastian Basierski, netdev, nhorman, sassmann, jogreene,
	Jeff Kirsher
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Sebastian Basierski <sebastianx.basierski@intel.com>

Add a check for FW NVM recovery mode during driver initialization and in
the service task. If the device is in recovery mode, log a message and
unregister the device.

Signed-off-by: Sebastian Basierski <sebastianx.basierski@intel.com>
Tested-by: Don Buchholz <donald.buchholz@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ixgbe/ixgbe_common.c   | 11 +++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 41 +++++++++++++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h |  4 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 15 +++++++
 4 files changed, 71 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 0bd1294ba517..970f71d5da04 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -3484,6 +3484,17 @@ void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf)
 	IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof);
 }
 
+/**
+ * ixgbe_fw_recovery_mode - Check if in FW NVM recovery mode
+ * @hw: pointer to hardware structure
+ */
+bool ixgbe_fw_recovery_mode(struct ixgbe_hw *hw)
+{
+	if (hw->mac.ops.fw_recovery_mode)
+		return hw->mac.ops.fw_recovery_mode(hw);
+	return false;
+}
+
 /**
  *  ixgbe_get_device_caps_generic - Get additional device capabilities
  *  @hw: pointer to hardware structure
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 9a23d33a47ed..604282f03d23 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7774,6 +7774,33 @@ static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter)
 	rtnl_unlock();
 }
 
+/**
+ * ixgbe_check_fw_error - Check firmware for errors
+ * @adapter: the adapter private structure
+ *
+ * Check firmware errors in register FWSM
+ */
+static bool ixgbe_check_fw_error(struct ixgbe_adapter *adapter)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 fwsm;
+
+	/* read fwsm.ext_err_ind register and log errors */
+	fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
+
+	if (fwsm & IXGBE_FWSM_EXT_ERR_IND_MASK ||
+	    !(fwsm & IXGBE_FWSM_FW_VAL_BIT))
+		e_dev_warn("Warning firmware error detected FWSM: 0x%08X\n",
+			   fwsm);
+
+	if (hw->mac.ops.fw_recovery_mode && hw->mac.ops.fw_recovery_mode(hw)) {
+		e_dev_err("Firmware recovery mode detected. Limiting functionality. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
+		return true;
+	}
+
+	return false;
+}
+
 /**
  * ixgbe_service_task - manages and runs subtasks
  * @work: pointer to work_struct containing our data
@@ -7792,6 +7819,15 @@ static void ixgbe_service_task(struct work_struct *work)
 		ixgbe_service_event_complete(adapter);
 		return;
 	}
+	if (ixgbe_check_fw_error(adapter)) {
+		if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
+			rtnl_lock();
+			unregister_netdev(adapter->netdev);
+			rtnl_unlock();
+		}
+		ixgbe_service_event_complete(adapter);
+		return;
+	}
 	if (adapter->flags2 & IXGBE_FLAG2_UDP_TUN_REREG_NEEDED) {
 		rtnl_lock();
 		adapter->flags2 &= ~IXGBE_FLAG2_UDP_TUN_REREG_NEEDED;
@@ -10716,6 +10752,11 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
 		netdev->features |= NETIF_F_LRO;
 
+	if (ixgbe_check_fw_error(adapter)) {
+		err = -EIO;
+		goto err_sw_init;
+	}
+
 	/* make sure the EEPROM is good */
 	if (hw->eeprom.ops.validate_checksum(hw, NULL) < 0) {
 		e_dev_err("The EEPROM Checksum Is Not Valid\n");
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 41bcbb337e83..84f2dba39e36 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -924,6 +924,9 @@ struct ixgbe_nvm_version {
 /* Firmware Semaphore Register */
 #define IXGBE_FWSM_MODE_MASK	0xE
 #define IXGBE_FWSM_FW_MODE_PT	0x4
+#define IXGBE_FWSM_FW_NVM_RECOVERY_MODE	BIT(5)
+#define IXGBE_FWSM_EXT_ERR_IND_MASK	0x01F80000
+#define IXGBE_FWSM_FW_VAL_BIT	BIT(15)
 
 /* ARC Subsystem registers */
 #define IXGBE_HICR      0x15F00
@@ -3461,6 +3464,7 @@ struct ixgbe_mac_operations {
 			      const char *);
 	s32 (*get_thermal_sensor_data)(struct ixgbe_hw *);
 	s32 (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw);
+	bool (*fw_recovery_mode)(struct ixgbe_hw *hw);
 	void (*disable_rx)(struct ixgbe_hw *hw);
 	void (*enable_rx)(struct ixgbe_hw *hw);
 	void (*set_source_address_pruning)(struct ixgbe_hw *, bool,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index a8148c7126e5..10dbaf4f6e80 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -1247,6 +1247,20 @@ static s32 ixgbe_get_bus_info_X550em(struct ixgbe_hw *hw)
 	return 0;
 }
 
+/**
+ * ixgbe_fw_recovery_mode_X550 - Check FW NVM recovery mode
+ * @hw: pointer to hardware structure
+ *
+ * Returns true if in FW NVM recovery mode.
+ */
+static bool ixgbe_fw_recovery_mode_X550(struct ixgbe_hw *hw)
+{
+	u32 fwsm;
+
+	fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
+	return !!(fwsm & IXGBE_FWSM_FW_NVM_RECOVERY_MODE);
+}
+
 /** ixgbe_disable_rx_x550 - Disable RX unit
  *
  *  Enables the Rx DMA unit for x550
@@ -3816,6 +3830,7 @@ static s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr,
 	.enable_rx_buff			= &ixgbe_enable_rx_buff_generic, \
 	.get_thermal_sensor_data	= NULL, \
 	.init_thermal_sensor_thresh	= NULL, \
+	.fw_recovery_mode		= &ixgbe_fw_recovery_mode_X550, \
 	.enable_rx			= &ixgbe_enable_rx_generic, \
 	.disable_rx			= &ixgbe_disable_rx_x550, \
 
-- 
2.17.1

^ permalink raw reply related

* [net-next 03/13] ixgbe: don't clear IPsec sa counters on HW clearing
From: Jeff Kirsher @ 2018-08-28 21:35 UTC (permalink / raw)
  To: davem; +Cc: Shannon Nelson, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

The software SA record counters should not be cleared when clearing
the hardware tables.  Clearing them leaves the counters out of sync
after a driver reset.

Fixes: 63a67fe229ea ("ixgbe: add ipsec offload add and remove SA")
Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index da4322e4daed..e515246d0bce 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -113,7 +113,6 @@ static void ixgbe_ipsec_set_rx_ip(struct ixgbe_hw *hw, u16 idx, __be32 addr[])
  **/
 static void ixgbe_ipsec_clear_hw_tables(struct ixgbe_adapter *adapter)
 {
-	struct ixgbe_ipsec *ipsec = adapter->ipsec;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 buf[4] = {0, 0, 0, 0};
 	u16 idx;
@@ -132,9 +131,6 @@ static void ixgbe_ipsec_clear_hw_tables(struct ixgbe_adapter *adapter)
 		ixgbe_ipsec_set_tx_sa(hw, idx, buf, 0);
 		ixgbe_ipsec_set_rx_sa(hw, idx, 0, buf, 0, 0, 0);
 	}
-
-	ipsec->num_rx_sa = 0;
-	ipsec->num_tx_sa = 0;
 }
 
 /**
-- 
2.17.1

^ permalink raw reply related

* [net-next 05/13] ixgbe: prep IPsec constants for later use
From: Jeff Kirsher @ 2018-08-28 21:35 UTC (permalink / raw)
  To: davem; +Cc: Shannon Nelson, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

Pull out a couple of values from a function so they can be used
later elsewhere.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 434065109b8d..3afb1fe766cd 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -5,6 +5,9 @@
 #include <net/xfrm.h>
 #include <crypto/aead.h>
 
+#define IXGBE_IPSEC_KEY_BITS  160
+static const char aes_gcm_name[] = "rfc4106(gcm(aes))";
+
 /**
  * ixgbe_ipsec_set_tx_sa - set the Tx SA registers
  * @hw: hw specific details
@@ -407,7 +410,6 @@ static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs,
 	struct net_device *dev = xs->xso.dev;
 	unsigned char *key_data;
 	char *alg_name = NULL;
-	const char aes_gcm_name[] = "rfc4106(gcm(aes))";
 	int key_len;
 
 	if (!xs->aead) {
@@ -435,9 +437,9 @@ static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs,
 	 * we don't need to do any byteswapping.
 	 * 160 accounts for 16 byte key and 4 byte salt
 	 */
-	if (key_len == 160) {
+	if (key_len == IXGBE_IPSEC_KEY_BITS) {
 		*mysalt = ((u32 *)key_data)[4];
-	} else if (key_len != 128) {
+	} else if (key_len != (IXGBE_IPSEC_KEY_BITS - (sizeof(*mysalt) * 8))) {
 		netdev_err(dev, "IPsec hw offload only supports keys up to 128 bits with a 32 bit salt\n");
 		return -EINVAL;
 	} else {
-- 
2.17.1

^ permalink raw reply related

* [net-next 06/13] ixgbe: add VF IPsec management
From: Jeff Kirsher @ 2018-08-28 21:35 UTC (permalink / raw)
  To: davem; +Cc: Shannon Nelson, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

Add functions to translate VF IPsec offload add and delete requests
into something the existing code can work with.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ixgbe/ixgbe_ipsec.c    | 256 +++++++++++++++++-
 .../net/ethernet/intel/ixgbe/ixgbe_ipsec.h    |  13 +
 2 files changed, 260 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 3afb1fe766cd..80108e12ab86 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -8,6 +8,8 @@
 #define IXGBE_IPSEC_KEY_BITS  160
 static const char aes_gcm_name[] = "rfc4106(gcm(aes))";
 
+static void ixgbe_ipsec_del_sa(struct xfrm_state *xs);
+
 /**
  * ixgbe_ipsec_set_tx_sa - set the Tx SA registers
  * @hw: hw specific details
@@ -289,6 +291,13 @@ static void ixgbe_ipsec_start_engine(struct ixgbe_adapter *adapter)
 /**
  * ixgbe_ipsec_restore - restore the ipsec HW settings after a reset
  * @adapter: board private structure
+ *
+ * Reload the HW tables from the SW tables after they've been bashed
+ * by a chip reset.
+ *
+ * Any VF entries are removed from the SW and HW tables since either
+ * (a) the VF also gets reset on PF reset and will ask again for the
+ * offloads, or (b) the VF has been removed by a change in the num_vfs.
  **/
 void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter)
 {
@@ -306,16 +315,24 @@ void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter)
 
 	/* reload the Rx and Tx keys */
 	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
-		struct rx_sa *rsa = &ipsec->rx_tbl[i];
-		struct tx_sa *tsa = &ipsec->tx_tbl[i];
-
-		if (rsa->used)
-			ixgbe_ipsec_set_rx_sa(hw, i, rsa->xs->id.spi,
-					      rsa->key, rsa->salt,
-					      rsa->mode, rsa->iptbl_ind);
+		struct rx_sa *r = &ipsec->rx_tbl[i];
+		struct tx_sa *t = &ipsec->tx_tbl[i];
+
+		if (r->used) {
+			if (r->mode & IXGBE_RXTXMOD_VF)
+				ixgbe_ipsec_del_sa(r->xs);
+			else
+				ixgbe_ipsec_set_rx_sa(hw, i, r->xs->id.spi,
+						      r->key, r->salt,
+						      r->mode, r->iptbl_ind);
+		}
 
-		if (tsa->used)
-			ixgbe_ipsec_set_tx_sa(hw, i, tsa->key, tsa->salt);
+		if (t->used) {
+			if (t->mode & IXGBE_RXTXMOD_VF)
+				ixgbe_ipsec_del_sa(t->xs);
+			else
+				ixgbe_ipsec_set_tx_sa(hw, i, t->key, t->salt);
+		}
 	}
 
 	/* reload the IP addrs */
@@ -381,6 +398,8 @@ static struct xfrm_state *ixgbe_ipsec_find_rx_state(struct ixgbe_ipsec *ipsec,
 	rcu_read_lock();
 	hash_for_each_possible_rcu(ipsec->rx_sa_list, rsa, hlist,
 				   (__force u32)spi) {
+		if (rsa->mode & IXGBE_RXTXMOD_VF)
+			continue;
 		if (spi == rsa->xs->id.spi &&
 		    ((ip4 && *daddr == rsa->xs->id.daddr.a4) ||
 		      (!ip4 && !memcmp(daddr, &rsa->xs->id.daddr.a6,
@@ -808,6 +827,225 @@ static const struct xfrmdev_ops ixgbe_xfrmdev_ops = {
 	.xdo_dev_offload_ok = ixgbe_ipsec_offload_ok,
 };
 
+/**
+ * ixgbe_ipsec_vf_clear - clear the tables of data for a VF
+ * @adapter: board private structure
+ * @vf: VF id to be removed
+ **/
+void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf)
+{
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	int i;
+
+	/* search rx sa table */
+	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT && ipsec->num_rx_sa; i++) {
+		if (!ipsec->rx_tbl[i].used)
+			continue;
+		if (ipsec->rx_tbl[i].mode & IXGBE_RXTXMOD_VF &&
+		    ipsec->rx_tbl[i].vf == vf)
+			ixgbe_ipsec_del_sa(ipsec->rx_tbl[i].xs);
+	}
+
+	/* search tx sa table */
+	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT && ipsec->num_tx_sa; i++) {
+		if (!ipsec->tx_tbl[i].used)
+			continue;
+		if (ipsec->tx_tbl[i].mode & IXGBE_RXTXMOD_VF &&
+		    ipsec->tx_tbl[i].vf == vf)
+			ixgbe_ipsec_del_sa(ipsec->tx_tbl[i].xs);
+	}
+}
+
+/**
+ * ixgbe_ipsec_vf_add_sa - translate VF request to SA add
+ * @adapter: board private structure
+ * @msgbuf: The message buffer
+ * @vf: the VF index
+ *
+ * Make up a new xs and algorithm info from the data sent by the VF.
+ * We only need to sketch in just enough to set up the HW offload.
+ * Put the resulting offload_handle into the return message to the VF.
+ *
+ * Returns 0 or error value
+ **/
+int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
+{
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_algo_desc *algo;
+	struct sa_mbx_msg *sam;
+	struct xfrm_state *xs;
+	size_t aead_len;
+	u16 sa_idx;
+	u32 pfsa;
+	int err;
+
+	sam = (struct sa_mbx_msg *)(&msgbuf[1]);
+	if (!adapter->vfinfo[vf].trusted) {
+		e_warn(drv, "VF %d attempted to add an IPsec SA\n", vf);
+		err = -EACCES;
+		goto err_out;
+	}
+
+	/* Tx IPsec offload doesn't seem to work on this
+	 * device, so block these requests for now.
+	 */
+	if (!(sam->flags & XFRM_OFFLOAD_INBOUND)) {
+		err = -ENXIO;
+		goto err_out;
+	}
+
+	xs = kzalloc(sizeof(*xs), GFP_KERNEL);
+	if (unlikely(!xs)) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	xs->xso.flags = sam->flags;
+	xs->id.spi = sam->spi;
+	xs->id.proto = sam->proto;
+	xs->props.family = sam->family;
+	if (xs->props.family == AF_INET6)
+		memcpy(&xs->id.daddr.a6, sam->addr, sizeof(xs->id.daddr.a6));
+	else
+		memcpy(&xs->id.daddr.a4, sam->addr, sizeof(xs->id.daddr.a4));
+	xs->xso.dev = adapter->netdev;
+
+	algo = xfrm_aead_get_byname(aes_gcm_name, IXGBE_IPSEC_AUTH_BITS, 1);
+	if (unlikely(!algo)) {
+		err = -ENOENT;
+		goto err_xs;
+	}
+
+	aead_len = sizeof(*xs->aead) + IXGBE_IPSEC_KEY_BITS / 8;
+	xs->aead = kzalloc(aead_len, GFP_KERNEL);
+	if (unlikely(!xs->aead)) {
+		err = -ENOMEM;
+		goto err_xs;
+	}
+
+	xs->props.ealgo = algo->desc.sadb_alg_id;
+	xs->geniv = algo->uinfo.aead.geniv;
+	xs->aead->alg_icv_len = IXGBE_IPSEC_AUTH_BITS;
+	xs->aead->alg_key_len = IXGBE_IPSEC_KEY_BITS;
+	memcpy(xs->aead->alg_key, sam->key, sizeof(sam->key));
+	memcpy(xs->aead->alg_name, aes_gcm_name, sizeof(aes_gcm_name));
+
+	/* set up the HW offload */
+	err = ixgbe_ipsec_add_sa(xs);
+	if (err)
+		goto err_aead;
+
+	pfsa = xs->xso.offload_handle;
+	if (pfsa < IXGBE_IPSEC_BASE_TX_INDEX) {
+		sa_idx = pfsa - IXGBE_IPSEC_BASE_RX_INDEX;
+		ipsec->rx_tbl[sa_idx].vf = vf;
+		ipsec->rx_tbl[sa_idx].mode |= IXGBE_RXTXMOD_VF;
+	} else {
+		sa_idx = pfsa - IXGBE_IPSEC_BASE_TX_INDEX;
+		ipsec->tx_tbl[sa_idx].vf = vf;
+		ipsec->tx_tbl[sa_idx].mode |= IXGBE_RXTXMOD_VF;
+	}
+
+	msgbuf[1] = xs->xso.offload_handle;
+
+	return 0;
+
+err_aead:
+	memset(xs->aead, 0, sizeof(*xs->aead));
+	kfree(xs->aead);
+err_xs:
+	memset(xs, 0, sizeof(*xs));
+	kfree(xs);
+err_out:
+	msgbuf[1] = err;
+	return err;
+}
+
+/**
+ * ixgbe_ipsec_vf_del_sa - translate VF request to SA delete
+ * @adapter: board private structure
+ * @msgbuf: The message buffer
+ * @vf: the VF index
+ *
+ * Given the offload_handle sent by the VF, look for the related SA table
+ * entry and use its xs field to call for a delete of the SA.
+ *
+ * Note: We silently ignore requests to delete entries that are already
+ *       set to unused because when a VF is set to "DOWN", the PF first
+ *       gets a reset and clears all the VF's entries; then the VF's
+ *       XFRM stack sends individual deletes for each entry, which the
+ *       reset already removed.  In the future it might be good to try to
+ *       optimize this so that fewer unnecessary delete messages are sent.
+ *
+ * Returns 0 or error value
+ **/
+int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
+{
+	struct ixgbe_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_state *xs;
+	u32 pfsa = msgbuf[1];
+	u16 sa_idx;
+
+	if (!adapter->vfinfo[vf].trusted) {
+		e_err(drv, "vf %d attempted to delete an SA\n", vf);
+		return -EPERM;
+	}
+
+	if (pfsa < IXGBE_IPSEC_BASE_TX_INDEX) {
+		struct rx_sa *rsa;
+
+		sa_idx = pfsa - IXGBE_IPSEC_BASE_RX_INDEX;
+		if (sa_idx >= IXGBE_IPSEC_MAX_SA_COUNT) {
+			e_err(drv, "vf %d SA index %d out of range\n",
+			      vf, sa_idx);
+			return -EINVAL;
+		}
+
+		rsa = &ipsec->rx_tbl[sa_idx];
+
+		if (!rsa->used)
+			return 0;
+
+		if (!(rsa->mode & IXGBE_RXTXMOD_VF) ||
+		    rsa->vf != vf) {
+			e_err(drv, "vf %d bad Rx SA index %d\n", vf, sa_idx);
+			return -ENOENT;
+		}
+
+		xs = ipsec->rx_tbl[sa_idx].xs;
+	} else {
+		struct tx_sa *tsa;
+
+		sa_idx = pfsa - IXGBE_IPSEC_BASE_TX_INDEX;
+		if (sa_idx >= IXGBE_IPSEC_MAX_SA_COUNT) {
+			e_err(drv, "vf %d SA index %d out of range\n",
+			      vf, sa_idx);
+			return -EINVAL;
+		}
+
+		tsa = &ipsec->tx_tbl[sa_idx];
+
+		if (!tsa->used)
+			return 0;
+
+		if (!(tsa->mode & IXGBE_RXTXMOD_VF) ||
+		    tsa->vf != vf) {
+			e_err(drv, "vf %d bad Tx SA index %d\n", vf, sa_idx);
+			return -ENOENT;
+		}
+
+		xs = ipsec->tx_tbl[sa_idx].xs;
+	}
+
+	ixgbe_ipsec_del_sa(xs);
+
+	/* remove the xs that was made-up in the add request */
+	memset(xs, 0, sizeof(*xs));
+	kfree(xs);
+
+	return 0;
+}
+
 /**
  * ixgbe_ipsec_tx - setup Tx flags for ipsec offload
  * @tx_ring: outgoing context
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
index 9ef7faadda69..d2b64ff8eb4e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
@@ -26,6 +26,7 @@ enum ixgbe_ipsec_tbl_sel {
 #define IXGBE_RXMOD_PROTO_ESP		0x00000004
 #define IXGBE_RXMOD_DECRYPT		0x00000008
 #define IXGBE_RXMOD_IPV6		0x00000010
+#define IXGBE_RXTXMOD_VF		0x00000020
 
 struct rx_sa {
 	struct hlist_node hlist;
@@ -37,6 +38,7 @@ struct rx_sa {
 	u8  iptbl_ind;
 	bool used;
 	bool decrypt;
+	u32 vf;
 };
 
 struct rx_ip_sa {
@@ -49,8 +51,10 @@ struct tx_sa {
 	struct xfrm_state *xs;
 	u32 key[4];
 	u32 salt;
+	u32 mode;
 	bool encrypt;
 	bool used;
+	u32 vf;
 };
 
 struct ixgbe_ipsec_tx_data {
@@ -67,4 +71,13 @@ struct ixgbe_ipsec {
 	struct tx_sa *tx_tbl;
 	DECLARE_HASHTABLE(rx_sa_list, 10);
 };
+
+struct sa_mbx_msg {
+	__be32 spi;
+	u8 flags;
+	u8 proto;
+	u16 family;
+	__be32 addr[4];
+	u32 key[5];
+};
 #endif /* _IXGBE_IPSEC_H_ */
-- 
2.17.1

^ permalink raw reply related

* [net-next 03/13] ixgbe: don't clear IPsec sa counters on HW clearing
From: Jeff Kirsher @ 2018-08-28 21:35 UTC (permalink / raw)
  To: davem; +Cc: Shannon Nelson, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

The software SA record counters should not be cleared when clearing
the hardware tables.  Clearing them leaves the counters out of sync
after a driver reset.

Fixes: 63a67fe229ea ("ixgbe: add ipsec offload add and remove SA")
Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index da4322e4daed..e515246d0bce 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -113,7 +113,6 @@ static void ixgbe_ipsec_set_rx_ip(struct ixgbe_hw *hw, u16 idx, __be32 addr[])
  **/
 static void ixgbe_ipsec_clear_hw_tables(struct ixgbe_adapter *adapter)
 {
-	struct ixgbe_ipsec *ipsec = adapter->ipsec;
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 buf[4] = {0, 0, 0, 0};
 	u16 idx;
@@ -132,9 +131,6 @@ static void ixgbe_ipsec_clear_hw_tables(struct ixgbe_adapter *adapter)
 		ixgbe_ipsec_set_tx_sa(hw, idx, buf, 0);
 		ixgbe_ipsec_set_rx_sa(hw, idx, 0, buf, 0, 0, 0);
 	}
-
-	ipsec->num_rx_sa = 0;
-	ipsec->num_tx_sa = 0;
 }
 
 /**
-- 
2.17.1

^ permalink raw reply related

* [net-next 11/13] ixgbevf: enable VF IPsec offload operations
From: Jeff Kirsher @ 2018-08-28 21:35 UTC (permalink / raw)
  To: davem; +Cc: Shannon Nelson, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

Add the IPsec initialization into the driver startup and
add the Rx and Tx processing hooks.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/defines.h  |  2 +-
 drivers/net/ethernet/intel/ixgbevf/ethtool.c  |  2 +
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h  | 25 +++++++
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 74 ++++++++++++++-----
 drivers/net/ethernet/intel/ixgbevf/vf.c       |  4 +
 5 files changed, 86 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h
index dd9cd4541d7a..6bace746eaac 100644
--- a/drivers/net/ethernet/intel/ixgbevf/defines.h
+++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
@@ -234,7 +234,7 @@ union ixgbe_adv_rx_desc {
 /* Context descriptors */
 struct ixgbe_adv_tx_context_desc {
 	__le32 vlan_macip_lens;
-	__le32 seqnum_seed;
+	__le32 fceof_saidx;
 	__le32 type_tucmd_mlhl;
 	__le32 mss_l4len_idx;
 };
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 631c91046f39..5399787e07af 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -55,6 +55,8 @@ static struct ixgbe_stats ixgbevf_gstrings_stats[] = {
 	IXGBEVF_STAT("alloc_rx_page", alloc_rx_page),
 	IXGBEVF_STAT("alloc_rx_page_failed", alloc_rx_page_failed),
 	IXGBEVF_STAT("alloc_rx_buff_failed", alloc_rx_buff_failed),
+	IXGBEVF_STAT("tx_ipsec", tx_ipsec),
+	IXGBEVF_STAT("rx_ipsec", rx_ipsec),
 };
 
 #define IXGBEVF_QUEUE_STATS_LEN ( \
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 172637e2f2e6..e399e1c0c54a 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -459,6 +459,31 @@ int ethtool_ioctl(struct ifreq *ifr);
 
 extern void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector);
 
+#ifdef CONFIG_XFRM_OFFLOAD
+void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter);
+void ixgbevf_stop_ipsec_offload(struct ixgbevf_adapter *adapter);
+void ixgbevf_ipsec_restore(struct ixgbevf_adapter *adapter);
+void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring,
+		      union ixgbe_adv_rx_desc *rx_desc,
+		      struct sk_buff *skb);
+int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring,
+		     struct ixgbevf_tx_buffer *first,
+		     struct ixgbevf_ipsec_tx_data *itd);
+#else
+static inline void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter)
+{ }
+static inline void ixgbevf_stop_ipsec_offload(struct ixgbevf_adapter *adapter)
+{ }
+static inline void ixgbevf_ipsec_restore(struct ixgbevf_adapter *adapter) { }
+static inline void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring,
+				    union ixgbe_adv_rx_desc *rx_desc,
+				    struct sk_buff *skb) { }
+static inline int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring,
+				   struct ixgbevf_tx_buffer *first,
+				   struct ixgbevf_ipsec_tx_data *itd)
+{ return 0; }
+#endif /* CONFIG_XFRM_OFFLOAD */
+
 void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter);
 void ixgbe_napi_del_all(struct ixgbevf_adapter *adapter);
 
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 15deac07fd92..17e23f609d74 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -40,7 +40,7 @@ static const char ixgbevf_driver_string[] =
 #define DRV_VERSION "4.1.0-k"
 const char ixgbevf_driver_version[] = DRV_VERSION;
 static char ixgbevf_copyright[] =
-	"Copyright (c) 2009 - 2015 Intel Corporation.";
+	"Copyright (c) 2009 - 2018 Intel Corporation.";
 
 static const struct ixgbevf_info *ixgbevf_info_tbl[] = {
 	[board_82599_vf]	= &ixgbevf_82599_vf_info,
@@ -268,7 +268,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
 	struct ixgbevf_adapter *adapter = q_vector->adapter;
 	struct ixgbevf_tx_buffer *tx_buffer;
 	union ixgbe_adv_tx_desc *tx_desc;
-	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int total_bytes = 0, total_packets = 0, total_ipsec = 0;
 	unsigned int budget = tx_ring->count / 2;
 	unsigned int i = tx_ring->next_to_clean;
 
@@ -299,6 +299,8 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
 		/* update the statistics for this packet */
 		total_bytes += tx_buffer->bytecount;
 		total_packets += tx_buffer->gso_segs;
+		if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_IPSEC)
+			total_ipsec++;
 
 		/* free the skb */
 		if (ring_is_xdp(tx_ring))
@@ -361,6 +363,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
 	u64_stats_update_end(&tx_ring->syncp);
 	q_vector->tx.total_bytes += total_bytes;
 	q_vector->tx.total_packets += total_packets;
+	adapter->tx_ipsec += total_ipsec;
 
 	if (check_for_tx_hang(tx_ring) && ixgbevf_check_tx_hang(tx_ring)) {
 		struct ixgbe_hw *hw = &adapter->hw;
@@ -516,6 +519,9 @@ static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring,
 			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
 	}
 
+	if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP))
+		ixgbevf_ipsec_rx(rx_ring, rx_desc, skb);
+
 	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 }
 
@@ -1012,7 +1018,7 @@ static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring,
 		context_desc = IXGBEVF_TX_CTXTDESC(ring, 0);
 		context_desc->vlan_macip_lens	=
 			cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT);
-		context_desc->seqnum_seed	= 0;
+		context_desc->fceof_saidx	= 0;
 		context_desc->type_tucmd_mlhl	=
 			cpu_to_le32(IXGBE_TXD_CMD_DEXT |
 				    IXGBE_ADVTXD_DTYP_CTXT);
@@ -2200,6 +2206,7 @@ static void ixgbevf_configure(struct ixgbevf_adapter *adapter)
 	ixgbevf_set_rx_mode(adapter->netdev);
 
 	ixgbevf_restore_vlan(adapter);
+	ixgbevf_ipsec_restore(adapter);
 
 	ixgbevf_configure_tx(adapter);
 	ixgbevf_configure_rx(adapter);
@@ -2246,7 +2253,8 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter)
 static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
-	int api[] = { ixgbe_mbox_api_13,
+	int api[] = { ixgbe_mbox_api_14,
+		      ixgbe_mbox_api_13,
 		      ixgbe_mbox_api_12,
 		      ixgbe_mbox_api_11,
 		      ixgbe_mbox_api_10,
@@ -2605,6 +2613,7 @@ static void ixgbevf_set_num_queues(struct ixgbevf_adapter *adapter)
 		case ixgbe_mbox_api_11:
 		case ixgbe_mbox_api_12:
 		case ixgbe_mbox_api_13:
+		case ixgbe_mbox_api_14:
 			if (adapter->xdp_prog &&
 			    hw->mac.max_tx_queues == rss)
 				rss = rss > 3 ? 2 : 1;
@@ -3700,8 +3709,8 @@ static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter)
 }
 
 static void ixgbevf_tx_ctxtdesc(struct ixgbevf_ring *tx_ring,
-				u32 vlan_macip_lens, u32 type_tucmd,
-				u32 mss_l4len_idx)
+				u32 vlan_macip_lens, u32 fceof_saidx,
+				u32 type_tucmd, u32 mss_l4len_idx)
 {
 	struct ixgbe_adv_tx_context_desc *context_desc;
 	u16 i = tx_ring->next_to_use;
@@ -3715,14 +3724,15 @@ static void ixgbevf_tx_ctxtdesc(struct ixgbevf_ring *tx_ring,
 	type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
 
 	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
-	context_desc->seqnum_seed	= 0;
+	context_desc->fceof_saidx	= cpu_to_le32(fceof_saidx);
 	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
 	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
 }
 
 static int ixgbevf_tso(struct ixgbevf_ring *tx_ring,
 		       struct ixgbevf_tx_buffer *first,
-		       u8 *hdr_len)
+		       u8 *hdr_len,
+		       struct ixgbevf_ipsec_tx_data *itd)
 {
 	u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
 	struct sk_buff *skb = first->skb;
@@ -3736,6 +3746,7 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring,
 		unsigned char *hdr;
 	} l4;
 	u32 paylen, l4_offset;
+	u32 fceof_saidx = 0;
 	int err;
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -3761,13 +3772,15 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring,
 	if (ip.v4->version == 4) {
 		unsigned char *csum_start = skb_checksum_start(skb);
 		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+		int len = csum_start - trans_start;
 
 		/* IP header will have to cancel out any data that
-		 * is not a part of the outer IP header
+		 * is not a part of the outer IP header, so set to
+		 * a reverse csum if needed, else init check to 0.
 		 */
-		ip.v4->check = csum_fold(csum_partial(trans_start,
-						      csum_start - trans_start,
-						      0));
+		ip.v4->check = (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) ?
+					   csum_fold(csum_partial(trans_start,
+								  len, 0)) : 0;
 		type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
 
 		ip.v4->tot_len = 0;
@@ -3799,13 +3812,16 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring,
 	mss_l4len_idx |= skb_shinfo(skb)->gso_size << IXGBE_ADVTXD_MSS_SHIFT;
 	mss_l4len_idx |= (1u << IXGBE_ADVTXD_IDX_SHIFT);
 
+	fceof_saidx |= itd->pfsa;
+	type_tucmd |= itd->flags | itd->trailer_len;
+
 	/* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */
 	vlan_macip_lens = l4.hdr - ip.hdr;
 	vlan_macip_lens |= (ip.hdr - skb->data) << IXGBE_ADVTXD_MACLEN_SHIFT;
 	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
 
-	ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens,
-			    type_tucmd, mss_l4len_idx);
+	ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd,
+			    mss_l4len_idx);
 
 	return 1;
 }
@@ -3820,10 +3836,12 @@ static inline bool ixgbevf_ipv6_csum_is_sctp(struct sk_buff *skb)
 }
 
 static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring,
-			    struct ixgbevf_tx_buffer *first)
+			    struct ixgbevf_tx_buffer *first,
+			    struct ixgbevf_ipsec_tx_data *itd)
 {
 	struct sk_buff *skb = first->skb;
 	u32 vlan_macip_lens = 0;
+	u32 fceof_saidx = 0;
 	u32 type_tucmd = 0;
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -3862,7 +3880,11 @@ static void ixgbevf_tx_csum(struct ixgbevf_ring *tx_ring,
 	vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
 	vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
 
-	ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, 0);
+	fceof_saidx |= itd->pfsa;
+	type_tucmd |= itd->flags | itd->trailer_len;
+
+	ixgbevf_tx_ctxtdesc(tx_ring, vlan_macip_lens,
+			    fceof_saidx, type_tucmd, 0);
 }
 
 static __le32 ixgbevf_tx_cmd_type(u32 tx_flags)
@@ -3896,8 +3918,12 @@ static void ixgbevf_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc,
 	if (tx_flags & IXGBE_TX_FLAGS_IPV4)
 		olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_IXSM);
 
-	/* use index 1 context for TSO/FSO/FCOE */
-	if (tx_flags & IXGBE_TX_FLAGS_TSO)
+	/* enable IPsec */
+	if (tx_flags & IXGBE_TX_FLAGS_IPSEC)
+		olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_IPSEC);
+
+	/* use index 1 context for TSO/FSO/FCOE/IPSEC */
+	if (tx_flags & (IXGBE_TX_FLAGS_TSO | IXGBE_TX_FLAGS_IPSEC))
 		olinfo_status |= cpu_to_le32(1u << IXGBE_ADVTXD_IDX_SHIFT);
 
 	/* Check Context must be set if Tx switch is enabled, which it
@@ -4079,6 +4105,7 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
 	int tso;
 	u32 tx_flags = 0;
 	u16 count = TXD_USE_COUNT(skb_headlen(skb));
+	struct ixgbevf_ipsec_tx_data ipsec_tx = { 0 };
 #if PAGE_SIZE > IXGBE_MAX_DATA_PER_TXD
 	unsigned short f;
 #endif
@@ -4123,11 +4150,15 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
 	first->tx_flags = tx_flags;
 	first->protocol = vlan_get_protocol(skb);
 
-	tso = ixgbevf_tso(tx_ring, first, &hdr_len);
+#ifdef CONFIG_XFRM_OFFLOAD
+	if (skb->sp && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx))
+		goto out_drop;
+#endif
+	tso = ixgbevf_tso(tx_ring, first, &hdr_len, &ipsec_tx);
 	if (tso < 0)
 		goto out_drop;
 	else if (!tso)
-		ixgbevf_tx_csum(tx_ring, first);
+		ixgbevf_tx_csum(tx_ring, first, &ipsec_tx);
 
 	ixgbevf_tx_map(tx_ring, first, hdr_len);
 
@@ -4638,6 +4669,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	case ixgbe_mbox_api_11:
 	case ixgbe_mbox_api_12:
 	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
 		netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE -
 				  (ETH_HLEN + ETH_FCS_LEN);
 		break;
@@ -4673,6 +4705,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	pci_set_drvdata(pdev, netdev);
 	netif_carrier_off(netdev);
+	ixgbevf_init_ipsec_offload(adapter);
 
 	ixgbevf_init_last_counter_stats(adapter);
 
@@ -4739,6 +4772,7 @@ static void ixgbevf_remove(struct pci_dev *pdev)
 	if (netdev->reg_state == NETREG_REGISTERED)
 		unregister_netdev(netdev);
 
+	ixgbevf_stop_ipsec_offload(adapter);
 	ixgbevf_clear_interrupt_scheme(adapter);
 	ixgbevf_reset_interrupt_capability(adapter);
 
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index bf0577e819e1..cd3b81300cc7 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -309,6 +309,7 @@ int ixgbevf_get_reta_locked(struct ixgbe_hw *hw, u32 *reta, int num_rx_queues)
 	 * is not supported for this device type.
 	 */
 	switch (hw->api_version) {
+	case ixgbe_mbox_api_14:
 	case ixgbe_mbox_api_13:
 	case ixgbe_mbox_api_12:
 		if (hw->mac.type < ixgbe_mac_X550_vf)
@@ -376,6 +377,7 @@ int ixgbevf_get_rss_key_locked(struct ixgbe_hw *hw, u8 *rss_key)
 	 * or if the operation is not supported for this device type.
 	 */
 	switch (hw->api_version) {
+	case ixgbe_mbox_api_14:
 	case ixgbe_mbox_api_13:
 	case ixgbe_mbox_api_12:
 		if (hw->mac.type < ixgbe_mac_X550_vf)
@@ -540,6 +542,7 @@ static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode)
 		if (xcast_mode == IXGBEVF_XCAST_MODE_PROMISC)
 			return -EOPNOTSUPP;
 		/* Fall threw */
+	case ixgbe_mbox_api_14:
 	case ixgbe_mbox_api_13:
 		break;
 	default:
@@ -890,6 +893,7 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
 	case ixgbe_mbox_api_11:
 	case ixgbe_mbox_api_12:
 	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
 		break;
 	default:
 		return 0;
-- 
2.17.1

^ permalink raw reply related

* [net-next 10/13] ixgbevf: add VF IPsec offload code
From: Jeff Kirsher @ 2018-08-28 21:35 UTC (permalink / raw)
  To: davem; +Cc: Shannon Nelson, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

Add the IPsec offload support code.  This is based off of the similar
code in ixgbe, but instead of writing the SA registers, the VF asks
the PF to set up the offload by sending the offload information to the
PF via the standard mailbox.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/Makefile  |   1 +
 drivers/net/ethernet/intel/ixgbevf/ipsec.c   | 673 +++++++++++++++++++
 drivers/net/ethernet/intel/ixgbevf/ipsec.h   |  66 ++
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h |   8 +
 4 files changed, 748 insertions(+)
 create mode 100644 drivers/net/ethernet/intel/ixgbevf/ipsec.c
 create mode 100644 drivers/net/ethernet/intel/ixgbevf/ipsec.h

diff --git a/drivers/net/ethernet/intel/ixgbevf/Makefile b/drivers/net/ethernet/intel/ixgbevf/Makefile
index aba1e6a37a6a..297d0f0858b5 100644
--- a/drivers/net/ethernet/intel/ixgbevf/Makefile
+++ b/drivers/net/ethernet/intel/ixgbevf/Makefile
@@ -10,4 +10,5 @@ ixgbevf-objs := vf.o \
                 mbx.o \
                 ethtool.o \
                 ixgbevf_main.o
+ixgbevf-$(CONFIG_XFRM_OFFLOAD) += ipsec.o
 
diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c
new file mode 100644
index 000000000000..997cea675a37
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c
@@ -0,0 +1,673 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Oracle and/or its affiliates. All rights reserved. */
+
+#include "ixgbevf.h"
+#include <net/xfrm.h>
+#include <crypto/aead.h>
+
+#define IXGBE_IPSEC_KEY_BITS  160
+static const char aes_gcm_name[] = "rfc4106(gcm(aes))";
+
+/**
+ * ixgbevf_ipsec_set_pf_sa - ask the PF to set up an SA
+ * @adapter: board private structure
+ * @xs: xfrm info to be sent to the PF
+ *
+ * Returns: positive offload handle from the PF, or negative error code
+ **/
+static int ixgbevf_ipsec_set_pf_sa(struct ixgbevf_adapter *adapter,
+				   struct xfrm_state *xs)
+{
+	u32 msgbuf[IXGBE_VFMAILBOX_SIZE] = { 0 };
+	struct ixgbe_hw *hw = &adapter->hw;
+	struct sa_mbx_msg *sam;
+	u16 msglen;
+	int ret;
+
+	/* send the important bits to the PF */
+	sam = (struct sa_mbx_msg *)(&msgbuf[1]);
+	sam->flags = xs->xso.flags;
+	sam->spi = xs->id.spi;
+	sam->proto = xs->id.proto;
+	sam->family = xs->props.family;
+
+	if (xs->props.family == AF_INET6)
+		memcpy(sam->addr, &xs->id.daddr.a6, sizeof(xs->id.daddr.a6));
+	else
+		memcpy(sam->addr, &xs->id.daddr.a4, sizeof(xs->id.daddr.a4));
+	memcpy(sam->key, xs->aead->alg_key, sizeof(sam->key));
+
+	msgbuf[0] = IXGBE_VF_IPSEC_ADD;
+	msglen = sizeof(*sam) + sizeof(msgbuf[0]);
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	ret = hw->mbx.ops.write_posted(hw, msgbuf, msglen);
+	if (ret)
+		goto out;
+
+	msglen = sizeof(msgbuf[0]) * 2;
+	ret = hw->mbx.ops.read_posted(hw, msgbuf, msglen);
+	if (ret)
+		goto out;
+
+	ret = (int)msgbuf[1];
+	if (msgbuf[0] & IXGBE_VT_MSGTYPE_NACK && ret >= 0)
+		ret = -1;
+
+out:
+	spin_unlock_bh(&adapter->mbx_lock);
+
+	return ret;
+}
+
+/**
+ * ixgbevf_ipsec_del_pf_sa - ask the PF to delete an SA
+ * @adapter: board private structure
+ * @pfsa: sa index returned from PF when created, -1 for all
+ *
+ * Returns: 0 on success, or negative error code
+ **/
+static int ixgbevf_ipsec_del_pf_sa(struct ixgbevf_adapter *adapter, int pfsa)
+{
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 msgbuf[2];
+	int err;
+
+	memset(msgbuf, 0, sizeof(msgbuf));
+	msgbuf[0] = IXGBE_VF_IPSEC_DEL;
+	msgbuf[1] = (u32)pfsa;
+
+	spin_lock_bh(&adapter->mbx_lock);
+
+	err = hw->mbx.ops.write_posted(hw, msgbuf, sizeof(msgbuf));
+	if (err)
+		goto out;
+
+	err = hw->mbx.ops.read_posted(hw, msgbuf, sizeof(msgbuf));
+	if (err)
+		goto out;
+
+out:
+	spin_unlock_bh(&adapter->mbx_lock);
+	return err;
+}
+
+/**
+ * ixgbevf_ipsec_restore - restore the IPsec HW settings after a reset
+ * @adapter: board private structure
+ *
+ * Ask the PF to reprogram every in-use Rx and Tx SA from the SW tables
+ * after they've been bashed by a chip reset.  A failed entry is logged
+ * and the remaining entries are still reloaded.
+ **/
+void ixgbevf_ipsec_restore(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+	struct net_device *netdev = adapter->netdev;
+	int i;
+
+	if (!(adapter->netdev->features & NETIF_F_HW_ESP))
+		return;
+
+	/* reload the Rx and Tx keys */
+	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+		struct rx_sa *r = &ipsec->rx_tbl[i];
+		struct tx_sa *t = &ipsec->tx_tbl[i];
+		int ret;
+
+		if (r->used) {
+			ret = ixgbevf_ipsec_set_pf_sa(adapter, r->xs);
+			if (ret < 0)
+				netdev_err(netdev, "reload rx_tbl[%d] failed = %d\n",
+					   i, ret);
+		}
+
+		if (t->used) {
+			ret = ixgbevf_ipsec_set_pf_sa(adapter, t->xs);
+			if (ret < 0)
+				netdev_err(netdev, "reload tx_tbl[%d] failed = %d\n",
+					   i, ret);
+		}
+	}
+}
+
+/**
+ * ixgbevf_ipsec_find_empty_idx - find the first unused SA table index
+ * @ipsec: pointer to IPsec struct
+ * @rxtable: true if we need to look in the Rx table
+ *
+ * Returns the first unused index in either the Rx or Tx SA table
+ **/
+static
+int ixgbevf_ipsec_find_empty_idx(struct ixgbevf_ipsec *ipsec, bool rxtable)
+{
+	u32 i;
+
+	if (rxtable) {
+		if (ipsec->num_rx_sa == IXGBE_IPSEC_MAX_SA_COUNT)
+			return -ENOSPC;
+
+		/* search rx sa table */
+		for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+			if (!ipsec->rx_tbl[i].used)
+				return i;
+		}
+	} else {
+		if (ipsec->num_tx_sa == IXGBE_IPSEC_MAX_SA_COUNT)
+			return -ENOSPC;
+
+		/* search tx sa table */
+		for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+			if (!ipsec->tx_tbl[i].used)
+				return i;
+		}
+	}
+
+	return -ENOSPC;
+}
+
+/**
+ * ixgbevf_ipsec_find_rx_state - find the state that matches
+ * @ipsec: pointer to IPsec struct
+ * @daddr: inbound address to match
+ * @proto: protocol to match
+ * @spi: SPI to match
+ * @ip4: true if using an IPv4 address
+ *
+ * Returns a pointer to the matching SA state information
+ **/
+static
+struct xfrm_state *ixgbevf_ipsec_find_rx_state(struct ixgbevf_ipsec *ipsec,
+					       __be32 *daddr, u8 proto,
+					       __be32 spi, bool ip4)
+{
+	struct xfrm_state *ret = NULL;
+	struct rx_sa *rsa;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu(ipsec->rx_sa_list, rsa, hlist,
+				   (__force u32)spi) {
+		if (spi == rsa->xs->id.spi &&
+		    ((ip4 && *daddr == rsa->xs->id.daddr.a4) ||
+		      (!ip4 && !memcmp(daddr, &rsa->xs->id.daddr.a6,
+				       sizeof(rsa->xs->id.daddr.a6)))) &&
+		    proto == rsa->xs->id.proto) {
+			ret = rsa->xs;
+			xfrm_state_hold(ret);
+			break;
+		}
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+/**
+ * ixgbevf_ipsec_parse_proto_keys - find the key and salt based on the protocol
+ * @xs: pointer to xfrm_state struct
+ * @mykey: pointer to key array to populate
+ * @mysalt: pointer to salt value to populate
+ *
+ * This copies the protocol keys and salt to our own data tables.  The
+ * 82599 family only supports the one algorithm.
+ **/
+static int ixgbevf_ipsec_parse_proto_keys(struct xfrm_state *xs,
+					  u32 *mykey, u32 *mysalt)
+{
+	struct net_device *dev = xs->xso.dev;
+	unsigned char *key_data;
+	char *alg_name = NULL;
+	int key_len;
+
+	if (!xs->aead) {
+		netdev_err(dev, "Unsupported IPsec algorithm\n");
+		return -EINVAL;
+	}
+
+	if (xs->aead->alg_icv_len != IXGBE_IPSEC_AUTH_BITS) {
+		netdev_err(dev, "IPsec offload requires %d bit authentication\n",
+			   IXGBE_IPSEC_AUTH_BITS);
+		return -EINVAL;
+	}
+
+	key_data = &xs->aead->alg_key[0];
+	key_len = xs->aead->alg_key_len;
+	alg_name = xs->aead->alg_name;
+
+	if (strcmp(alg_name, aes_gcm_name)) {
+		netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n",
+			   aes_gcm_name);
+		return -EINVAL;
+	}
+
+	/* The key bytes come down in a big endian array of bytes, so
+	 * we don't need to do any byte swapping.
+	 * 160 accounts for 16 byte key and 4 byte salt
+	 */
+	if (key_len > IXGBE_IPSEC_KEY_BITS) {
+		*mysalt = ((u32 *)key_data)[4];
+	} else if (key_len == IXGBE_IPSEC_KEY_BITS) {
+		*mysalt = 0;
+	} else {
+		netdev_err(dev, "IPsec hw offload only supports keys up to 128 bits with a 32 bit salt\n");
+		return -EINVAL;
+	}
+	memcpy(mykey, key_data, 16);
+
+	return 0;
+}
+
+/**
+ * ixgbevf_ipsec_add_sa - program device with a security association
+ * @xs: pointer to transformer state struct
+ **/
+static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs)
+{
+	struct net_device *dev = xs->xso.dev;
+	struct ixgbevf_adapter *adapter = netdev_priv(dev);
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+	u16 sa_idx;
+	int ret;
+
+	if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) {
+		netdev_err(dev, "Unsupported protocol 0x%04x for IPsec offload\n",
+			   xs->id.proto);
+		return -EINVAL;
+	}
+
+	if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) {
+		struct rx_sa rsa;
+
+		if (xs->calg) {
+			netdev_err(dev, "Compression offload not supported\n");
+			return -EINVAL;
+		}
+
+		/* find the first unused index */
+		ret = ixgbevf_ipsec_find_empty_idx(ipsec, true);
+		if (ret < 0) {
+			netdev_err(dev, "No space for SA in Rx table!\n");
+			return ret;
+		}
+		sa_idx = (u16)ret;
+
+		memset(&rsa, 0, sizeof(rsa));
+		rsa.used = true;
+		rsa.xs = xs;
+
+		if (rsa.xs->id.proto & IPPROTO_ESP)
+			rsa.decrypt = xs->ealg || xs->aead;
+
+		/* get the key and salt */
+		ret = ixgbevf_ipsec_parse_proto_keys(xs, rsa.key, &rsa.salt);
+		if (ret) {
+			netdev_err(dev, "Failed to get key data for Rx SA table\n");
+			return ret;
+		}
+
+		/* get ip for rx sa table */
+		if (xs->props.family == AF_INET6)
+			memcpy(rsa.ipaddr, &xs->id.daddr.a6, 16);
+		else
+			memcpy(&rsa.ipaddr[3], &xs->id.daddr.a4, 4);
+
+		rsa.mode = IXGBE_RXMOD_VALID;
+		if (rsa.xs->id.proto & IPPROTO_ESP)
+			rsa.mode |= IXGBE_RXMOD_PROTO_ESP;
+		if (rsa.decrypt)
+			rsa.mode |= IXGBE_RXMOD_DECRYPT;
+		if (rsa.xs->props.family == AF_INET6)
+			rsa.mode |= IXGBE_RXMOD_IPV6;
+
+		ret = ixgbevf_ipsec_set_pf_sa(adapter, xs);
+		if (ret < 0)
+			return ret;
+		rsa.pfsa = ret;
+
+		/* the preparations worked, so save the info */
+		memcpy(&ipsec->rx_tbl[sa_idx], &rsa, sizeof(rsa));
+
+		xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_RX_INDEX;
+
+		ipsec->num_rx_sa++;
+
+		/* hash the new entry for faster search in Rx path */
+		hash_add_rcu(ipsec->rx_sa_list, &ipsec->rx_tbl[sa_idx].hlist,
+			     (__force u32)rsa.xs->id.spi);
+	} else {
+		struct tx_sa tsa;
+
+		/* find the first unused index */
+		ret = ixgbevf_ipsec_find_empty_idx(ipsec, false);
+		if (ret < 0) {
+			netdev_err(dev, "No space for SA in Tx table\n");
+			return ret;
+		}
+		sa_idx = (u16)ret;
+
+		memset(&tsa, 0, sizeof(tsa));
+		tsa.used = true;
+		tsa.xs = xs;
+
+		if (xs->id.proto & IPPROTO_ESP)
+			tsa.encrypt = xs->ealg || xs->aead;
+
+		ret = ixgbevf_ipsec_parse_proto_keys(xs, tsa.key, &tsa.salt);
+		if (ret) {
+			netdev_err(dev, "Failed to get key data for Tx SA table\n");
+			memset(&tsa, 0, sizeof(tsa));
+			return ret;
+		}
+
+		ret = ixgbevf_ipsec_set_pf_sa(adapter, xs);
+		if (ret < 0)
+			return ret;
+		tsa.pfsa = ret;
+
+		/* the preparations worked, so save the info */
+		memcpy(&ipsec->tx_tbl[sa_idx], &tsa, sizeof(tsa));
+
+		xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_TX_INDEX;
+
+		ipsec->num_tx_sa++;
+	}
+
+	return 0;
+}
+
+/**
+ * ixgbevf_ipsec_del_sa - clear out this specific SA
+ * @xs: pointer to transformer state struct
+ **/
+static void ixgbevf_ipsec_del_sa(struct xfrm_state *xs)
+{
+	struct net_device *dev = xs->xso.dev;
+	struct ixgbevf_adapter *adapter = netdev_priv(dev);
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+	u16 sa_idx;
+
+	if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) {
+		sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_RX_INDEX;
+
+		if (!ipsec->rx_tbl[sa_idx].used) {
+			netdev_err(dev, "Invalid Rx SA selected sa_idx=%d offload_handle=%lu\n",
+				   sa_idx, xs->xso.offload_handle);
+			return;
+		}
+
+		ixgbevf_ipsec_del_pf_sa(adapter, ipsec->rx_tbl[sa_idx].pfsa);
+		hash_del_rcu(&ipsec->rx_tbl[sa_idx].hlist);
+		memset(&ipsec->rx_tbl[sa_idx], 0, sizeof(struct rx_sa));
+		ipsec->num_rx_sa--;
+	} else {
+		sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
+
+		if (!ipsec->tx_tbl[sa_idx].used) {
+			netdev_err(dev, "Invalid Tx SA selected sa_idx=%d offload_handle=%lu\n",
+				   sa_idx, xs->xso.offload_handle);
+			return;
+		}
+
+		ixgbevf_ipsec_del_pf_sa(adapter, ipsec->tx_tbl[sa_idx].pfsa);
+		memset(&ipsec->tx_tbl[sa_idx], 0, sizeof(struct tx_sa));
+		ipsec->num_tx_sa--;
+	}
+}
+
+/**
+ * ixgbevf_ipsec_offload_ok - can this packet use the xfrm hw offload
+ * @skb: current data packet
+ * @xs: pointer to transformer state struct
+ **/
+static bool ixgbevf_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
+{
+	if (xs->props.family == AF_INET) {
+		/* Offload with IPv4 options is not supported yet */
+		if (ip_hdr(skb)->ihl != 5)
+			return false;
+	} else {
+		/* Offload with IPv6 extension headers is not supported yet */
+		if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
+			return false;
+	}
+
+	return true;
+}
+
+static const struct xfrmdev_ops ixgbevf_xfrmdev_ops = {
+	.xdo_dev_state_add = ixgbevf_ipsec_add_sa,
+	.xdo_dev_state_delete = ixgbevf_ipsec_del_sa,
+	.xdo_dev_offload_ok = ixgbevf_ipsec_offload_ok,
+};
+
+/**
+ * ixgbevf_ipsec_tx - setup Tx flags for IPsec offload
+ * @tx_ring: outgoing context
+ * @first: current data packet
+ * @itd: ipsec Tx data for later use in building context descriptor
+ **/
+int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring,
+		     struct ixgbevf_tx_buffer *first,
+		     struct ixgbevf_ipsec_tx_data *itd)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev);
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_state *xs;
+	struct tx_sa *tsa;
+	u16 sa_idx;
+
+	if (unlikely(!first->skb->sp->len)) {
+		netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n",
+			   __func__, first->skb->sp->len);
+		return 0;
+	}
+
+	xs = xfrm_input_state(first->skb);
+	if (unlikely(!xs)) {
+		netdev_err(tx_ring->netdev, "%s: no xfrm_input_state() xs = %p\n",
+			   __func__, xs);
+		return 0;
+	}
+
+	sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
+	if (unlikely(sa_idx > IXGBE_IPSEC_MAX_SA_COUNT)) {
+		netdev_err(tx_ring->netdev, "%s: bad sa_idx=%d handle=%lu\n",
+			   __func__, sa_idx, xs->xso.offload_handle);
+		return 0;
+	}
+
+	tsa = &ipsec->tx_tbl[sa_idx];
+	if (unlikely(!tsa->used)) {
+		netdev_err(tx_ring->netdev, "%s: unused sa_idx=%d\n",
+			   __func__, sa_idx);
+		return 0;
+	}
+
+	itd->pfsa = tsa->pfsa - IXGBE_IPSEC_BASE_TX_INDEX;
+
+	first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CSUM;
+
+	if (xs->id.proto == IPPROTO_ESP) {
+		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
+			      IXGBE_ADVTXD_TUCMD_L4T_TCP;
+		if (first->protocol == htons(ETH_P_IP))
+			itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4;
+
+		/* The actual trailer length is authlen (16 bytes) plus
+		 * 2 bytes for the proto and the padlen values, plus
+		 * padlen bytes of padding.  This ends up not the same
+		 * as the static value found in xs->props.trailer_len (21).
+		 *
+		 * ... but if we're doing GSO, don't bother as the stack
+		 * doesn't add a trailer for those.
+		 */
+		if (!skb_is_gso(first->skb)) {
+			/* The "correct" way to get the auth length would be
+			 * to use
+			 *    authlen = crypto_aead_authsize(xs->data);
+			 * but since we know we only have one size to worry
+			 * about, we can let the compiler use the constant
+			 * and save us a few CPU cycles.
+			 */
+			const int authlen = IXGBE_IPSEC_AUTH_BITS / 8;
+			struct sk_buff *skb = first->skb;
+			u8 padlen;
+			int ret;
+
+			ret = skb_copy_bits(skb, skb->len - (authlen + 2),
+					    &padlen, 1);
+			if (unlikely(ret))
+				return 0;
+			itd->trailer_len = authlen + 2 + padlen;
+		}
+	}
+	if (tsa->encrypt)
+		itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;
+
+	return 1;
+}
+
+/**
+ * ixgbevf_ipsec_rx - decode IPsec bits from Rx descriptor
+ * @rx_ring: receiving ring
+ * @rx_desc: receive data descriptor
+ * @skb: current data packet
+ *
+ * Determine if there was an IPsec encapsulation noticed, and if so set up
+ * the resulting status for later in the receive stack.
+ **/
+void ixgbevf_ipsec_rx(struct ixgbevf_ring *rx_ring,
+		      union ixgbe_adv_rx_desc *rx_desc,
+		      struct sk_buff *skb)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(rx_ring->netdev);
+	__le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
+	__le16 ipsec_pkt_types = cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH |
+					     IXGBE_RXDADV_PKTTYPE_IPSEC_ESP);
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+	struct xfrm_offload *xo = NULL;
+	struct xfrm_state *xs = NULL;
+	struct ipv6hdr *ip6 = NULL;
+	struct iphdr *ip4 = NULL;
+	void *daddr;
+	__be32 spi;
+	u8 *c_hdr;
+	u8 proto;
+
+	/* Find the IP and crypto headers in the data.
+	 * We can assume no VLAN header in the way, b/c the
+	 * hw won't recognize the IPsec packet and anyway the
+	 * VLAN device currently doesn't support xfrm offload.
+	 */
+	if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPV4)) {
+		ip4 = (struct iphdr *)(skb->data + ETH_HLEN);
+		daddr = &ip4->daddr;
+		c_hdr = (u8 *)ip4 + ip4->ihl * 4;
+	} else if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPV6)) {
+		ip6 = (struct ipv6hdr *)(skb->data + ETH_HLEN);
+		daddr = &ip6->daddr;
+		c_hdr = (u8 *)ip6 + sizeof(struct ipv6hdr);
+	} else {
+		return;
+	}
+
+	switch (pkt_info & ipsec_pkt_types) {
+	case cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH):
+		spi = ((struct ip_auth_hdr *)c_hdr)->spi;
+		proto = IPPROTO_AH;
+		break;
+	case cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_ESP):
+		spi = ((struct ip_esp_hdr *)c_hdr)->spi;
+		proto = IPPROTO_ESP;
+		break;
+	default:
+		return;
+	}
+
+	xs = ixgbevf_ipsec_find_rx_state(ipsec, daddr, proto, spi, !!ip4);
+	if (unlikely(!xs))
+		return;
+
+	skb->sp = secpath_dup(skb->sp);
+	if (unlikely(!skb->sp))
+		return;
+
+	skb->sp->xvec[skb->sp->len++] = xs;
+	skb->sp->olen++;
+	xo = xfrm_offload(skb);
+	xo->flags = CRYPTO_DONE;
+	xo->status = CRYPTO_SUCCESS;
+
+	adapter->rx_ipsec++;
+}
+
+/**
+ * ixgbevf_init_ipsec_offload - allocate SA tables and enable IPsec offload
+ * @adapter: board private structure
+ **/
+void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbevf_ipsec *ipsec;
+	size_t size;
+
+	switch (adapter->hw.api_version) {
+	case ixgbe_mbox_api_14:
+		break;
+	default:
+		return;
+	}
+
+	ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
+	if (!ipsec)
+		goto err1;
+	hash_init(ipsec->rx_sa_list);
+
+	size = sizeof(struct rx_sa) * IXGBE_IPSEC_MAX_SA_COUNT;
+	ipsec->rx_tbl = kzalloc(size, GFP_KERNEL);
+	if (!ipsec->rx_tbl)
+		goto err2;
+
+	size = sizeof(struct tx_sa) * IXGBE_IPSEC_MAX_SA_COUNT;
+	ipsec->tx_tbl = kzalloc(size, GFP_KERNEL);
+	if (!ipsec->tx_tbl)
+		goto err2;
+
+	ipsec->num_rx_sa = 0;
+	ipsec->num_tx_sa = 0;
+
+	adapter->ipsec = ipsec;
+
+	adapter->netdev->xfrmdev_ops = &ixgbevf_xfrmdev_ops;
+
+#define IXGBEVF_ESP_FEATURES	(NETIF_F_HW_ESP | \
+				 NETIF_F_HW_ESP_TX_CSUM | \
+				 NETIF_F_GSO_ESP)
+
+	adapter->netdev->features |= IXGBEVF_ESP_FEATURES;
+	adapter->netdev->hw_enc_features |= IXGBEVF_ESP_FEATURES;
+
+	return;
+
+err2:
+	kfree(ipsec->rx_tbl);
+	kfree(ipsec->tx_tbl);
+	kfree(ipsec);
+err1:
+	netdev_err(adapter->netdev, "Unable to allocate memory for SA tables");
+}
+
+/**
+ * ixgbevf_stop_ipsec_offload - tear down the IPsec offload
+ * @adapter: board private structure
+ **/
+void ixgbevf_stop_ipsec_offload(struct ixgbevf_adapter *adapter)
+{
+	struct ixgbevf_ipsec *ipsec = adapter->ipsec;
+
+	adapter->ipsec = NULL;
+	if (ipsec) {
+		kfree(ipsec->rx_tbl);
+		kfree(ipsec->tx_tbl);
+		kfree(ipsec);
+	}
+}
diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.h b/drivers/net/ethernet/intel/ixgbevf/ipsec.h
new file mode 100644
index 000000000000..3740725041c3
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2018 Oracle and/or its affiliates. All rights reserved. */
+
+#ifndef _IXGBEVF_IPSEC_H_
+#define _IXGBEVF_IPSEC_H_
+
+#define IXGBE_IPSEC_MAX_SA_COUNT	1024
+#define IXGBE_IPSEC_BASE_RX_INDEX	0
+#define IXGBE_IPSEC_BASE_TX_INDEX	IXGBE_IPSEC_MAX_SA_COUNT
+#define IXGBE_IPSEC_AUTH_BITS		128
+
+#define IXGBE_RXMOD_VALID		0x00000001
+#define IXGBE_RXMOD_PROTO_ESP		0x00000004
+#define IXGBE_RXMOD_DECRYPT		0x00000008
+#define IXGBE_RXMOD_IPV6		0x00000010
+
+struct rx_sa {
+	struct hlist_node hlist;
+	struct xfrm_state *xs;
+	__be32 ipaddr[4];
+	u32 key[4];
+	u32 salt;
+	u32 mode;
+	u32 pfsa;
+	bool used;
+	bool decrypt;
+};
+
+struct rx_ip_sa {
+	__be32 ipaddr[4];
+	u32 ref_cnt;
+	bool used;
+};
+
+struct tx_sa {
+	struct xfrm_state *xs;
+	u32 key[4];
+	u32 salt;
+	u32 pfsa;
+	bool encrypt;
+	bool used;
+};
+
+struct ixgbevf_ipsec_tx_data {
+	u32 flags;
+	u16 trailer_len;
+	u16 pfsa;
+};
+
+struct ixgbevf_ipsec {
+	u16 num_rx_sa;
+	u16 num_tx_sa;
+	struct rx_sa *rx_tbl;
+	struct tx_sa *tx_tbl;
+	DECLARE_HASHTABLE(rx_sa_list, 10);
+};
+
+struct sa_mbx_msg {
+	__be32 spi;
+	u8 flags;
+	u8 proto;
+	u16 family;
+	__be32 addr[4];
+	u32 key[5];
+};
+#endif /* _IXGBEVF_IPSEC_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 56a1031dcc07..172637e2f2e6 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -14,6 +14,7 @@
 #include <net/xdp.h>
 
 #include "vf.h"
+#include "ipsec.h"
 
 #define IXGBE_MAX_TXD_PWR	14
 #define IXGBE_MAX_DATA_PER_TXD	BIT(IXGBE_MAX_TXD_PWR)
@@ -163,6 +164,7 @@ struct ixgbevf_ring {
 #define IXGBE_TX_FLAGS_VLAN		BIT(1)
 #define IXGBE_TX_FLAGS_TSO		BIT(2)
 #define IXGBE_TX_FLAGS_IPV4		BIT(3)
+#define IXGBE_TX_FLAGS_IPSEC		BIT(4)
 #define IXGBE_TX_FLAGS_VLAN_MASK	0xffff0000
 #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK	0x0000e000
 #define IXGBE_TX_FLAGS_VLAN_SHIFT	16
@@ -338,6 +340,7 @@ struct ixgbevf_adapter {
 	struct ixgbevf_ring *tx_ring[MAX_TX_QUEUES]; /* One per active queue */
 	u64 restart_queue;
 	u32 tx_timeout_count;
+	u64 tx_ipsec;
 
 	/* RX */
 	int num_rx_queues;
@@ -348,6 +351,7 @@ struct ixgbevf_adapter {
 	u64 alloc_rx_page_failed;
 	u64 alloc_rx_buff_failed;
 	u64 alloc_rx_page;
+	u64 rx_ipsec;
 
 	struct msix_entry *msix_entries;
 
@@ -384,6 +388,10 @@ struct ixgbevf_adapter {
 	u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE];
 	u32 flags;
 #define IXGBEVF_FLAGS_LEGACY_RX		BIT(1)
+
+#ifdef CONFIG_XFRM
+	struct ixgbevf_ipsec *ipsec;
+#endif /* CONFIG_XFRM */
 };
 
 enum ixbgevf_state_t {
-- 
2.17.1

^ permalink raw reply related

* [net-next 04/13] ixgbe: reload IPsec IP table after sa tables
From: Jeff Kirsher @ 2018-08-28 21:35 UTC (permalink / raw)
  To: davem; +Cc: Shannon Nelson, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

Restore the IPsec hardware IP table after reloading the SA tables.
This doesn't make much difference now, but will matter when we add
support for VF IPsec offloads.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index e515246d0bce..434065109b8d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -301,14 +301,6 @@ void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter)
 	ixgbe_ipsec_clear_hw_tables(adapter);
 	ixgbe_ipsec_start_engine(adapter);
 
-	/* reload the IP addrs */
-	for (i = 0; i < IXGBE_IPSEC_MAX_RX_IP_COUNT; i++) {
-		struct rx_ip_sa *ipsa = &ipsec->ip_tbl[i];
-
-		if (ipsa->used)
-			ixgbe_ipsec_set_rx_ip(hw, i, ipsa->ipaddr);
-	}
-
 	/* reload the Rx and Tx keys */
 	for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
 		struct rx_sa *rsa = &ipsec->rx_tbl[i];
@@ -322,6 +314,14 @@ void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter)
 		if (tsa->used)
 			ixgbe_ipsec_set_tx_sa(hw, i, tsa->key, tsa->salt);
 	}
+
+	/* reload the IP addrs */
+	for (i = 0; i < IXGBE_IPSEC_MAX_RX_IP_COUNT; i++) {
+		struct rx_ip_sa *ipsa = &ipsec->ip_tbl[i];
+
+		if (ipsa->used)
+			ixgbe_ipsec_set_rx_ip(hw, i, ipsa->ipaddr);
+	}
 }
 
 /**
-- 
2.17.1

^ permalink raw reply related

* [net-next 07/13] ixgbe: add VF IPsec offload enable flag
From: Jeff Kirsher @ 2018-08-28 21:35 UTC (permalink / raw)
  To: davem; +Cc: Shannon Nelson, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

Add a private flag to expressly enable support for VF IPsec offload.
The VF will have to be "trusted" in order to use the hardware offload,
but because of the general concerns of managing VF access, we want to
be sure the user specifically is enabling the feature.

This is likely a candidate for becoming a netdev feature flag.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h         | 1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 9 +++++++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c   | 3 ++-
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 4fc906c6166b..89e709ce1947 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -605,6 +605,7 @@ struct ixgbe_adapter {
 #define IXGBE_FLAG2_EEE_ENABLED			BIT(15)
 #define IXGBE_FLAG2_RX_LEGACY			BIT(16)
 #define IXGBE_FLAG2_IPSEC_ENABLED		BIT(17)
+#define IXGBE_FLAG2_VF_IPSEC_ENABLED		BIT(18)
 
 	/* Tx fast path data */
 	int num_tx_queues;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index e5a8461fe6a9..732b1e6ecc43 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -136,6 +136,8 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
 static const char ixgbe_priv_flags_strings[][ETH_GSTRING_LEN] = {
 #define IXGBE_PRIV_FLAGS_LEGACY_RX	BIT(0)
 	"legacy-rx",
+#define IXGBE_PRIV_FLAGS_VF_IPSEC_EN	BIT(1)
+	"vf-ipsec",
 };
 
 #define IXGBE_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbe_priv_flags_strings)
@@ -3409,6 +3411,9 @@ static u32 ixgbe_get_priv_flags(struct net_device *netdev)
 	if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY)
 		priv_flags |= IXGBE_PRIV_FLAGS_LEGACY_RX;
 
+	if (adapter->flags2 & IXGBE_FLAG2_VF_IPSEC_ENABLED)
+		priv_flags |= IXGBE_PRIV_FLAGS_VF_IPSEC_EN;
+
 	return priv_flags;
 }
 
@@ -3421,6 +3426,10 @@ static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags)
 	if (priv_flags & IXGBE_PRIV_FLAGS_LEGACY_RX)
 		flags2 |= IXGBE_FLAG2_RX_LEGACY;
 
+	flags2 &= ~IXGBE_FLAG2_VF_IPSEC_ENABLED;
+	if (priv_flags & IXGBE_PRIV_FLAGS_VF_IPSEC_EN)
+		flags2 |= IXGBE_FLAG2_VF_IPSEC_ENABLED;
+
 	if (flags2 != adapter->flags2) {
 		adapter->flags2 = flags2;
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 80108e12ab86..ecd01fade960 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -880,7 +880,8 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
 	int err;
 
 	sam = (struct sa_mbx_msg *)(&msgbuf[1]);
-	if (!adapter->vfinfo[vf].trusted) {
+	if (!adapter->vfinfo[vf].trusted ||
+	    !(adapter->flags2 & IXGBE_FLAG2_VF_IPSEC_ENABLED)) {
 		e_warn(drv, "VF %d attempted to add an IPsec SA\n", vf);
 		err = -EACCES;
 		goto err_out;
-- 
2.17.1

^ permalink raw reply related

* [net-next 08/13] ixgbe: add VF IPsec offload request message handling
From: Jeff Kirsher @ 2018-08-28 21:35 UTC (permalink / raw)
  To: davem; +Cc: Shannon Nelson, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

Add "add" and "delete" messages for IPsec offload requests from
the VF.  These call into the IPsec functions that can translate
the message buffer into a useful IPsec offload.

These new messages bump the mbox API version to 1.4.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h      | 19 ++++++++++++++-----
 drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h  |  5 +++++
 .../net/ethernet/intel/ixgbe/ixgbe_sriov.c    | 17 ++++++++++++++++-
 3 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 89e709ce1947..5c6fd42e90ed 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -1004,15 +1004,24 @@ void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
 		    struct sk_buff *skb);
 int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, struct ixgbe_tx_buffer *first,
 		   struct ixgbe_ipsec_tx_data *itd);
+void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf);
+int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *mbuf, u32 vf);
+int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *mbuf, u32 vf);
 #else
-static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) { };
-static inline void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter) { };
-static inline void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) { };
+static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) { }
+static inline void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter) { }
+static inline void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) { }
 static inline void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
 				  union ixgbe_adv_rx_desc *rx_desc,
-				  struct sk_buff *skb) { };
+				  struct sk_buff *skb) { }
 static inline int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
 				 struct ixgbe_tx_buffer *first,
-				 struct ixgbe_ipsec_tx_data *itd) { return 0; };
+				 struct ixgbe_ipsec_tx_data *itd) { return 0; }
+static inline void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter,
+					u32 vf) { }
+static inline int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter,
+					u32 *mbuf, u32 vf) { return -EACCES; }
+static inline int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter,
+					u32 *mbuf, u32 vf) { return -EACCES; }
 #endif /* CONFIG_XFRM_OFFLOAD */
 #endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
index e085b6520dac..a148534d7256 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h
@@ -50,6 +50,7 @@ enum ixgbe_pfvf_api_rev {
 	ixgbe_mbox_api_11,	/* API version 1.1, linux/freebsd VF driver */
 	ixgbe_mbox_api_12,	/* API version 1.2, linux/freebsd VF driver */
 	ixgbe_mbox_api_13,	/* API version 1.3, linux/freebsd VF driver */
+	ixgbe_mbox_api_14,	/* API version 1.4, linux/freebsd VF driver */
 	/* This value should always be last */
 	ixgbe_mbox_api_unknown,	/* indicates that API version is not known */
 };
@@ -80,6 +81,10 @@ enum ixgbe_pfvf_api_rev {
 
 #define IXGBE_VF_UPDATE_XCAST_MODE	0x0c
 
+/* mailbox API, version 1.4 VF requests */
+#define IXGBE_VF_IPSEC_ADD	0x0d
+#define IXGBE_VF_IPSEC_DEL	0x0e
+
 /* length of permanent address message returned from PF */
 #define IXGBE_VF_PERMADDR_MSG_LEN 4
 /* word in permanent address message with the current multicast type */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 3c6f01c41b78..af25a8fffeb8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -496,6 +496,7 @@ static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
 		case ixgbe_mbox_api_11:
 		case ixgbe_mbox_api_12:
 		case ixgbe_mbox_api_13:
+		case ixgbe_mbox_api_14:
 			/* Version 1.1 supports jumbo frames on VFs if PF has
 			 * jumbo frames enabled which means legacy VFs are
 			 * disabled
@@ -728,6 +729,9 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
 	/* reset multicast table array for vf */
 	adapter->vfinfo[vf].num_vf_mc_hashes = 0;
 
+	/* clear any ipsec table info */
+	ixgbe_ipsec_vf_clear(adapter, vf);
+
 	/* Flush and reset the mta with the new values */
 	ixgbe_set_rx_mode(adapter->netdev);
 
@@ -1000,6 +1004,7 @@ static int ixgbe_negotiate_vf_api(struct ixgbe_adapter *adapter,
 	case ixgbe_mbox_api_11:
 	case ixgbe_mbox_api_12:
 	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
 		adapter->vfinfo[vf].vf_api = api;
 		return 0;
 	default:
@@ -1025,6 +1030,7 @@ static int ixgbe_get_vf_queues(struct ixgbe_adapter *adapter,
 	case ixgbe_mbox_api_11:
 	case ixgbe_mbox_api_12:
 	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
 		break;
 	default:
 		return -1;
@@ -1065,6 +1071,7 @@ static int ixgbe_get_vf_reta(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
 
 	/* verify the PF is supporting the correct API */
 	switch (adapter->vfinfo[vf].vf_api) {
+	case ixgbe_mbox_api_14:
 	case ixgbe_mbox_api_13:
 	case ixgbe_mbox_api_12:
 		break;
@@ -1097,6 +1104,7 @@ static int ixgbe_get_vf_rss_key(struct ixgbe_adapter *adapter,
 
 	/* verify the PF is supporting the correct API */
 	switch (adapter->vfinfo[vf].vf_api) {
+	case ixgbe_mbox_api_14:
 	case ixgbe_mbox_api_13:
 	case ixgbe_mbox_api_12:
 		break;
@@ -1122,8 +1130,9 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter,
 		/* promisc introduced in 1.3 version */
 		if (xcast_mode == IXGBEVF_XCAST_MODE_PROMISC)
 			return -EOPNOTSUPP;
-		/* Fall threw */
+		/* Fall through */
 	case ixgbe_mbox_api_13:
+	case ixgbe_mbox_api_14:
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -1249,6 +1258,12 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf)
 	case IXGBE_VF_UPDATE_XCAST_MODE:
 		retval = ixgbe_update_vf_xcast_mode(adapter, msgbuf, vf);
 		break;
+	case IXGBE_VF_IPSEC_ADD:
+		retval = ixgbe_ipsec_vf_add_sa(adapter, msgbuf, vf);
+		break;
+	case IXGBE_VF_IPSEC_DEL:
+		retval = ixgbe_ipsec_vf_del_sa(adapter, msgbuf, vf);
+		break;
 	default:
 		e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
-- 
2.17.1

^ permalink raw reply related

* [net-next 09/13] ixgbevf: add defines for IPsec offload request
From: Jeff Kirsher @ 2018-08-28 21:35 UTC (permalink / raw)
  To: davem; +Cc: Shannon Nelson, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

Fix up the register definitions for using IPsec offloads and
add the new mailbox message IDs.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/defines.h | 8 ++++++++
 drivers/net/ethernet/intel/ixgbevf/mbx.h     | 5 +++++
 2 files changed, 13 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h
index 700d8eb2f6f8..dd9cd4541d7a 100644
--- a/drivers/net/ethernet/intel/ixgbevf/defines.h
+++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
@@ -133,9 +133,14 @@ typedef u32 ixgbe_link_speed;
 #define IXGBE_RXDADV_STAT_FCSTAT_NODDP	0x00000010 /* 01: Ctxt w/o DDP */
 #define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP	0x00000020 /* 10: Recv. FCP_RSP */
 #define IXGBE_RXDADV_STAT_FCSTAT_DDP	0x00000030 /* 11: Ctxt w/ DDP */
+#define IXGBE_RXDADV_STAT_SECP		0x00020000 /* IPsec/MACsec pkt found */
 
 #define IXGBE_RXDADV_RSSTYPE_MASK	0x0000000F
 #define IXGBE_RXDADV_PKTTYPE_MASK	0x0000FFF0
+#define IXGBE_RXDADV_PKTTYPE_IPV4	0x00000010 /* IPv4 hdr present */
+#define IXGBE_RXDADV_PKTTYPE_IPV6	0x00000040 /* IPv6 hdr present */
+#define IXGBE_RXDADV_PKTTYPE_IPSEC_ESP	0x00001000 /* IPSec ESP */
+#define IXGBE_RXDADV_PKTTYPE_IPSEC_AH	0x00002000 /* IPSec AH */
 #define IXGBE_RXDADV_PKTTYPE_MASK_EX	0x0001FFF0
 #define IXGBE_RXDADV_HDRBUFLEN_MASK	0x00007FE0
 #define IXGBE_RXDADV_RSCCNT_MASK	0x001E0000
@@ -250,9 +255,12 @@ struct ixgbe_adv_tx_context_desc {
 #define IXGBE_ADVTXD_TUCMD_L4T_UDP	0x00000000  /* L4 Packet TYPE of UDP */
 #define IXGBE_ADVTXD_TUCMD_L4T_TCP	0x00000800  /* L4 Packet TYPE of TCP */
 #define IXGBE_ADVTXD_TUCMD_L4T_SCTP	0x00001000  /* L4 Packet TYPE of SCTP */
+#define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP   0x00002000 /* IPSec Type ESP */
+#define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000 /* ESP Encrypt Enable */
 #define IXGBE_ADVTXD_IDX_SHIFT	4 /* Adv desc Index shift */
 #define IXGBE_ADVTXD_CC		0x00000080 /* Check Context */
 #define IXGBE_ADVTXD_POPTS_SHIFT	8  /* Adv desc POPTS shift */
+#define IXGBE_ADVTXD_POPTS_IPSEC	0x00000400 /* IPSec offload request */
 #define IXGBE_ADVTXD_POPTS_IXSM	(IXGBE_TXD_POPTS_IXSM << \
 				 IXGBE_ADVTXD_POPTS_SHIFT)
 #define IXGBE_ADVTXD_POPTS_TXSM	(IXGBE_TXD_POPTS_TXSM << \
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h
index bfd9ae150808..853796c8ef0e 100644
--- a/drivers/net/ethernet/intel/ixgbevf/mbx.h
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h
@@ -62,6 +62,7 @@ enum ixgbe_pfvf_api_rev {
 	ixgbe_mbox_api_11,	/* API version 1.1, linux/freebsd VF driver */
 	ixgbe_mbox_api_12,	/* API version 1.2, linux/freebsd VF driver */
 	ixgbe_mbox_api_13,	/* API version 1.3, linux/freebsd VF driver */
+	ixgbe_mbox_api_14,	/* API version 1.4, linux/freebsd VF driver */
 	/* This value should always be last */
 	ixgbe_mbox_api_unknown,	/* indicates that API version is not known */
 };
@@ -92,6 +93,10 @@ enum ixgbe_pfvf_api_rev {
 
 #define IXGBE_VF_UPDATE_XCAST_MODE	0x0c
 
+/* mailbox API, version 1.4 VF requests */
+#define IXGBE_VF_IPSEC_ADD	0x0d
+#define IXGBE_VF_IPSEC_DEL	0x0e
+
 /* length of permanent address message returned from PF */
 #define IXGBE_VF_PERMADDR_MSG_LEN	4
 /* word in permanent address message with the current multicast type */
-- 
2.17.1

^ permalink raw reply related

* [net-next 12/13] ixgbe: disallow IPsec Tx offload when in SR-IOV mode
From: Jeff Kirsher @ 2018-08-28 21:35 UTC (permalink / raw)
  To: davem; +Cc: Shannon Nelson, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

There seems to be a problem in the x540's internal switch wherein if SR-IOV
mode is enabled and an offloaded IPsec packet is sent to a local VF,
the packet is silently dropped.  This might never be a problem as it is
somewhat of a corner case, but if someone happens to be using IPsec offload
from the PF to a VF that just happens to get migrated to the local box,
communication will mysteriously fail.

Not good.

A simple way to protect against this is to not allow any IPsec offloads
for outgoing packets when num_vfs != 0.  This doesn't help any offloads that
were created before SR-IOV was enabled, but we'll get to that later.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index ecd01fade960..0a1c8bf3f74f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -693,6 +693,9 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs)
 	} else {
 		struct tx_sa tsa;

+		if (adapter->num_vfs)
+			return -EOPNOTSUPP;
+
 		/* find the first unused index */
 		ret = ixgbe_ipsec_find_empty_idx(ipsec, false);
 		if (ret < 0) {
-- 
2.17.1

^ permalink raw reply related

* [net-next 13/13] ixgbe: fix the return value for unsupported VF offload
From: Jeff Kirsher @ 2018-08-28 21:35 UTC (permalink / raw)
  To: davem; +Cc: Shannon Nelson, netdev, nhorman, sassmann, jogreene, Jeff Kirsher
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Shannon Nelson <shannon.nelson@oracle.com>

When failing the request because we can't support that offload,
reporting EOPNOTSUPP makes much more sense than ENXIO.

Signed-off-by: Shannon Nelson <shannon.nelson@oracle.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 0a1c8bf3f74f..fd1b0546fd67 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -894,7 +894,7 @@ int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
 	 * device, so block these requests for now.
 	 */
 	if (!(sam->flags & XFRM_OFFLOAD_INBOUND)) {
-		err = -ENXIO;
+		err = -EOPNOTSUPP;
 		goto err_out;
 	}
 
-- 
2.17.1

^ permalink raw reply related

* [PATCH] mac80211: fix HWMP sequence numbering to follow standard
From: Yuan-Chi Pang @ 2018-08-29  1:30 UTC (permalink / raw)
  To: johannes; +Cc: davem, linux-wireless, netdev, linux-kernel, Yuan-Chi Pang

IEEE 802.11-2016 14.10.8.3 HWMP sequence numbering says:
If it is a target mesh STA, it shall update its own HWMP SN to
maximum (current HWMP SN, target HWMP SN in the PREQ element) + 1
immediately before it generates a PREP element in response to a
PREQ element.

Signed-off-by: Yuan-Chi Pang <fu3mo6goo@gmail.com>
---
 net/mac80211/mesh_hwmp.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 35ad398..daf9db3 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -572,6 +572,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 		forward = false;
 		reply = true;
 		target_metric = 0;
+
+		if (SN_GT(target_sn, ifmsh->sn))
+			ifmsh->sn = target_sn;
+
 		if (time_after(jiffies, ifmsh->last_sn_update +
 					net_traversal_jiffies(sdata)) ||
 		    time_before(jiffies, ifmsh->last_sn_update)) {
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH v2] net: ethernet: Convert to using %pOFn instead of device_node.name
From: Sean Wang @ 2018-08-29  2:06 UTC (permalink / raw)
  To: Rob Herring
  Cc: linux-kernel, David S. Miller, Yisen Zhuang, Salil Mehta,
	Sebastian Hesselbarth, Felix Fietkau, John Crispin, Nelson Chang,
	Matthias Brugger, Wingman Kwok, Murali Karicheri, netdev
In-Reply-To: <20180828154433.5693-4-robh@kernel.org>

On Tue, 2018-08-28 at 10:44 -0500, Rob Herring wrote:
> In preparation to remove the node name pointer from struct device_node,
> convert printf users to use the %pOFn format specifier.
> 
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Yisen Zhuang <yisen.zhuang@huawei.com>
> Cc: Salil Mehta <salil.mehta@huawei.com>
> Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
> Cc: Felix Fietkau <nbd@openwrt.org>
> Cc: John Crispin <john@phrozen.org>
> Cc: Sean Wang <sean.wang@mediatek.com>
> Cc: Nelson Chang <nelson.chang@mediatek.com>
> Cc: Matthias Brugger <matthias.bgg@gmail.com>
> Cc: Wingman Kwok <w-kwok2@ti.com>
> Cc: Murali Karicheri <m-karicheri2@ti.com>
> Cc: netdev@vger.kernel.org
> 
> Signed-off-by: Rob Herring <robh@kernel.org>
> ---
> v2:
> - fix missing brackets in netcp
> 
>  drivers/net/ethernet/freescale/fsl_pq_mdio.c  |  4 +-
>  .../net/ethernet/hisilicon/hns/hns_dsaf_mac.c |  8 ++--
>  drivers/net/ethernet/marvell/mv643xx_eth.c    |  6 +--
>  drivers/net/ethernet/mediatek/mtk_eth_soc.c   |  2 +-

for mtk-eth

Acked-by: Sean Wang <sean.wang@mediatek.com>

>  drivers/net/ethernet/sun/sunhme.c             |  2 +-
>  drivers/net/ethernet/ti/netcp_core.c          | 22 ++++------
>  drivers/net/ethernet/ti/netcp_ethss.c         | 42 +++++++++----------
>  7 files changed, 40 insertions(+), 46 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
> index ac2c3f6a12bc..82722d05fedb 100644
> --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
> +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
> @@ -446,8 +446,8 @@ static int fsl_pq_mdio_probe(struct platform_device *pdev)
>  		goto error;
>  	}
> 
> -	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s@%llx", np->name,
> -		(unsigned long long)res.start);
> +	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%pOFn@%llx", np,
> +		 (unsigned long long)res.start);
> 
>  	priv->map = of_iomap(np, 0);
>  	if (!priv->map) {
> diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
> index 1c2326bd76e2..6521d8d53745 100644
> --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
> +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
> @@ -807,8 +807,8 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb)
>  			 */
>  			put_device(&mac_cb->phy_dev->mdio.dev);
> 
> -			dev_dbg(mac_cb->dev, "mac%d phy_node: %s\n",
> -				mac_cb->mac_id, np->name);
> +			dev_dbg(mac_cb->dev, "mac%d phy_node: %pOFn\n",
> +				mac_cb->mac_id, np);
>  		}
>  		of_node_put(np);
> 
> @@ -825,8 +825,8 @@ static int hns_mac_get_info(struct hns_mac_cb *mac_cb)
>  			 * if the phy_dev is found
>  			 */
>  			put_device(&mac_cb->phy_dev->mdio.dev);
> -			dev_dbg(mac_cb->dev, "mac%d phy_node: %s\n",
> -				mac_cb->mac_id, np->name);
> +			dev_dbg(mac_cb->dev, "mac%d phy_node: %pOFn\n",
> +				mac_cb->mac_id, np);
>  		}
>  		of_node_put(np);
> 
> diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
> index 62f204f32316..1e9bcbdc6a90 100644
> --- a/drivers/net/ethernet/marvell/mv643xx_eth.c
> +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
> @@ -2733,17 +2733,17 @@ static int mv643xx_eth_shared_of_add_port(struct platform_device *pdev,
> 
>  	memset(&res, 0, sizeof(res));
>  	if (of_irq_to_resource(pnp, 0, &res) <= 0) {
> -		dev_err(&pdev->dev, "missing interrupt on %s\n", pnp->name);
> +		dev_err(&pdev->dev, "missing interrupt on %pOFn\n", pnp);
>  		return -EINVAL;
>  	}
> 
>  	if (of_property_read_u32(pnp, "reg", &ppd.port_number)) {
> -		dev_err(&pdev->dev, "missing reg property on %s\n", pnp->name);
> +		dev_err(&pdev->dev, "missing reg property on %pOFn\n", pnp);
>  		return -EINVAL;
>  	}
> 
>  	if (ppd.port_number >= 3) {
> -		dev_err(&pdev->dev, "invalid reg property on %s\n", pnp->name);
> +		dev_err(&pdev->dev, "invalid reg property on %pOFn\n", pnp);
>  		return -EINVAL;
>  	}
> 
> diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
> index 6e6abdc399de..b44bcfd85b05 100644
> --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
> +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
> @@ -405,7 +405,7 @@ static int mtk_mdio_init(struct mtk_eth *eth)
>  	eth->mii_bus->priv = eth;
>  	eth->mii_bus->parent = eth->dev;
> 
> -	snprintf(eth->mii_bus->id, MII_BUS_ID_SIZE, "%s", mii_np->name);
> +	snprintf(eth->mii_bus->id, MII_BUS_ID_SIZE, "%pOFn", mii_np);
>  	ret = of_mdiobus_register(eth->mii_bus, mii_np);
> 
>  err_put_node:
> diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
> index 06da2f59fcbf..863fd602fd33 100644
> --- a/drivers/net/ethernet/sun/sunhme.c
> +++ b/drivers/net/ethernet/sun/sunhme.c
> @@ -2999,7 +2999,7 @@ static int happy_meal_pci_probe(struct pci_dev *pdev,
>  	/* Now make sure pci_dev cookie is there. */
>  #ifdef CONFIG_SPARC
>  	dp = pci_device_to_OF_node(pdev);
> -	strcpy(prom_name, dp->name);
> +	snprintf(prom_name, sizeof(prom_name), "%pOFn", dp);
>  #else
>  	if (is_quattro_p(pdev))
>  		strcpy(prom_name, "SUNW,qfe");
> diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
> index a1d335a3c5e4..1f612268c998 100644
> --- a/drivers/net/ethernet/ti/netcp_core.c
> +++ b/drivers/net/ethernet/ti/netcp_core.c
> @@ -225,17 +225,6 @@ static int emac_arch_get_mac_addr(char *x, void __iomem *efuse_mac, u32 swap)
>  	return 0;
>  }
> 
> -static const char *netcp_node_name(struct device_node *node)
> -{
> -	const char *name;
> -
> -	if (of_property_read_string(node, "label", &name) < 0)
> -		name = node->name;
> -	if (!name)
> -		name = "unknown";
> -	return name;
> -}
> -
>  /* Module management routines */
>  static int netcp_register_interface(struct netcp_intf *netcp)
>  {
> @@ -267,8 +256,13 @@ static int netcp_module_probe(struct netcp_device *netcp_device,
>  	}
> 
>  	for_each_available_child_of_node(devices, child) {
> -		const char *name = netcp_node_name(child);
> +		const char *name;
> +		char node_name[32];
> 
> +		if (of_property_read_string(node, "label", &name) < 0) {
> +			snprintf(node_name, sizeof(node_name), "%pOFn", child);
> +			name = node_name;
> +		}
>  		if (!strcasecmp(module->name, name))
>  			break;
>  	}
> @@ -2209,8 +2203,8 @@ static int netcp_probe(struct platform_device *pdev)
>  	for_each_available_child_of_node(interfaces, child) {
>  		ret = netcp_create_interface(netcp_device, child);
>  		if (ret) {
> -			dev_err(dev, "could not create interface(%s)\n",
> -				child->name);
> +			dev_err(dev, "could not create interface(%pOFn)\n",
> +				child);
>  			goto probe_quit_interface;
>  		}
>  	}
> diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
> index 72b98e27c992..0397ccb6597e 100644
> --- a/drivers/net/ethernet/ti/netcp_ethss.c
> +++ b/drivers/net/ethernet/ti/netcp_ethss.c
> @@ -3137,15 +3137,15 @@ static void init_secondary_ports(struct gbe_priv *gbe_dev,
>  	for_each_child_of_node(node, port) {
>  		slave = devm_kzalloc(dev, sizeof(*slave), GFP_KERNEL);
>  		if (!slave) {
> -			dev_err(dev, "memory alloc failed for secondary port(%s), skipping...\n",
> -				port->name);
> +			dev_err(dev, "memory alloc failed for secondary port(%pOFn), skipping...\n",
> +				port);
>  			continue;
>  		}
> 
>  		if (init_slave(gbe_dev, slave, port)) {
>  			dev_err(dev,
> -				"Failed to initialize secondary port(%s), skipping...\n",
> -				port->name);
> +				"Failed to initialize secondary port(%pOFn), skipping...\n",
> +				port);
>  			devm_kfree(dev, slave);
>  			continue;
>  		}
> @@ -3239,8 +3239,8 @@ static int set_xgbe_ethss10_priv(struct gbe_priv *gbe_dev,
>  	ret = of_address_to_resource(node, XGBE_SS_REG_INDEX, &res);
>  	if (ret) {
>  		dev_err(gbe_dev->dev,
> -			"Can't xlate xgbe of node(%s) ss address at %d\n",
> -			node->name, XGBE_SS_REG_INDEX);
> +			"Can't xlate xgbe of node(%pOFn) ss address at %d\n",
> +			node, XGBE_SS_REG_INDEX);
>  		return ret;
>  	}
> 
> @@ -3254,8 +3254,8 @@ static int set_xgbe_ethss10_priv(struct gbe_priv *gbe_dev,
>  	ret = of_address_to_resource(node, XGBE_SM_REG_INDEX, &res);
>  	if (ret) {
>  		dev_err(gbe_dev->dev,
> -			"Can't xlate xgbe of node(%s) sm address at %d\n",
> -			node->name, XGBE_SM_REG_INDEX);
> +			"Can't xlate xgbe of node(%pOFn) sm address at %d\n",
> +			node, XGBE_SM_REG_INDEX);
>  		return ret;
>  	}
> 
> @@ -3269,8 +3269,8 @@ static int set_xgbe_ethss10_priv(struct gbe_priv *gbe_dev,
>  	ret = of_address_to_resource(node, XGBE_SERDES_REG_INDEX, &res);
>  	if (ret) {
>  		dev_err(gbe_dev->dev,
> -			"Can't xlate xgbe serdes of node(%s) address at %d\n",
> -			node->name, XGBE_SERDES_REG_INDEX);
> +			"Can't xlate xgbe serdes of node(%pOFn) address at %d\n",
> +			node, XGBE_SERDES_REG_INDEX);
>  		return ret;
>  	}
> 
> @@ -3347,8 +3347,8 @@ static int get_gbe_resource_version(struct gbe_priv *gbe_dev,
>  	ret = of_address_to_resource(node, GBE_SS_REG_INDEX, &res);
>  	if (ret) {
>  		dev_err(gbe_dev->dev,
> -			"Can't translate of node(%s) of gbe ss address at %d\n",
> -			node->name, GBE_SS_REG_INDEX);
> +			"Can't translate of node(%pOFn) of gbe ss address at %d\n",
> +			node, GBE_SS_REG_INDEX);
>  		return ret;
>  	}
> 
> @@ -3372,8 +3372,8 @@ static int set_gbe_ethss14_priv(struct gbe_priv *gbe_dev,
>  	ret = of_address_to_resource(node, GBE_SGMII34_REG_INDEX, &res);
>  	if (ret) {
>  		dev_err(gbe_dev->dev,
> -			"Can't translate of gbe node(%s) address at index %d\n",
> -			node->name, GBE_SGMII34_REG_INDEX);
> +			"Can't translate of gbe node(%pOFn) address at index %d\n",
> +			node, GBE_SGMII34_REG_INDEX);
>  		return ret;
>  	}
> 
> @@ -3388,8 +3388,8 @@ static int set_gbe_ethss14_priv(struct gbe_priv *gbe_dev,
>  	ret = of_address_to_resource(node, GBE_SM_REG_INDEX, &res);
>  	if (ret) {
>  		dev_err(gbe_dev->dev,
> -			"Can't translate of gbe node(%s) address at index %d\n",
> -			node->name, GBE_SM_REG_INDEX);
> +			"Can't translate of gbe node(%pOFn) address at index %d\n",
> +			node, GBE_SM_REG_INDEX);
>  		return ret;
>  	}
> 
> @@ -3498,8 +3498,8 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev,
>  	ret = of_address_to_resource(node, GBENU_SM_REG_INDEX, &res);
>  	if (ret) {
>  		dev_err(gbe_dev->dev,
> -			"Can't translate of gbenu node(%s) addr at index %d\n",
> -			node->name, GBENU_SM_REG_INDEX);
> +			"Can't translate of gbenu node(%pOFn) addr at index %d\n",
> +			node, GBENU_SM_REG_INDEX);
>  		return ret;
>  	}
> 
> @@ -3642,7 +3642,7 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
>  		ret = netcp_xgbe_serdes_init(gbe_dev->xgbe_serdes_regs,
>  					     gbe_dev->ss_regs);
>  	} else {
> -		dev_err(dev, "unknown GBE node(%s)\n", node->name);
> +		dev_err(dev, "unknown GBE node(%pOFn)\n", node);
>  		ret = -ENODEV;
>  	}
> 
> @@ -3667,8 +3667,8 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
>  	for_each_child_of_node(interfaces, interface) {
>  		ret = of_property_read_u32(interface, "slave-port", &slave_num);
>  		if (ret) {
> -			dev_err(dev, "missing slave-port parameter, skipping interface configuration for %s\n",
> -				interface->name);
> +			dev_err(dev, "missing slave-port parameter, skipping interface configuration for %pOFn\n",
> +				interface);
>  			continue;
>  		}
>  		gbe_dev->num_slaves++;
> --
> 2.17.1

^ permalink raw reply

* Waiting for the reply
From: Ruby @ 2018-08-28 13:42 UTC (permalink / raw)
  To: netdev

We provide photoshop services to some of the companies from around the
world.
We have worked on tons of images ever since our team establishment in 2009.

Many online retail companies use our services for retouching electronics,
jewelry, apparels, furniture
etc. by getting the images of their products enhanced.

Here are the details of what we provide:
Clipping path;
Deep etch process
Image masking
Remove background
Portrait retouching
Jewelry retouching
Fashion retouching

Please reply back for further info.
We can provide testing for your photos if needed.

Thanks,
Ruby

^ permalink raw reply

* Re: [Patch iproute2] ss: add UNIX_DIAG_VFS and UNIX_DIAG_ICONS for unix sockets
From: Cong Wang @ 2018-08-28 23:16 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Linux Kernel Network Developers
In-Reply-To: <20180827152725.479342e7@shemminger-XPS-13-9360>

On Mon, Aug 27, 2018 at 3:27 PM Stephen Hemminger
<stephen@networkplumber.org> wrote:
>
> On Mon, 27 Aug 2018 14:46:52 -0700
> Cong Wang <xiyou.wangcong@gmail.com> wrote:
>
> > UNIX_DIAG_VFS and UNIX_DIAG_ICONS are never used by ss,
> > make them available in ss -e output.
> >
> > Cc: Stephen Hemminger <stephen@networkplumber.org>
> > Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
> > ---
> >  misc/ss.c | 25 +++++++++++++++++++++++++
> >  1 file changed, 25 insertions(+)
> >
> > diff --git a/misc/ss.c b/misc/ss.c
> > index 41e7762b..d28bc1ec 100644
> > --- a/misc/ss.c
> > +++ b/misc/ss.c
> > @@ -16,6 +16,7 @@
> >  #include <sys/ioctl.h>
> >  #include <sys/socket.h>
> >  #include <sys/uio.h>
> > +#include <sys/sysmacros.h>
>
> Why is this included, it isn't on my system.

It is for major() and minor().

$ find /usr/include/ -name sysmacros.h
/usr/include/bits/sysmacros.h
/usr/include/sys/sysmacros.h
$ rpm -qf /usr/include/sys/sysmacros.h
glibc-headers-2.26-28.fc27.x86_64

So you are not using glibc? Or iproute2 should be built with non-glibc?

>
> >  #include <netinet/in.h>
> >  #include <string.h>
> >  #include <errno.h>
> > @@ -3604,6 +3605,28 @@ static int unix_show_sock(const struct sockaddr_nl *addr, struct nlmsghdr *nlh,
> >                       out(" %c-%c",
> >                           mask & 1 ? '-' : '<', mask & 2 ? '-' : '>');
> >               }
> > +             if (tb[UNIX_DIAG_VFS]) {
> > +                     struct unix_diag_vfs uv;
> > +
> > +                     memcpy(&uv, RTA_DATA(tb[UNIX_DIAG_VFS]), sizeof(uv));
>
> Copy here is unnecessary, you can just do:
>                         const struct unix_diag_vfs *uv
>                                 = RTA_DATA(tb[UNIX_DIAG_VFS]);


Oh, good point!


>
> > +                     out(" ino:%u dev:%u/%u", uv.udiag_vfs_ino, major(uv.udiag_vfs_dev),
> > +                                              minor(uv.udiag_vfs_dev));
> > +             }
> > +             if (tb[UNIX_DIAG_ICONS]) {
> > +                     int len = RTA_PAYLOAD(tb[UNIX_DIAG_ICONS]);
> > +                     __u32 *peers = malloc(len);
> > +                     int i;
>
> Ditto, allocation and copy are not necessary, just reference the data.
>

Sure, will update.

Thanks.

^ permalink raw reply

* [PATCH net-next,v5] net/tls: Calculate nsg for zerocopy path without skb_cow_data.
From: Doron Roberts-Kedes @ 2018-08-28 23:33 UTC (permalink / raw)
  To: David S . Miller
  Cc: Aviad Yehezkel, Vakul Garg, Dave Watson, Boris Pismenny, netdev,
	Doron Roberts-Kedes

decrypt_skb fails if the number of sg elements required to map it
is greater than MAX_SKB_FRAGS. nsg must always be calculated, but
skb_cow_data adds unnecessary memcpy's for the zerocopy case.

The new function skb_nsg calculates the number of scatterlist elements
required to map the skb without the extra overhead of skb_cow_data.
This patch reduces memcpy by 50% on my encrypted NBD benchmarks.

Reported-by: Vakul Garg <Vakul.garg@nxp.com>
Reviewed-by: Vakul Garg <Vakul.garg@nxp.com>
Tested-by: Vakul Garg <Vakul.garg@nxp.com>
Signed-off-by: Doron Roberts-Kedes <doronrk@fb.com>
---
 net/tls/tls_sw.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 1 deletion(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 52fbe727d7c1..4ba62cd00a94 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -43,6 +43,82 @@
 
 #define MAX_IV_SIZE	TLS_CIPHER_AES_GCM_128_IV_SIZE
 
+static int __skb_nsg(struct sk_buff *skb, int offset, int len,
+                     unsigned int recursion_level)
+{
+        int start = skb_headlen(skb);
+        int i, chunk = start - offset;
+        struct sk_buff *frag_iter;
+        int elt = 0;
+
+        if (unlikely(recursion_level >= 24))
+                return -EMSGSIZE;
+
+        if (chunk > 0) {
+                if (chunk > len)
+                        chunk = len;
+                elt++;
+                len -= chunk;
+                if (len == 0)
+                        return elt;
+                offset += chunk;
+        }
+
+        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+                int end;
+
+                WARN_ON(start > offset + len);
+
+                end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
+                chunk = end - offset;
+                if (chunk > 0) {
+                        if (chunk > len)
+                                chunk = len;
+                        elt++;
+                        len -= chunk;
+                        if (len == 0)
+                                return elt;
+                        offset += chunk;
+                }
+                start = end;
+        }
+
+        if (unlikely(skb_has_frag_list(skb))) {
+                skb_walk_frags(skb, frag_iter) {
+                        int end, ret;
+
+                        WARN_ON(start > offset + len);
+
+                        end = start + frag_iter->len;
+                        chunk = end - offset;
+                        if (chunk > 0) {
+                                if (chunk > len)
+                                        chunk = len;
+                                ret = __skb_nsg(frag_iter, offset - start, chunk,
+                                                recursion_level + 1);
+                                if (unlikely(ret < 0))
+                                        return ret;
+                                elt += ret;
+                                len -= chunk;
+                                if (len == 0)
+                                        return elt;
+                                offset += chunk;
+                        }
+                        start = end;
+                }
+        }
+        BUG_ON(len);
+        return elt;
+}
+
+/* Return the number of scatterlist elements required to completely map the
+ * skb, or -EMSGSIZE if the recursion depth is exceeded.
+ */
+static int skb_nsg(struct sk_buff *skb, int offset, int len)
+{
+        return __skb_nsg(skb, offset, len, 0);
+}
+
 static int tls_do_decryption(struct sock *sk,
 			     struct scatterlist *sgin,
 			     struct scatterlist *sgout,
@@ -678,12 +754,14 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 			n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1;
 		else
 			n_sgout = sg_nents(out_sg);
+		n_sgin = skb_nsg(skb, rxm->offset + tls_ctx->rx.prepend_size,
+				 rxm->full_len - tls_ctx->rx.prepend_size);
 	} else {
 		n_sgout = 0;
 		*zc = false;
+		n_sgin = skb_cow_data(skb, 0, &unused);
 	}
 
-	n_sgin = skb_cow_data(skb, 0, &unused);
 	if (n_sgin < 1)
 		return -EBADMSG;
 
-- 
2.17.1

^ permalink raw reply related

* Re: [PATCH net-next 00/15] nfp: add NFP5000 support
From: David Miller @ 2018-08-29  0:01 UTC (permalink / raw)
  To: jakub.kicinski; +Cc: netdev, oss-drivers
In-Reply-To: <20180828202047.1305-1-jakub.kicinski@netronome.com>

From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 28 Aug 2018 13:20:32 -0700

> This series broadly speaking adds support for NFP5000 and
> related products.
 ...

Series applied, thanks Jakub.

^ permalink raw reply

* Re: [net-next 00/15][pull request] 100GbE Intel Wired LAN Driver Updates 2018-08-28
From: David Miller @ 2018-08-29  0:01 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, nhorman, sassmann, jogreene
In-Reply-To: <20180828190413.29869-1-jeffrey.t.kirsher@intel.com>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Tue, 28 Aug 2018 12:03:58 -0700

> This series contains new features and implementation updates for the
> ice driver.
 ...

Pulled.

^ permalink raw reply

* Re: [net-next 00/13][pull request] 10GbE Intel Wired LAN Driver Updates 2018-08-28
From: David Miller @ 2018-08-29  0:01 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, nhorman, sassmann, jogreene
In-Reply-To: <20180828213558.19273-1-jeffrey.t.kirsher@intel.com>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Tue, 28 Aug 2018 14:35:44 -0700

> This series contains updates to ixgbe and ixgbevf only.
 ...

Pulled.

^ permalink raw reply

* Re: [V9fs-developer] [PATCH v2 2/2] 9p: Add refcount to p9_req_t
From: Dominique Martinet @ 2018-08-29  4:43 UTC (permalink / raw)
  To: Tomas Bortoli
  Cc: lucho, Dominique Martinet, ericvh, netdev, linux-kernel,
	syzkaller, v9fs-developer, rminnich, davem
In-Reply-To: <20180827230954.GA21513@nautica>

Dominique Martinet wrote on Tue, Aug 28, 2018:
> I think I've found why (see below), so I'll push a fixed version after
> some more testing and another thorough read -- at some point today, but
> this hasn't been 'approved' explicitly so please review! :)

While the issue I pointed at was real, it wasn't what was causing the
refcount leak I was observing -- the problem is that we didn't drop a
ref when the request was successfully cancelled (e.g. the reply to the
flush came and the original request didn't get replied to)

The reason for this was that there were multiple versions of the patch
which alternated between doing the put in client.c after the cancelled
callback unconditionally, and doing the put in each transport's
cancelled() function, but virtio does not have this callback so that
didn't get added in the final version (co-developing is hard); so I've
added an else clause to just issue a put if there is no callback.

(In the end, it felt better to have the req_put in the transport because
trans_fd is making refcounting difficult with its list handling, and
separating the put from the list removal would be more confusing than is
gained by sharing code)

Anyway, that's starting to be quite different from the v2 so I'll send a
v3 keeping Tomas as the author -- please check my edits are alright with
you, Tomas.

Meanwhile I'll keep running tests, I'm now confident about virtio but
want to spend more time on other transports again, so delaying the push
to linux-next for a few more days...

-- 
Dominique

^ permalink raw reply

* Re: [PATCH v2] sh_eth: Add R7S9210 support
From: Rob Herring @ 2018-08-29  0:53 UTC (permalink / raw)
  To: Chris Brandt
  Cc: Sergei Shtylyov, David S . Miller, Mark Rutland, netdev,
	devicetree, linux-renesas-soc, Simon Horman
In-Reply-To: <20180827174202.80750-1-chris.brandt@renesas.com>

On Mon, Aug 27, 2018 at 12:42:02PM -0500, Chris Brandt wrote:
> Add support for the R7S9210 which is part of the RZ/A2 series.
> 
> Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
> ---
> v2:
>  * Use sh_eth_offset_fast_sh4 instead of sh_eth_offset_fast_rza2
>  * Use sh_eth_set_rate_rcar instead of sh_eth_set_rate_r7s9210()
>  * Removed enum SH_ETH_REG_FAST_RZA2
> ---
>  Documentation/devicetree/bindings/net/sh_eth.txt |  1 +

Acked-by: Rob Herring <robh@kernel.org>

>  drivers/net/ethernet/renesas/sh_eth.c            | 36 ++++++++++++++++++++++++
>  2 files changed, 37 insertions(+)

^ permalink raw reply

* [PATCH v3] 9p: Add refcount to p9_req_t
From: Dominique Martinet @ 2018-08-29  4:59 UTC (permalink / raw)
  To: Tomas Bortoli, Eric Van Hensbergen, Latchesar Ionkov
  Cc: v9fs-developer, netdev, linux-kernel, syzkaller,
	Dominique Martinet

From: Tomas Bortoli <tomasbortoli@gmail.com>

To avoid use-after-free(s), use a refcount to keep track of the
usable references to any instantiated struct p9_req_t.

This commit adds p9_req_put(), p9_req_get() and p9_req_try_get() as
wrappers to kref_put(), kref_get() and kref_get_unless_zero().
These are used by the client and the transports to keep track of
valid requests' references.

p9_free_req() is added back and used as callback by kref_put().

Add SLAB_TYPESAFE_BY_RCU as it ensures that the memory freed by
kmem_cache_free() will not be reused for another type until the rcu
synchronisation period is over, so an address gotten under rcu read
lock is safe to inc_ref() without corrupting random memory while
the lock is held.

Co-developed-by: Dominique Martinet <dominique.martinet@cea.fr>
Signed-off-by: Tomas Bortoli <tomasbortoli@gmail.com>
Reported-by: syzbot+467050c1ce275af2a5b8@syzkaller.appspotmail.com
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
---
 include/net/9p/client.h | 14 ++++++++++
 net/9p/client.c         | 59 ++++++++++++++++++++++++++++++++++++-----
 net/9p/trans_fd.c       | 11 +++++++-
 net/9p/trans_rdma.c     |  2 ++
 net/9p/trans_virtio.c   | 18 ++++++++++---
 5 files changed, 92 insertions(+), 12 deletions(-)

diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 735f3979d559..947a570307a6 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -94,6 +94,7 @@ enum p9_req_status_t {
 struct p9_req_t {
 	int status;
 	int t_err;
+	struct kref refcount;
 	wait_queue_head_t wq;
 	struct p9_fcall tc;
 	struct p9_fcall rc;
@@ -233,6 +234,19 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status);
 int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *fl);
 void p9_fcall_fini(struct p9_fcall *fc);
 struct p9_req_t *p9_tag_lookup(struct p9_client *, u16);
+
+static inline void p9_req_get(struct p9_req_t *r)
+{
+	kref_get(&r->refcount);
+}
+
+static inline int p9_req_try_get(struct p9_req_t *r)
+{
+	return kref_get_unless_zero(&r->refcount);
+}
+
+int p9_req_put(struct p9_req_t *r);
+
 void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status);
 
 int p9_parse_header(struct p9_fcall *, int32_t *, int8_t *, int16_t *, int);
diff --git a/net/9p/client.c b/net/9p/client.c
index 7942c0bfcc5b..4cf11adad819 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -310,6 +310,18 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
 	if (tag < 0)
 		goto free;
 
+	/* Init ref to two because in the general case there is one ref
+	 * that is put asynchronously by a writer thread, one ref
+	 * temporarily given by p9_tag_lookup and put by p9_client_cb
+	 * in the recv thread, and one ref put by p9_tag_remove in the
+	 * main thread. The only exception is virtio that does not use
+	 * p9_tag_lookup but does not have a writer thread either
+	 * (the write happens synchronously in the request/zc_request
+	 * callback), so p9_client_cb eats the second ref there
+	 * as the pointer is duplicated directly by virtqueue_add_sgs()
+	 */
+	refcount_set(&req->refcount.refcount, 2);
+
 	return req;
 
 free:
@@ -333,10 +345,21 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)
 	struct p9_req_t *req;
 
 	rcu_read_lock();
+again:
 	req = idr_find(&c->reqs, tag);
-	/* There's no refcount on the req; a malicious server could cause
-	 * us to dereference a NULL pointer
-	 */
+	if (req) {
+		/* We have to be careful with the req found under rcu_read_lock
+		 * Thanks to SLAB_TYPESAFE_BY_RCU we can safely try to get the
+		 * ref again without corrupting other data, then check again
+		 * that the tag matches once we have the ref
+		 */
+		if (!p9_req_try_get(req))
+			goto again;
+		if (req->tc.tag != tag) {
+			p9_req_put(req);
+			goto again;
+		}
+	}
 	rcu_read_unlock();
 
 	return req;
@@ -350,7 +373,7 @@ EXPORT_SYMBOL(p9_tag_lookup);
  *
  * Context: Any context.
  */
-static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
+static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
 {
 	unsigned long flags;
 	u16 tag = r->tc.tag;
@@ -359,11 +382,23 @@ static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r)
 	spin_lock_irqsave(&c->lock, flags);
 	idr_remove(&c->reqs, tag);
 	spin_unlock_irqrestore(&c->lock, flags);
+	return p9_req_put(r);
+}
+
+static void p9_req_free(struct kref *ref)
+{
+	struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount);
 	p9_fcall_fini(&r->tc);
 	p9_fcall_fini(&r->rc);
 	kmem_cache_free(p9_req_cache, r);
 }
 
+int p9_req_put(struct p9_req_t *r)
+{
+	return kref_put(&r->refcount, p9_req_free);
+}
+EXPORT_SYMBOL(p9_req_put);
+
 /**
  * p9_tag_cleanup - cleans up tags structure and reclaims resources
  * @c:  v9fs client struct
@@ -379,7 +414,9 @@ static void p9_tag_cleanup(struct p9_client *c)
 	rcu_read_lock();
 	idr_for_each_entry(&c->reqs, req, id) {
 		pr_info("Tag %d still in use\n", id);
-		p9_tag_remove(c, req);
+		if (p9_tag_remove(c, req) == 0)
+			pr_warn("Packet with tag %d has still references",
+				req->tc.tag);
 	}
 	rcu_read_unlock();
 }
@@ -403,6 +440,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
 
 	wake_up(&req->wq);
 	p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
+	p9_req_put(req);
 }
 EXPORT_SYMBOL(p9_client_cb);
 
@@ -643,9 +681,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
 	 * if we haven't received a response for oldreq,
 	 * remove it from the list
 	 */
-	if (oldreq->status == REQ_STATUS_SENT)
+	if (oldreq->status == REQ_STATUS_SENT) {
 		if (c->trans_mod->cancelled)
 			c->trans_mod->cancelled(c, oldreq);
+		else
+			p9_req_put(oldreq);
+	}
 
 	p9_tag_remove(c, req);
 	return 0;
@@ -682,6 +723,8 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
 	return req;
 reterr:
 	p9_tag_remove(c, req);
+	/* We have to put also the 2nd reference as it won't be used */
+	p9_req_put(req);
 	return ERR_PTR(err);
 }
 
@@ -716,6 +759,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 
 	err = c->trans_mod->request(c, req);
 	if (err < 0) {
+		/* write won't happen */
+		p9_req_put(req);
 		if (err != -ERESTARTSYS && err != -EFAULT)
 			c->status = Disconnected;
 		goto recalc_sigpending;
@@ -2241,7 +2286,7 @@ EXPORT_SYMBOL(p9_client_readlink);
 
 int __init p9_client_init(void)
 {
-	p9_req_cache = KMEM_CACHE(p9_req_t, 0);
+	p9_req_cache = KMEM_CACHE(p9_req_t, SLAB_TYPESAFE_BY_RCU);
 	return p9_req_cache ? 0 : -ENOMEM;
 }
 
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 20f46f13fe83..686e24e355d0 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -132,6 +132,7 @@ struct p9_conn {
 	struct list_head req_list;
 	struct list_head unsent_req_list;
 	struct p9_req_t *req;
+	struct p9_req_t *wreq;
 	char tmp_buf[7];
 	struct p9_fcall rc;
 	int wpos;
@@ -383,6 +384,7 @@ static void p9_read_work(struct work_struct *work)
 		m->rc.sdata = NULL;
 		m->rc.offset = 0;
 		m->rc.capacity = 0;
+		p9_req_put(m->req);
 		m->req = NULL;
 	}
 
@@ -472,6 +474,8 @@ static void p9_write_work(struct work_struct *work)
 		m->wbuf = req->tc.sdata;
 		m->wsize = req->tc.size;
 		m->wpos = 0;
+		p9_req_get(req);
+		m->wreq = req;
 		spin_unlock(&m->client->lock);
 	}
 
@@ -492,8 +496,11 @@ static void p9_write_work(struct work_struct *work)
 	}
 
 	m->wpos += err;
-	if (m->wpos == m->wsize)
+	if (m->wpos == m->wsize) {
 		m->wpos = m->wsize = 0;
+		p9_req_put(m->wreq);
+		m->wreq = NULL;
+	}
 
 end_clear:
 	clear_bit(Wworksched, &m->wsched);
@@ -694,6 +701,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 	if (req->status == REQ_STATUS_UNSENT) {
 		list_del(&req->req_list);
 		req->status = REQ_STATUS_FLSHD;
+		p9_req_put(req);
 		ret = 0;
 	}
 	spin_unlock(&client->lock);
@@ -711,6 +719,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
 	spin_lock(&client->lock);
 	list_del(&req->req_list);
 	spin_unlock(&client->lock);
+	p9_req_put(req);
 
 	return 0;
 }
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 5b0cda1aaa7a..9ea9e2eb318e 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -365,6 +365,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc)
 			    c->busa, c->req->tc.size,
 			    DMA_TO_DEVICE);
 	up(&rdma->sq_sem);
+	p9_req_put(c->req);
 	kfree(c);
 }
 
@@ -611,6 +612,7 @@ static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
 {
 	struct p9_trans_rdma *rdma = client->trans;
 	atomic_inc(&rdma->excess_rc);
+	p9_req_put(req);
 	return 0;
 }
 
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 3dd6ce1c0f2d..c7aaea74cdc1 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -404,6 +404,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 	struct scatterlist *sgs[4];
 	size_t offs;
 	int need_drop = 0;
+	int kicked = 0;
 
 	p9_debug(P9_DEBUG_TRANS, "virtio request\n");
 
@@ -411,8 +412,10 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 		__le32 sz;
 		int n = p9_get_mapped_pages(chan, &out_pages, uodata,
 					    outlen, &offs, &need_drop);
-		if (n < 0)
-			return n;
+		if (n < 0) {
+			err = n;
+			goto err_out;
+		}
 		out_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
 		if (n != outlen) {
 			__le32 v = cpu_to_le32(n);
@@ -428,8 +431,10 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 	} else if (uidata) {
 		int n = p9_get_mapped_pages(chan, &in_pages, uidata,
 					    inlen, &offs, &need_drop);
-		if (n < 0)
-			return n;
+		if (n < 0) {
+			err = n;
+			goto err_out;
+		}
 		in_nr_pages = DIV_ROUND_UP(n + offs, PAGE_SIZE);
 		if (n != inlen) {
 			__le32 v = cpu_to_le32(n);
@@ -498,6 +503,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 	}
 	virtqueue_kick(chan->vq);
 	spin_unlock_irqrestore(&chan->lock, flags);
+	kicked = 1;
 	p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
 	err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
 	/*
@@ -518,6 +524,10 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 	}
 	kvfree(in_pages);
 	kvfree(out_pages);
+	if (!kicked) {
+		/* reply won't come */
+		p9_req_put(req);
+	}
 	return err;
 }
 
-- 
2.17.1

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox