Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-stable 19/24] hv_netvsc: common detach logic
From: Stephen Hemminger @ 2018-05-14 22:32 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger, Stephen Hemminger
In-Reply-To: <20180514223223.25433-1-sthemmin@microsoft.com>

From: Stephen Hemminger <stephen@networkplumber.org>

commit 7b2ee50c0cd513a176a26a71f2989facdd75bfea upstream

Make common function for detaching internals of device
during changes to MTU and RSS. Make sure no more packets
are transmitted and all packets have been received before
doing device teardown.

Change the wait logic to be common and use usleep_range().

Changes transmit enabling logic so that transmit queues are disabled
during the period when lower device is being changed. And enabled
only after sub channels are setup. This avoids issue where it could
be that a packet was being sent while subchannel was not initialized.

Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug")
Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/hyperv_net.h   |   1 -
 drivers/net/hyperv/netvsc.c       |  19 +-
 drivers/net/hyperv/netvsc_drv.c   | 278 +++++++++++++++++-------------
 drivers/net/hyperv/rndis_filter.c |  15 +-
 4 files changed, 173 insertions(+), 140 deletions(-)

diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index fd51a329e36e..01017dd88802 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -208,7 +208,6 @@ void netvsc_channel_cb(void *context);
 int netvsc_poll(struct napi_struct *napi, int budget);
 
 void rndis_set_subchannel(struct work_struct *w);
-bool rndis_filter_opened(const struct netvsc_device *nvdev);
 int rndis_filter_open(struct netvsc_device *nvdev);
 int rndis_filter_close(struct netvsc_device *nvdev);
 struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 3f3c6489ade2..0c548748fba8 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -554,8 +554,6 @@ void netvsc_device_remove(struct hv_device *device)
 		= rtnl_dereference(net_device_ctx->nvdev);
 	int i;
 
-	cancel_work_sync(&net_device->subchan_work);
-
 	netvsc_revoke_buf(device, net_device);
 
 	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
@@ -644,13 +642,18 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
 	queue_sends =
 		atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
 
-	if (net_device->destroy && queue_sends == 0)
-		wake_up(&net_device->wait_drain);
+	if (unlikely(net_device->destroy)) {
+		if (queue_sends == 0)
+			wake_up(&net_device->wait_drain);
+	} else {
+		struct netdev_queue *txq = netdev_get_tx_queue(ndev, q_idx);
 
-	if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
-	    (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
-	     queue_sends < 1))
-		netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
+		if (netif_tx_queue_stopped(txq) &&
+		    (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
+		     queue_sends < 1)) {
+			netif_tx_wake_queue(txq);
+		}
+	}
 }
 
 static void netvsc_send_completion(struct netvsc_device *net_device,
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index daa450e4e2a4..be4b15b63355 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -45,7 +45,10 @@
 
 #include "hyperv_net.h"
 
-#define RING_SIZE_MIN		64
+#define RING_SIZE_MIN	64
+#define RETRY_US_LO	5000
+#define RETRY_US_HI	10000
+#define RETRY_MAX	2000	/* >10 sec */
 
 #define LINKCHANGE_INT (2 * HZ)
 #define VF_TAKEOVER_INT (HZ / 10)
@@ -89,10 +92,8 @@ static int netvsc_open(struct net_device *net)
 	}
 
 	rdev = nvdev->extension;
-	if (!rdev->link_state) {
+	if (!rdev->link_state)
 		netif_carrier_on(net);
-		netif_tx_wake_all_queues(net);
-	}
 
 	if (vf_netdev) {
 		/* Setting synthetic device up transparently sets
@@ -108,36 +109,25 @@ static int netvsc_open(struct net_device *net)
 	return 0;
 }
 
-static int netvsc_close(struct net_device *net)
+static int netvsc_wait_until_empty(struct netvsc_device *nvdev)
 {
-	struct net_device_context *net_device_ctx = netdev_priv(net);
-	struct net_device *vf_netdev
-		= rtnl_dereference(net_device_ctx->vf_netdev);
-	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
-	int ret = 0;
-	u32 aread, i, msec = 10, retry = 0, retry_max = 20;
-	struct vmbus_channel *chn;
-
-	netif_tx_disable(net);
-
-	/* No need to close rndis filter if it is removed already */
-	if (!nvdev)
-		goto out;
-
-	ret = rndis_filter_close(nvdev);
-	if (ret != 0) {
-		netdev_err(net, "unable to close device (ret %d).\n", ret);
-		return ret;
-	}
+	unsigned int retry = 0;
+	int i;
 
 	/* Ensure pending bytes in ring are read */
-	while (true) {
-		aread = 0;
+	for (;;) {
+		u32 aread = 0;
+
 		for (i = 0; i < nvdev->num_chn; i++) {
-			chn = nvdev->chan_table[i].channel;
+			struct vmbus_channel *chn
+				= nvdev->chan_table[i].channel;
+
 			if (!chn)
 				continue;
 
+			/* make sure receive not running now */
+			napi_synchronize(&nvdev->chan_table[i].napi);
+
 			aread = hv_get_bytes_to_read(&chn->inbound);
 			if (aread)
 				break;
@@ -147,22 +137,40 @@ static int netvsc_close(struct net_device *net)
 				break;
 		}
 
-		retry++;
-		if (retry > retry_max || aread == 0)
-			break;
+		if (aread == 0)
+			return 0;
 
-		msleep(msec);
+		if (++retry > RETRY_MAX)
+			return -ETIMEDOUT;
 
-		if (msec < 1000)
-			msec *= 2;
+		usleep_range(RETRY_US_LO, RETRY_US_HI);
 	}
+}
 
-	if (aread) {
-		netdev_err(net, "Ring buffer not empty after closing rndis\n");
-		ret = -ETIMEDOUT;
+static int netvsc_close(struct net_device *net)
+{
+	struct net_device_context *net_device_ctx = netdev_priv(net);
+	struct net_device *vf_netdev
+		= rtnl_dereference(net_device_ctx->vf_netdev);
+	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
+	int ret;
+
+	netif_tx_disable(net);
+
+	/* No need to close rndis filter if it is removed already */
+	if (!nvdev)
+		return 0;
+
+	ret = rndis_filter_close(nvdev);
+	if (ret != 0) {
+		netdev_err(net, "unable to close device (ret %d).\n", ret);
+		return ret;
 	}
 
-out:
+	ret = netvsc_wait_until_empty(nvdev);
+	if (ret)
+		netdev_err(net, "Ring buffer not empty after closing rndis\n");
+
 	if (vf_netdev)
 		dev_close(vf_netdev);
 
@@ -820,16 +828,81 @@ static void netvsc_get_channels(struct net_device *net,
 	}
 }
 
+static int netvsc_detach(struct net_device *ndev,
+			 struct netvsc_device *nvdev)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(ndev);
+	struct hv_device *hdev = ndev_ctx->device_ctx;
+	int ret;
+
+	/* Don't try continuing to try and setup sub channels */
+	if (cancel_work_sync(&nvdev->subchan_work))
+		nvdev->num_chn = 1;
+
+	/* If device was up (receiving) then shutdown */
+	if (netif_running(ndev)) {
+		netif_tx_disable(ndev);
+
+		ret = rndis_filter_close(nvdev);
+		if (ret) {
+			netdev_err(ndev,
+				   "unable to close device (ret %d).\n", ret);
+			return ret;
+		}
+
+		ret = netvsc_wait_until_empty(nvdev);
+		if (ret) {
+			netdev_err(ndev,
+				   "Ring buffer not empty after closing rndis\n");
+			return ret;
+		}
+	}
+
+	netif_device_detach(ndev);
+
+	rndis_filter_device_remove(hdev, nvdev);
+
+	return 0;
+}
+
+static int netvsc_attach(struct net_device *ndev,
+			 struct netvsc_device_info *dev_info)
+{
+	struct net_device_context *ndev_ctx = netdev_priv(ndev);
+	struct hv_device *hdev = ndev_ctx->device_ctx;
+	struct netvsc_device *nvdev;
+	struct rndis_device *rdev;
+	int ret;
+
+	nvdev = rndis_filter_device_add(hdev, dev_info);
+	if (IS_ERR(nvdev))
+		return PTR_ERR(nvdev);
+
+	/* Note: enable and attach happen when sub-channels setup */
+
+	netif_carrier_off(ndev);
+
+	if (netif_running(ndev)) {
+		ret = rndis_filter_open(nvdev);
+		if (ret)
+			return ret;
+
+		rdev = nvdev->extension;
+		if (!rdev->link_state)
+			netif_carrier_on(ndev);
+	}
+
+	return 0;
+}
+
 static int netvsc_set_channels(struct net_device *net,
 			       struct ethtool_channels *channels)
 {
 	struct net_device_context *net_device_ctx = netdev_priv(net);
-	struct hv_device *dev = net_device_ctx->device_ctx;
 	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
 	unsigned int orig, count = channels->combined_count;
 	struct netvsc_device_info device_info;
-	bool was_opened;
-	int ret = 0;
+	int ret;
 
 	/* We do not support separate count for rx, tx, or other */
 	if (count == 0 ||
@@ -846,9 +919,6 @@ static int netvsc_set_channels(struct net_device *net,
 		return -EINVAL;
 
 	orig = nvdev->num_chn;
-	was_opened = rndis_filter_opened(nvdev);
-	if (was_opened)
-		rndis_filter_close(nvdev);
 
 	memset(&device_info, 0, sizeof(device_info));
 	device_info.num_chn = count;
@@ -858,28 +928,17 @@ static int netvsc_set_channels(struct net_device *net,
 	device_info.recv_sections = nvdev->recv_section_cnt;
 	device_info.recv_section_size = nvdev->recv_section_size;
 
-	rndis_filter_device_remove(dev, nvdev);
+	ret = netvsc_detach(net, nvdev);
+	if (ret)
+		return ret;
 
-	nvdev = rndis_filter_device_add(dev, &device_info);
-	if (IS_ERR(nvdev)) {
-		ret = PTR_ERR(nvdev);
+	ret = netvsc_attach(net, &device_info);
+	if (ret) {
 		device_info.num_chn = orig;
-		nvdev = rndis_filter_device_add(dev, &device_info);
-
-		if (IS_ERR(nvdev)) {
-			netdev_err(net, "restoring channel setting failed: %ld\n",
-				   PTR_ERR(nvdev));
-			return ret;
-		}
+		if (netvsc_attach(net, &device_info))
+			netdev_err(net, "restoring channel setting failed\n");
 	}
 
-	if (was_opened)
-		rndis_filter_open(nvdev);
-
-	/* We may have missed link change notifications */
-	net_device_ctx->last_reconfig = 0;
-	schedule_delayed_work(&net_device_ctx->dwork, 0);
-
 	return ret;
 }
 
@@ -946,10 +1005,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 	struct net_device_context *ndevctx = netdev_priv(ndev);
 	struct net_device *vf_netdev = rtnl_dereference(ndevctx->vf_netdev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
-	struct hv_device *hdev = ndevctx->device_ctx;
 	int orig_mtu = ndev->mtu;
 	struct netvsc_device_info device_info;
-	bool was_opened;
 	int ret = 0;
 
 	if (!nvdev || nvdev->destroy)
@@ -962,11 +1019,6 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 			return ret;
 	}
 
-	netif_device_detach(ndev);
-	was_opened = rndis_filter_opened(nvdev);
-	if (was_opened)
-		rndis_filter_close(nvdev);
-
 	memset(&device_info, 0, sizeof(device_info));
 	device_info.ring_size = ring_size;
 	device_info.num_chn = nvdev->num_chn;
@@ -975,35 +1027,27 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
 	device_info.recv_sections = nvdev->recv_section_cnt;
 	device_info.recv_section_size = nvdev->recv_section_size;
 
-	rndis_filter_device_remove(hdev, nvdev);
+	ret = netvsc_detach(ndev, nvdev);
+	if (ret)
+		goto rollback_vf;
 
 	ndev->mtu = mtu;
 
-	nvdev = rndis_filter_device_add(hdev, &device_info);
-	if (IS_ERR(nvdev)) {
-		ret = PTR_ERR(nvdev);
-
-		/* Attempt rollback to original MTU */
-		ndev->mtu = orig_mtu;
-		nvdev = rndis_filter_device_add(hdev, &device_info);
-
-		if (vf_netdev)
-			dev_set_mtu(vf_netdev, orig_mtu);
-
-		if (IS_ERR(nvdev)) {
-			netdev_err(ndev, "restoring mtu failed: %ld\n",
-				   PTR_ERR(nvdev));
-			return ret;
-		}
-	}
+	ret = netvsc_attach(ndev, &device_info);
+	if (ret)
+		goto rollback;
 
-	if (was_opened)
-		rndis_filter_open(nvdev);
+	return 0;
 
-	netif_device_attach(ndev);
+rollback:
+	/* Attempt rollback to original MTU */
+	ndev->mtu = orig_mtu;
 
-	/* We may have missed link change notifications */
-	schedule_delayed_work(&ndevctx->dwork, 0);
+	if (netvsc_attach(ndev, &device_info))
+		netdev_err(ndev, "restoring mtu failed\n");
+rollback_vf:
+	if (vf_netdev)
+		dev_set_mtu(vf_netdev, orig_mtu);
 
 	return ret;
 }
@@ -1469,11 +1513,9 @@ static int netvsc_set_ringparam(struct net_device *ndev,
 {
 	struct net_device_context *ndevctx = netdev_priv(ndev);
 	struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
-	struct hv_device *hdev = ndevctx->device_ctx;
 	struct netvsc_device_info device_info;
 	struct ethtool_ringparam orig;
 	u32 new_tx, new_rx;
-	bool was_opened;
 	int ret = 0;
 
 	if (!nvdev || nvdev->destroy)
@@ -1499,34 +1541,18 @@ static int netvsc_set_ringparam(struct net_device *ndev,
 	device_info.recv_sections = new_rx;
 	device_info.recv_section_size = nvdev->recv_section_size;
 
-	netif_device_detach(ndev);
-	was_opened = rndis_filter_opened(nvdev);
-	if (was_opened)
-		rndis_filter_close(nvdev);
-
-	rndis_filter_device_remove(hdev, nvdev);
-
-	nvdev = rndis_filter_device_add(hdev, &device_info);
-	if (IS_ERR(nvdev)) {
-		ret = PTR_ERR(nvdev);
+	ret = netvsc_detach(ndev, nvdev);
+	if (ret)
+		return ret;
 
+	ret = netvsc_attach(ndev, &device_info);
+	if (ret) {
 		device_info.send_sections = orig.tx_pending;
 		device_info.recv_sections = orig.rx_pending;
-		nvdev = rndis_filter_device_add(hdev, &device_info);
-		if (IS_ERR(nvdev)) {
-			netdev_err(ndev, "restoring ringparam failed: %ld\n",
-				   PTR_ERR(nvdev));
-			return ret;
-		}
-	}
-
-	if (was_opened)
-		rndis_filter_open(nvdev);
-	netif_device_attach(ndev);
 
-	/* We may have missed link change notifications */
-	ndevctx->last_reconfig = 0;
-	schedule_delayed_work(&ndevctx->dwork, 0);
+		if (netvsc_attach(ndev, &device_info))
+			netdev_err(ndev, "restoring ringparam failed");
+	}
 
 	return ret;
 }
@@ -2002,8 +2028,8 @@ static int netvsc_probe(struct hv_device *dev,
 static int netvsc_remove(struct hv_device *dev)
 {
 	struct net_device_context *ndev_ctx;
-	struct net_device *vf_netdev;
-	struct net_device *net;
+	struct net_device *vf_netdev, *net;
+	struct netvsc_device *nvdev;
 
 	net = hv_get_drvdata(dev);
 	if (net == NULL) {
@@ -2013,10 +2039,14 @@ static int netvsc_remove(struct hv_device *dev)
 
 	ndev_ctx = netdev_priv(net);
 
-	netif_device_detach(net);
-
 	cancel_delayed_work_sync(&ndev_ctx->dwork);
 
+	rcu_read_lock();
+	nvdev = rcu_dereference(ndev_ctx->nvdev);
+
+	if  (nvdev)
+		cancel_work_sync(&nvdev->subchan_work);
+
 	/*
 	 * Call to the vsc driver to let it know that the device is being
 	 * removed. Also blocks mtu and channel changes.
@@ -2026,11 +2056,13 @@ static int netvsc_remove(struct hv_device *dev)
 	if (vf_netdev)
 		netvsc_unregister_vf(vf_netdev);
 
+	if (nvdev)
+		rndis_filter_device_remove(dev, nvdev);
+
 	unregister_netdevice(net);
 
-	rndis_filter_device_remove(dev,
-				   rtnl_dereference(ndev_ctx->nvdev));
 	rtnl_unlock();
+	rcu_read_unlock();
 
 	hv_set_drvdata(dev, NULL);
 
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 056499014a1f..3bfa56560286 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1112,6 +1112,7 @@ void rndis_set_subchannel(struct work_struct *w)
 	for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
 		ndev_ctx->tx_table[i] = i % nvdev->num_chn;
 
+	netif_device_attach(ndev);
 	rtnl_unlock();
 	return;
 
@@ -1122,6 +1123,8 @@ void rndis_set_subchannel(struct work_struct *w)
 
 	nvdev->max_chn = 1;
 	nvdev->num_chn = 1;
+
+	netif_device_attach(ndev);
 unlock:
 	rtnl_unlock();
 }
@@ -1324,6 +1327,10 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
 		net_device->num_chn = 1;
 	}
 
+	/* No sub channels, device is ready */
+	if (net_device->num_chn == 1)
+		netif_device_attach(net);
+
 	return net_device;
 
 err_dev_remv:
@@ -1336,9 +1343,6 @@ void rndis_filter_device_remove(struct hv_device *dev,
 {
 	struct rndis_device *rndis_dev = net_dev->extension;
 
-	/* Don't try and setup sub channels if about to halt */
-	cancel_work_sync(&net_dev->subchan_work);
-
 	/* Halt and release the rndis device */
 	rndis_filter_halt_device(rndis_dev);
 
@@ -1368,8 +1372,3 @@ int rndis_filter_close(struct netvsc_device *nvdev)
 
 	return rndis_filter_close_device(nvdev->extension);
 }
-
-bool rndis_filter_opened(const struct netvsc_device *nvdev)
-{
-	return atomic_read(&nvdev->open_cnt) > 0;
-}
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-stable 20/24] hv_netvsc: Use Windows version instead of NVSP version on GPAD teardown
From: Stephen Hemminger @ 2018-05-14 22:32 UTC (permalink / raw)
  To: davem; +Cc: netdev, Mohammed Gamal
In-Reply-To: <20180514223223.25433-1-sthemmin@microsoft.com>

From: Mohammed Gamal <mgamal@redhat.com>

commit 2afc5d61a7197de25a61f54ea4ecfb4cb62b1d42A upstram

When changing network interface settings, Windows guests
older than WS2016 can no longer shutdown. This was addressed
by commit 0ef58b0a05c12 ("hv_netvsc: change GPAD teardown order
on older versions"), however the issue also occurs on WS2012
guests that share NVSP protocol versions with WS2016 guests.
Hence we use Windows version directly to differentiate them.

Fixes: 0ef58b0a05c12 ("hv_netvsc: change GPAD teardown order on older versions")
Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 0c548748fba8..aa9b7a912c31 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -569,13 +569,13 @@ void netvsc_device_remove(struct hv_device *device)
 	netdev_dbg(ndev, "net device safe to remove\n");
 
 	/* older versions require that buffer be revoked before close */
-	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_4)
+	if (vmbus_proto_version < VERSION_WIN10)
 		netvsc_teardown_gpadl(device, net_device);
 
 	/* Now, we can close the channel safely */
 	vmbus_close(device->channel);
 
-	if (net_device->nvsp_version >= NVSP_PROTOCOL_VERSION_4)
+	if (vmbus_proto_version >= VERSION_WIN10)
 		netvsc_teardown_gpadl(device, net_device);
 
 	/* Release all resources */
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-stable 21/24] hv_netvsc: Split netvsc_revoke_buf() and netvsc_teardown_gpadl()
From: Stephen Hemminger @ 2018-05-14 22:32 UTC (permalink / raw)
  To: davem; +Cc: netdev, Mohammed Gamal
In-Reply-To: <20180514223223.25433-1-sthemmin@microsoft.com>

From: Mohammed Gamal <mgamal@redhat.com>

commit 7992894c305eaf504d005529637ff8283d0a849d upstream

Split each of the functions into two for each of send/recv buffers.
This will be needed in order to implement a fine-grained messaging
sequence to the host so that we accommodate the requirements of
different Windows versions

Fixes: 0ef58b0a05c12 ("hv_netvsc: change GPAD teardown order on older versions")
Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc.c | 46 +++++++++++++++++++++++++++----------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index aa9b7a912c31..25fcba506ac5 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -105,11 +105,11 @@ static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
 	call_rcu(&nvdev->rcu, free_netvsc_device);
 }
 
-static void netvsc_revoke_buf(struct hv_device *device,
-			      struct netvsc_device *net_device)
+static void netvsc_revoke_recv_buf(struct hv_device *device,
+				   struct netvsc_device *net_device)
 {
-	struct nvsp_message *revoke_packet;
 	struct net_device *ndev = hv_get_drvdata(device);
+	struct nvsp_message *revoke_packet;
 	int ret;
 
 	/*
@@ -151,6 +151,14 @@ static void netvsc_revoke_buf(struct hv_device *device,
 		}
 		net_device->recv_section_cnt = 0;
 	}
+}
+
+static void netvsc_revoke_send_buf(struct hv_device *device,
+				   struct netvsc_device *net_device)
+{
+	struct net_device *ndev = hv_get_drvdata(device);
+	struct nvsp_message *revoke_packet;
+	int ret;
 
 	/* Deal with the send buffer we may have setup.
 	 * If we got a  send section size, it means we received a
@@ -194,8 +202,8 @@ static void netvsc_revoke_buf(struct hv_device *device,
 	}
 }
 
-static void netvsc_teardown_gpadl(struct hv_device *device,
-				  struct netvsc_device *net_device)
+static void netvsc_teardown_recv_gpadl(struct hv_device *device,
+				       struct netvsc_device *net_device)
 {
 	struct net_device *ndev = hv_get_drvdata(device);
 	int ret;
@@ -214,6 +222,13 @@ static void netvsc_teardown_gpadl(struct hv_device *device,
 		}
 		net_device->recv_buf_gpadl_handle = 0;
 	}
+}
+
+static void netvsc_teardown_send_gpadl(struct hv_device *device,
+				       struct netvsc_device *net_device)
+{
+	struct net_device *ndev = hv_get_drvdata(device);
+	int ret;
 
 	if (net_device->send_buf_gpadl_handle) {
 		ret = vmbus_teardown_gpadl(device->channel,
@@ -423,8 +438,10 @@ static int netvsc_init_buf(struct hv_device *device,
 	goto exit;
 
 cleanup:
-	netvsc_revoke_buf(device, net_device);
-	netvsc_teardown_gpadl(device, net_device);
+	netvsc_revoke_recv_buf(device, net_device);
+	netvsc_revoke_send_buf(device, net_device);
+	netvsc_teardown_recv_gpadl(device, net_device);
+	netvsc_teardown_send_gpadl(device, net_device);
 
 exit:
 	return ret;
@@ -554,7 +571,8 @@ void netvsc_device_remove(struct hv_device *device)
 		= rtnl_dereference(net_device_ctx->nvdev);
 	int i;
 
-	netvsc_revoke_buf(device, net_device);
+	netvsc_revoke_recv_buf(device, net_device);
+	netvsc_revoke_send_buf(device, net_device);
 
 	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
 
@@ -569,14 +587,18 @@ void netvsc_device_remove(struct hv_device *device)
 	netdev_dbg(ndev, "net device safe to remove\n");
 
 	/* older versions require that buffer be revoked before close */
-	if (vmbus_proto_version < VERSION_WIN10)
-		netvsc_teardown_gpadl(device, net_device);
+	if (vmbus_proto_version < VERSION_WIN10) {
+		netvsc_teardown_recv_gpadl(device, net_device);
+		netvsc_teardown_send_gpadl(device, net_device);
+	}
 
 	/* Now, we can close the channel safely */
 	vmbus_close(device->channel);
 
-	if (vmbus_proto_version >= VERSION_WIN10)
-		netvsc_teardown_gpadl(device, net_device);
+	if (vmbus_proto_version >= VERSION_WIN10) {
+		netvsc_teardown_recv_gpadl(device, net_device);
+		netvsc_teardown_send_gpadl(device, net_device);
+	}
 
 	/* Release all resources */
 	free_netvsc_device_rcu(net_device);
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-stable 22/24] hv_netvsc: Ensure correct teardown message sequence order
From: Stephen Hemminger @ 2018-05-14 22:32 UTC (permalink / raw)
  To: davem; +Cc: netdev, Mohammed Gamal
In-Reply-To: <20180514223223.25433-1-sthemmin@microsoft.com>

From: Mohammed Gamal <mgamal@redhat.com>

commit a56d99d714665591fed8527b90eef21530ea61e0 upstream

Prior to commit 0cf737808ae7 ("hv_netvsc: netvsc_teardown_gpadl() split")
the call sequence in netvsc_device_remove() was as follows (as
implemented in netvsc_destroy_buf()):
1- Send NVSP_MSG1_TYPE_REVOKE_RECV_BUF message
2- Teardown receive buffer GPADL
3- Send NVSP_MSG1_TYPE_REVOKE_SEND_BUF message
4- Teardown send buffer GPADL
5- Close vmbus

This didn't work for WS2016 hosts. Commit 0cf737808ae7
("hv_netvsc: netvsc_teardown_gpadl() split") rearranged the
teardown sequence as follows:
1- Send NVSP_MSG1_TYPE_REVOKE_RECV_BUF message
2- Send NVSP_MSG1_TYPE_REVOKE_SEND_BUF message
3- Close vmbus
4- Teardown receive buffer GPADL
5- Teardown send buffer GPADL

That worked well for WS2016 hosts, but it prevented guests on older hosts from
shutting down after changing network settings. Commit 0ef58b0a05c1
("hv_netvsc: change GPAD teardown order on older versions") ensured the
following message sequence for older hosts
1- Send NVSP_MSG1_TYPE_REVOKE_RECV_BUF message
2- Send NVSP_MSG1_TYPE_REVOKE_SEND_BUF message
3- Teardown receive buffer GPADL
4- Teardown send buffer GPADL
5- Close vmbus

However, with this sequence calling `ip link set eth0 mtu 1000` hangs and the
process becomes uninterruptible. On futher analysis it turns out that on tearing
down the receive buffer GPADL the kernel is waiting indefinitely
in vmbus_teardown_gpadl() for a completion to be signaled.

Here is a snippet of where this occurs:
int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
{
        struct vmbus_channel_gpadl_teardown *msg;
        struct vmbus_channel_msginfo *info;
        unsigned long flags;
        int ret;

        info = kmalloc(sizeof(*info) +
                       sizeof(struct vmbus_channel_gpadl_teardown), GFP_KERNEL);
        if (!info)
                return -ENOMEM;

        init_completion(&info->waitevent);
        info->waiting_channel = channel;
[....]
        ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_gpadl_teardown),
                             true);

        if (ret)
                goto post_msg_err;

        wait_for_completion(&info->waitevent);
[....]
}

The completion is signaled from vmbus_ongpadl_torndown(), which gets called when
the corresponding message is received from the host, which apparently never happens
in that case.
This patch works around the issue by restoring the first mentioned message sequence
for older hosts

Fixes: 0ef58b0a05c1 ("hv_netvsc: change GPAD teardown order on older versions")
Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 25fcba506ac5..99be63eacaeb 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -571,8 +571,17 @@ void netvsc_device_remove(struct hv_device *device)
 		= rtnl_dereference(net_device_ctx->nvdev);
 	int i;
 
+	/*
+	 * Revoke receive buffer. If host is pre-Win2016 then tear down
+	 * receive buffer GPADL. Do the same for send buffer.
+	 */
 	netvsc_revoke_recv_buf(device, net_device);
+	if (vmbus_proto_version < VERSION_WIN10)
+		netvsc_teardown_recv_gpadl(device, net_device);
+
 	netvsc_revoke_send_buf(device, net_device);
+	if (vmbus_proto_version < VERSION_WIN10)
+		netvsc_teardown_send_gpadl(device, net_device);
 
 	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
 
@@ -586,15 +595,13 @@ void netvsc_device_remove(struct hv_device *device)
 	 */
 	netdev_dbg(ndev, "net device safe to remove\n");
 
-	/* older versions require that buffer be revoked before close */
-	if (vmbus_proto_version < VERSION_WIN10) {
-		netvsc_teardown_recv_gpadl(device, net_device);
-		netvsc_teardown_send_gpadl(device, net_device);
-	}
-
 	/* Now, we can close the channel safely */
 	vmbus_close(device->channel);
 
+	/*
+	 * If host is Win2016 or higher then we do the GPADL tear down
+	 * here after VMBus is closed.
+	*/
 	if (vmbus_proto_version >= VERSION_WIN10) {
 		netvsc_teardown_recv_gpadl(device, net_device);
 		netvsc_teardown_send_gpadl(device, net_device);
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-stable 23/24] hv_netvsc: Fix net device attach on older Windows hosts
From: Stephen Hemminger @ 2018-05-14 22:32 UTC (permalink / raw)
  To: davem; +Cc: netdev, Mohammed Gamal
In-Reply-To: <20180514223223.25433-1-sthemmin@microsoft.com>

From: Mohammed Gamal <mgamal@redhat.com>

commit 55be9f25be1ca5bda75c39808fc77e42691bc07f upstream

On older windows hosts the net_device instance is returned to
the caller of rndis_filter_device_add() without having the presence
bit set first. This would cause any subsequent calls to network device
operations (e.g. MTU change, channel change) to fail after the device
is detached once, returning -ENODEV.

Instead of returning the device instabce, we take the exit path where
we call netif_device_attach()

Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic")
Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
Reviewed-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/rndis_filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 3bfa56560286..6dde92c1c113 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1276,7 +1276,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
 		   rndis_device->link_state ? "down" : "up");
 
 	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
-		return net_device;
+		goto out;
 
 	rndis_filter_query_link_speed(rndis_device, net_device);
 
-- 
2.17.0

^ permalink raw reply related

* [PATCH net-stable 24/24] hv_netvsc: set master device
From: Stephen Hemminger @ 2018-05-14 22:32 UTC (permalink / raw)
  To: davem; +Cc: netdev, Stephen Hemminger
In-Reply-To: <20180514223223.25433-1-sthemmin@microsoft.com>

From: Stephen Hemminger <stephen@networkplumber.org>

commit 97f3efb64323beb0690576e9d74e94998ad6e82a upstream

The hyper-v transparent bonding should have used master_dev_link.
The netvsc device should look like a master bond device not
like the upper side of a tunnel.

This makes the semantics the same so that userspace applications
looking at network devices see the correct master relationshipship.

Fixes: 0c195567a8f6 ("netvsc: transparent VF management")
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index be4b15b63355..11b46c8d2d67 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1778,7 +1778,8 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
 		goto rx_handler_failed;
 	}
 
-	ret = netdev_upper_dev_link(vf_netdev, ndev);
+	ret = netdev_master_upper_dev_link(vf_netdev, ndev,
+					   NULL, NULL);
 	if (ret != 0) {
 		netdev_err(vf_netdev,
 			   "can not set master device %s (err = %d)\n",
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH] net/mlx5: Use 'kvfree()' for memory allocated by 'kvzalloc()'
From: Saeed Mahameed @ 2018-05-14 22:35 UTC (permalink / raw)
  To: David Miller
  Cc: christophe.jaillet, Saeed Mahameed, Matan Barak, Leon Romanovsky,
	Linux Netdev List, RDMA mailing list, linux-kernel,
	kernel-janitors
In-Reply-To: <20180514.145642.989041199343505570.davem@davemloft.net>

On Mon, May 14, 2018 at 11:56 AM, David Miller <davem@davemloft.net> wrote:
> From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
> Date: Sat, 12 May 2018 19:09:25 +0200
>
>> 'out' is allocated with 'kvzalloc()'. 'kvfree()' must be used to free it.
>>
>> Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
>
> Saeed, I assume I will see this in one of your forthcoming pull
> requests.
>
> Thanks.

In case this is for net-next,  I will apply v3 to mlx5-next once
Christophe adds the "Fixes" tags according to Eric's request.
if it is for net (RC) then you can go ahead and apply v3 to net branch.

Thanks,
Saeed.

^ permalink raw reply

* [net 1/1] net/mlx5: Fix build break when CONFIG_SMP=n
From: Saeed Mahameed @ 2018-05-14 22:38 UTC (permalink / raw)
  To: David S. Miller
  Cc: netdev, Saeed Mahameed, Randy Dunlap, Guenter Roeck,
	Thomas Gleixner

Avoid using the kernel's irq_descriptor and return IRQ vector affinity
directly from the driver.

This fixes the following build break when CONFIG_SMP=n

include/linux/mlx5/driver.h: In function ‘mlx5_get_vector_affinity_hint’:
include/linux/mlx5/driver.h:1299:13: error:
        ‘struct irq_desc’ has no member named ‘affinity_hint’

Fixes: 6082d9c9c94a ("net/mlx5: Fix mlx5_get_vector_affinity function")
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
CC: Randy Dunlap <rdunlap@infradead.org>
CC: Guenter Roeck <linux@roeck-us.net>
CC: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Israel Rukshin <israelr@mellanox.com>
---

For -stable v4.14

 include/linux/mlx5/driver.h | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 2a156c5dfadd..d703774982ca 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1286,17 +1286,7 @@ enum {
 static inline const struct cpumask *
 mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector)
 {
-	struct irq_desc *desc;
-	unsigned int irq;
-	int eqn;
-	int err;
-
-	err = mlx5_vector2eqn(dev, vector, &eqn, &irq);
-	if (err)
-		return NULL;
-
-	desc = irq_to_desc(irq);
-	return desc->affinity_hint;
+	return dev->priv.irq_info[vector].mask;
 }
 
 #endif /* MLX5_DRIVER_H */
-- 
2.17.0

^ permalink raw reply related

* Re: [PATCH bpf-next v5 3/6] bpf: Add IPv6 Segment Routing helpers
From: Daniel Borkmann @ 2018-05-14 22:40 UTC (permalink / raw)
  To: Mathieu Xhonneux, netdev; +Cc: dlebrun, alexei.starovoitov
In-Reply-To: <7839e5fff52b4b96e5eb0ae8a72a76f8a1e76a8e.1526143526.git.m.xhonneux@gmail.com>

On 05/12/2018 07:25 PM, Mathieu Xhonneux wrote:
[...]
> +BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
> +	   const void *, from, u32, len)
> +{
> +#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
> +	struct seg6_bpf_srh_state *srh_state =
> +		this_cpu_ptr(&seg6_bpf_srh_states);
> +	void *srh_tlvs, *srh_end, *ptr;
> +	struct ipv6_sr_hdr *srh;
> +	int srhoff = 0;
> +
> +	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
> +		return -EINVAL;
> +
> +	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
> +	srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
> +	srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);

Do we need to check that this cannot go out of bounds wrt skb data?

> +	ptr = skb->data + offset;
> +	if (ptr >= srh_tlvs && ptr + len <= srh_end)
> +		srh_state->valid = 0;
> +	else if (ptr < (void *)&srh->flags ||
> +		 ptr + len > (void *)&srh->segments)
> +		return -EFAULT;
> +
> +	if (unlikely(bpf_try_make_writable(skb, offset + len)))
> +		return -EFAULT;
> +
> +	memcpy(ptr, from, len);

You have a use after free here. bpf_try_make_writable() is potentially changing
underlying skb->data (e.g. see pskb_expand_head()). Therefore memcpy()'ing into
cached ptr is invalid.

> +	return 0;
> +#else /* CONFIG_IPV6_SEG6_BPF */
> +	return -EOPNOTSUPP;
> +#endif
> +}
> +
> +static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
> +	.func		= bpf_lwt_seg6_store_bytes,
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_CTX,
> +	.arg2_type	= ARG_ANYTHING,
> +	.arg3_type	= ARG_PTR_TO_MEM,
> +	.arg4_type	= ARG_CONST_SIZE
> +};
> +
> +BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
> +	   u32, action, void *, param, u32, param_len)
> +{
> +#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
> +	struct seg6_bpf_srh_state *srh_state =
> +		this_cpu_ptr(&seg6_bpf_srh_states);
> +	struct ipv6_sr_hdr *srh;
> +	int srhoff = 0;
> +	int err;
> +
> +	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
> +		return -EINVAL;
> +	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
> +
> +	if (!srh_state->valid) {
> +		if (unlikely((srh_state->hdrlen & 7) != 0))
> +			return -EBADMSG;
> +
> +		srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
> +		if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
> +			return -EBADMSG;
> +
> +		srh_state->valid = 1;
> +	}
> +
> +	switch (action) {
> +	case SEG6_LOCAL_ACTION_END_X:
> +		if (param_len != sizeof(struct in6_addr))
> +			return -EINVAL;
> +		return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
> +	case SEG6_LOCAL_ACTION_END_T:
> +		if (param_len != sizeof(int))
> +			return -EINVAL;
> +		return seg6_lookup_nexthop(skb, NULL, *(int *)param);
> +	case SEG6_LOCAL_ACTION_END_B6:
> +		err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
> +					  param, param_len);
> +		if (!err)
> +			srh_state->hdrlen =
> +				((struct ipv6_sr_hdr *)param)->hdrlen << 3;
> +		return err;
> +	case SEG6_LOCAL_ACTION_END_B6_ENCAP:
> +		err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
> +					  param, param_len);
> +		if (!err)
> +			srh_state->hdrlen =
> +				((struct ipv6_sr_hdr *)param)->hdrlen << 3;
> +		return err;
> +	default:
> +		return -EINVAL;
> +	}
> +#else /* CONFIG_IPV6_SEG6_BPF */
> +	return -EOPNOTSUPP;
> +#endif
> +}
> +
> +static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
> +	.func		= bpf_lwt_seg6_action,
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_CTX,
> +	.arg2_type	= ARG_ANYTHING,
> +	.arg3_type	= ARG_PTR_TO_MEM,
> +	.arg4_type	= ARG_CONST_SIZE
> +};
> +
> +BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
> +	   s32, len)
> +{
> +#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
> +	struct seg6_bpf_srh_state *srh_state =
> +		this_cpu_ptr(&seg6_bpf_srh_states);
> +	void *srh_end, *srh_tlvs, *ptr;
> +	struct ipv6_sr_hdr *srh;
> +	struct ipv6hdr *hdr;
> +	int srhoff = 0;
> +	int ret;
> +
> +	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
> +		return -EINVAL;
> +	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
> +
> +	srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
> +			((srh->first_segment + 1) << 4));
> +	srh_end = (void *)((unsigned char *)srh + sizeof(*srh) +
> +			srh_state->hdrlen);
> +	ptr = skb->data + offset;
> +
> +	if (unlikely(ptr < srh_tlvs || ptr > srh_end))
> +		return -EFAULT;
> +	if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end))
> +		return -EFAULT;
> +
> +	if (len > 0) {
> +		ret = skb_cow_head(skb, len);
> +		if (unlikely(ret < 0))
> +			return ret;
> +
> +		ret = bpf_skb_net_hdr_push(skb, offset, len);
> +	} else {
> +		ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len);
> +	}
> +	if (unlikely(ret < 0))
> +		return ret;

And here as well. You changed underlying pointers via skb_cow_head(), but in
the error path you leave the cached pointers that now point to already freed
buffer. Thus, you'd now be able to access the new skb data out of bounds since
cb->data_end is still the old one due to missing bpf_compute_data_pointers(skb).
Please fix and audit your whole series carefully against these types of subtle
bugs.

> +	hdr = (struct ipv6hdr *)skb->data;
> +	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
> +
> +	bpf_compute_data_pointers(skb);
> +	srh_state->hdrlen += len;
> +	srh_state->valid = 0;
> +	return 0;
> +#else /* CONFIG_IPV6_SEG6_BPF */
> +	return -EOPNOTSUPP;
> +#endif
> +}
> +
> +static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
> +	.func		= bpf_lwt_seg6_adjust_srh,
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_CTX,
> +	.arg2_type	= ARG_ANYTHING,
> +	.arg3_type	= ARG_ANYTHING,
> +};
> +
> +bool bpf_helper_changes_pkt_data(void *func)
> +{
> +	if (func == bpf_skb_vlan_push ||
> +	    func == bpf_skb_vlan_pop ||
> +	    func == bpf_skb_store_bytes ||
> +	    func == bpf_skb_change_proto ||
> +	    func == bpf_skb_change_head ||
> +	    func == bpf_skb_change_tail ||
> +	    func == bpf_skb_adjust_room ||
> +	    func == bpf_skb_pull_data ||
> +	    func == bpf_clone_redirect ||
> +	    func == bpf_l3_csum_replace ||
> +	    func == bpf_l4_csum_replace ||
> +	    func == bpf_xdp_adjust_head ||
> +	    func == bpf_xdp_adjust_meta ||
> +	    func == bpf_msg_pull_data ||
> +	    func == bpf_xdp_adjust_tail ||
> +	    func == bpf_lwt_push_encap ||
> +	    func == bpf_lwt_seg6_store_bytes ||
> +	    func == bpf_lwt_seg6_adjust_srh ||
> +	    func == bpf_lwt_seg6_action
> +	    )
> +		return true;
> +
> +	return false;
> +}
> +
>  static const struct bpf_func_proto *
>  bpf_base_func_proto(enum bpf_func_id func_id)
>  {
> @@ -4703,7 +4940,6 @@ static bool lwt_is_valid_access(int off, int size,
>  	return bpf_skb_is_valid_access(off, size, type, prog, info);
>  }
>  
> -
>  /* Attach type specific accesses */
>  static bool __sock_filter_check_attach_type(int off,
>  					    enum bpf_access_type access_type,
> diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
> index 6794ddf0547c..f0e8a762ae0c 100644
> --- a/net/ipv6/Kconfig
> +++ b/net/ipv6/Kconfig
> @@ -330,4 +330,9 @@ config IPV6_SEG6_HMAC
>  
>  	  If unsure, say N.
>  
> +config IPV6_SEG6_BPF
> +	def_bool y
> +	depends on IPV6_SEG6_LWTUNNEL
> +	depends on IPV6 = y
> +
>  endif # IPV6
> diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
> index e9b23fb924ad..ae68c1ef8fb0 100644
> --- a/net/ipv6/seg6_local.c
> +++ b/net/ipv6/seg6_local.c
> @@ -449,6 +449,8 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
>  	return err;
>  }
>  
> +DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
> +
>  static struct seg6_action_desc seg6_action_table[] = {
>  	{
>  		.action		= SEG6_LOCAL_ACTION_END,
> 

^ permalink raw reply

* Re: [PATCH net-next] udp: Fix kernel panic in UDP GSO path
From: stranche @ 2018-05-14 22:45 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: Eric Dumazet, Willem de Bruijn, David Miller, Network Development,
	Subash Abhinov Kasiviswanathan
In-Reply-To: <CAF=yD-JH8ahoLNKOVjBScRXKP4UQqQpfq89C6xq0=nwd3jQtzw@mail.gmail.com>

On 2018-05-11 17:16, Willem de Bruijn wrote:

>> Hmm, no, we absolutely need to fix GSO instead.
>> 
>> Think of a bonding device (or any virtual devices), your patch wont 
>> avoid the crash.

Hi Eric. Can you clarify what you mean by "fix GSO?" Is that just having 
the GSO path work
regardless of whether or not SG is enabled for the device?

> 
> Thanks for reporting the issue.
> 
> Paged skbuffs is an optimization for gso, but the feature should
> continue to work even if gso skbs are linear, indeed (if at the cost
> of copying during skb_segment).
> 
> We need to make paged contingent on scatter-gather. Rough
> patch below. That is for ipv4 only, the same will be needed for ipv6.
> 
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index b5e21eb198d8..b38731d8a44f 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -884,7 +884,7 @@ static int __ip_append_data(struct sock *sk,
> 
>         exthdrlen = !skb ? rt->dst.header_len : 0;
>         mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
> -       paged = !!cork->gso_size;
> +       paged = cork->gso_size && (rt->dst.dev->features & NETIF_F_SG);

Hi Willem. That's definitely a much cleaner patch than ours since it 
allows the GSO to continue without failure.
We tried it on both the IPv4 and IPv6 path and didn't see the crash in 
either case.

-----
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora 
Forum,
a Linux Foundation Collaborative Project

^ permalink raw reply

* [PATCH net-next 0/3] udp gso fixes
From: Willem de Bruijn @ 2018-05-14 23:07 UTC (permalink / raw)
  To: netdev; +Cc: davem, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

A few small fixes:
- disallow segmentation with XFRM
- do not leak gso packets into the ingress path
- fix a panic if scatter-gather is disabled

Willem de Bruijn (3):
  udp: exclude gso from xfrm paths
  gso: limit udp gso to egress-only virtual devices
  udp: only use paged allocation with scatter-gather

 drivers/net/bonding/bond_main.c | 5 +++--
 drivers/net/team/team.c         | 5 +++--
 include/linux/netdev_features.h | 1 -
 net/ipv4/ip_output.c            | 2 +-
 net/ipv4/udp.c                  | 3 ++-
 net/ipv6/ip6_output.c           | 2 +-
 net/ipv6/udp.c                  | 3 ++-
 7 files changed, 12 insertions(+), 9 deletions(-)

-- 
2.17.0.441.gb46fe60e1d-goog

^ permalink raw reply

* [PATCH net-next 1/3] udp: exclude gso from xfrm paths
From: Willem de Bruijn @ 2018-05-14 23:07 UTC (permalink / raw)
  To: netdev; +Cc: davem, Willem de Bruijn, Michal Kubecek
In-Reply-To: <20180514230747.118875-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemb@google.com>

UDP GSO conflicts with transformations in the XFRM layer.
Return an error if GSO is attempted.

Fixes: bec1f6f69736 ("udp: generate gso with UDP_SEGMENT")
CC: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 net/ipv4/udp.c | 3 ++-
 net/ipv6/udp.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ff4d4ba67735..d71f1f3e1155 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -788,7 +788,8 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
 			return -EINVAL;
 		if (sk->sk_no_check_tx)
 			return -EINVAL;
-		if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
+		if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
+		    dst_xfrm(skb_dst(skb)))
 			return -EIO;
 
 		skb_shinfo(skb)->gso_size = cork->gso_size;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 2839c1bd1e58..426c9d2b418d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1053,7 +1053,8 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
 			return -EINVAL;
 		if (udp_sk(sk)->no_check6_tx)
 			return -EINVAL;
-		if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
+		if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
+		    dst_xfrm(skb_dst(skb)))
 			return -EIO;
 
 		skb_shinfo(skb)->gso_size = cork->gso_size;
-- 
2.17.0.441.gb46fe60e1d-goog

^ permalink raw reply related

* [PATCH net-next 2/3] gso: limit udp gso to egress-only virtual devices
From: Willem de Bruijn @ 2018-05-14 23:07 UTC (permalink / raw)
  To: netdev; +Cc: davem, Willem de Bruijn, Alexander Duyck
In-Reply-To: <20180514230747.118875-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemb@google.com>

Until the udp receive stack supports large packets (UDP GRO), GSO
packets must not loop from the egress to the ingress path.

Revert the change that added NETIF_F_GSO_UDP_L4 to various virtual
devices through NETIF_F_GSO_ENCAP_ALL as this included devices that
may loop packets, such as veth and macvlan.

Instead add it to specific devices that forward to another device's
egress path: bonding and team.

Fixes: 83aa025f535f ("udp: add gso support to virtual devices")
CC: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 drivers/net/bonding/bond_main.c | 5 +++--
 drivers/net/team/team.c         | 5 +++--
 include/linux/netdev_features.h | 1 -
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 4176e1d95f47..d7b58370ae77 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1107,7 +1107,8 @@ static void bond_compute_features(struct bonding *bond)
 
 done:
 	bond_dev->vlan_features = vlan_features;
-	bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL;
+	bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
+				    NETIF_F_GSO_UDP_L4;
 	bond_dev->gso_max_segs = gso_max_segs;
 	netif_set_gso_max_size(bond_dev, gso_max_size);
 
@@ -4263,7 +4264,7 @@ void bond_setup(struct net_device *bond_dev)
 				NETIF_F_HW_VLAN_CTAG_RX |
 				NETIF_F_HW_VLAN_CTAG_FILTER;
 
-	bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
+	bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
 	bond_dev->features |= bond_dev->hw_features;
 }
 
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 9dbd390ace34..c6a9f0cafea2 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1026,7 +1026,8 @@ static void __team_compute_features(struct team *team)
 	}
 
 	team->dev->vlan_features = vlan_features;
-	team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL;
+	team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
+				     NETIF_GSO_UDP_L4;
 	team->dev->hard_header_len = max_hard_header_len;
 
 	team->dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
@@ -2117,7 +2118,7 @@ static void team_setup(struct net_device *dev)
 			   NETIF_F_HW_VLAN_CTAG_RX |
 			   NETIF_F_HW_VLAN_CTAG_FILTER;
 
-	dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
+	dev->hw_features |= NETIF_F_GSO_ENCAP_ALL | NETIF_F_GSO_UDP_L4;
 	dev->features |= dev->hw_features;
 }
 
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index c87c3a3453c1..623bb8ced060 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -220,7 +220,6 @@ enum {
 				 NETIF_F_GSO_GRE_CSUM |			\
 				 NETIF_F_GSO_IPXIP4 |			\
 				 NETIF_F_GSO_IPXIP6 |			\
-				 NETIF_F_GSO_UDP_L4 |			\
 				 NETIF_F_GSO_UDP_TUNNEL |		\
 				 NETIF_F_GSO_UDP_TUNNEL_CSUM)
 
-- 
2.17.0.441.gb46fe60e1d-goog

^ permalink raw reply related

* [PATCH net-next 3/3] udp: only use paged allocation with scatter-gather
From: Willem de Bruijn @ 2018-05-14 23:07 UTC (permalink / raw)
  To: netdev; +Cc: davem, Willem de Bruijn
In-Reply-To: <20180514230747.118875-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemb@google.com>

Paged allocation stores most payload in skb frags. This helps udp gso
by avoiding copying from the gso skb to segment skb in skb_segment.

But without scatter-gather, data must be linear, so do not use paged
mode unless NETIF_F_SG.

Fixes: 15e36f5b8e98 ("udp: paged allocation with gso")
Reported-by: Sean Tranchetti <stranche@codeaurora.org>
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 net/ipv4/ip_output.c  | 2 +-
 net/ipv6/ip6_output.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b5e21eb198d8..b38731d8a44f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -884,7 +884,7 @@ static int __ip_append_data(struct sock *sk,
 
 	exthdrlen = !skb ? rt->dst.header_len : 0;
 	mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
-	paged = !!cork->gso_size;
+	paged = cork->gso_size && (rt->dst.dev->features & NETIF_F_SG);
 
 	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
 	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7f4493080df6..35a940b9f208 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1262,7 +1262,7 @@ static int __ip6_append_data(struct sock *sk,
 		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
 	}
 
-	paged = !!cork->gso_size;
+	paged = cork->gso_size && (rt->dst.dev->features & NETIF_F_SG);
 	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
 	orig_mtu = mtu;
 
-- 
2.17.0.441.gb46fe60e1d-goog

^ permalink raw reply related

* Re: [PATCH bpf-next v2] samples/bpf: xdp_monitor, accept short options
From: Daniel Borkmann @ 2018-05-14 23:07 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, Prashant Bhole
  Cc: Alexei Starovoitov, David S . Miller, netdev
In-Reply-To: <20180514122044.598feec2@redhat.com>

On 05/14/2018 12:20 PM, Jesper Dangaard Brouer wrote:
> 
> On Mon, 14 May 2018 17:29:15 +0900 Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp> wrote:
> 
>> Updated optstring parameter for getopt_long() to accept short options.
>> Also updated usage() function.
>>
>> Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
> 
> Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>

Applied to bpf-next, thanks everyone!

^ permalink raw reply

* Re: [PATCH net-next] udp: Fix kernel panic in UDP GSO path
From: Willem de Bruijn @ 2018-05-14 23:07 UTC (permalink / raw)
  To: Sean Tranchetti
  Cc: Eric Dumazet, Willem de Bruijn, David Miller, Network Development,
	Subash Abhinov Kasiviswanathan
In-Reply-To: <c86a2fbde50aea2b28e82c333f29d575@codeaurora.org>

>> Paged skbuffs is an optimization for gso, but the feature should
>> continue to work even if gso skbs are linear, indeed (if at the cost
>> of copying during skb_segment).
>>
>> We need to make paged contingent on scatter-gather. Rough
>> patch below. That is for ipv4 only, the same will be needed for ipv6.
>>
>> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
>> index b5e21eb198d8..b38731d8a44f 100644
>> --- a/net/ipv4/ip_output.c
>> +++ b/net/ipv4/ip_output.c
>> @@ -884,7 +884,7 @@ static int __ip_append_data(struct sock *sk,
>>
>>         exthdrlen = !skb ? rt->dst.header_len : 0;
>>         mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
>> -       paged = !!cork->gso_size;
>> +       paged = cork->gso_size && (rt->dst.dev->features & NETIF_F_SG);
>
>
> Hi Willem. That's definitely a much cleaner patch than ours since it allows
> the GSO to continue without failure.
> We tried it on both the IPv4 and IPv6 path and didn't see the crash in
> either case.

Thanks for testing. I have a small set of fixes to udp gso, including
this one. Let me send them right away.

^ permalink raw reply

* Re: [PATCH v3 bpf-next 0/2] bpf: enable stackmap with build_id in nmi
From: Daniel Borkmann @ 2018-05-14 23:09 UTC (permalink / raw)
  To: Song Liu, netdev; +Cc: kernel-team, qinteng, tobin
In-Reply-To: <20180507175049.1541963-1-songliubraving@fb.com>

On 05/07/2018 07:50 PM, Song Liu wrote:
> Changes v2 -> v3:
>   Improve syntax based on suggestion by Tobin C. Harding.
> 
> Changes v1 -> v2:
>   1. Rename some variables to (hopefully) reduce confusion;
>   2. Check irq_work status with IRQ_WORK_BUSY (instead of work->sem);
>   3. In Kconfig, let BPF_SYSCALL select IRQ_WORK;
>   4. Add static to DEFINE_PER_CPU();
>    5. Remove pr_info() in stack_map_init().
> 
> Song Liu (2):
>   bpf: enable stackmap with build_id in nmi context
>   bpf: add selftest for stackmap with build_id in NMI context
> 
>  init/Kconfig                               |   1 +
>  kernel/bpf/stackmap.c                      |  59 +++++++++++--
>  tools/testing/selftests/bpf/test_progs.c   | 134 +++++++++++++++++++++++++++++
>  tools/testing/selftests/bpf/urandom_read.c |  10 ++-
>  4 files changed, 196 insertions(+), 8 deletions(-)

Applied to bpf-next, thanks Song!

^ permalink raw reply

* Re: [PATCH net-next] udp: Fix kernel panic in UDP GSO path
From: Eric Dumazet @ 2018-05-14 23:10 UTC (permalink / raw)
  To: stranche, Willem de Bruijn
  Cc: Eric Dumazet, Willem de Bruijn, David Miller, Network Development,
	Subash Abhinov Kasiviswanathan
In-Reply-To: <c86a2fbde50aea2b28e82c333f29d575@codeaurora.org>



On 05/14/2018 03:45 PM, stranche@codeaurora.org wrote:
> On 2018-05-11 17:16, Willem de Bruijn wrote:
> 
>>> Hmm, no, we absolutely need to fix GSO instead.
>>>
>>> Think of a bonding device (or any virtual devices), your patch wont avoid the crash.
> 
> Hi Eric. Can you clarify what you mean by "fix GSO?" Is that just having the GSO path work
> regardless of whether or not SG is enabled for the device?
>

Yes. GSO is a fallback, and must work all the time, not panic.

^ permalink raw reply

* Re: [PATCH net-next 3/3] udp: only use paged allocation with scatter-gather
From: Eric Dumazet @ 2018-05-14 23:12 UTC (permalink / raw)
  To: Willem de Bruijn, netdev; +Cc: davem, Willem de Bruijn
In-Reply-To: <20180514230747.118875-4-willemdebruijn.kernel@gmail.com>



On 05/14/2018 04:07 PM, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
> 
> Paged allocation stores most payload in skb frags. This helps udp gso
> by avoiding copying from the gso skb to segment skb in skb_segment.
> 
> But without scatter-gather, data must be linear, so do not use paged
> mode unless NETIF_F_SG.
> 
> Fixes: 15e36f5b8e98 ("udp: paged allocation with gso")
> Reported-by: Sean Tranchetti <stranche@codeaurora.org>
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---
>  net/ipv4/ip_output.c  | 2 +-
>  net/ipv6/ip6_output.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index b5e21eb198d8..b38731d8a44f 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -884,7 +884,7 @@ static int __ip_append_data(struct sock *sk,
>  
>  	exthdrlen = !skb ? rt->dst.header_len : 0;
>  	mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
> -	paged = !!cork->gso_size;
> +	paged = cork->gso_size && (rt->dst.dev->features & NETIF_F_SG);
>  
>  	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
>  	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> index 7f4493080df6..35a940b9f208 100644
> --- a/net/ipv6/ip6_output.c
> +++ b/net/ipv6/ip6_output.c
> @@ -1262,7 +1262,7 @@ static int __ip6_append_data(struct sock *sk,
>  		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
>  	}
>  
> -	paged = !!cork->gso_size;
> +	paged = cork->gso_size && (rt->dst.dev->features & NETIF_F_SG);
>  	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
>  	orig_mtu = mtu;
>  
> 

As I said, this wont help for stacked device

bonding might advertise NETIF_F_SG, but one slave might not.

^ permalink raw reply

* Re: [PATCH net-next 2/3] gso: limit udp gso to egress-only virtual devices
From: Willem de Bruijn @ 2018-05-14 23:12 UTC (permalink / raw)
  To: Network Development; +Cc: David Miller, Willem de Bruijn, Alexander Duyck
In-Reply-To: <20180514230747.118875-3-willemdebruijn.kernel@gmail.com>

On Mon, May 14, 2018 at 7:07 PM, Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
> From: Willem de Bruijn <willemb@google.com>
>
> Until the udp receive stack supports large packets (UDP GRO), GSO
> packets must not loop from the egress to the ingress path.
>
> Revert the change that added NETIF_F_GSO_UDP_L4 to various virtual
> devices through NETIF_F_GSO_ENCAP_ALL as this included devices that
> may loop packets, such as veth and macvlan.
>
> Instead add it to specific devices that forward to another device's
> egress path: bonding and team.
>
> Fixes: 83aa025f535f ("udp: add gso support to virtual devices")
> CC: Alexander Duyck <alexander.duyck@gmail.com>
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---

> diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
> index 9dbd390ace34..c6a9f0cafea2 100644
> --- a/drivers/net/team/team.c
> +++ b/drivers/net/team/team.c
> @@ -1026,7 +1026,8 @@ static void __team_compute_features(struct team *team)
>         }
>
>         team->dev->vlan_features = vlan_features;
> -       team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL;
> +       team->dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL |
> +                                    NETIF_GSO_UDP_L4;

This has a typo. team.ko did not build automatically for me and caught it
with a full compile just too late.

Need to send a v2, sorry.

^ permalink raw reply

* Re: [net 1/1] net/mlx5: Fix build break when CONFIG_SMP=n
From: Randy Dunlap @ 2018-05-14 23:19 UTC (permalink / raw)
  To: Saeed Mahameed, David S. Miller; +Cc: netdev, Guenter Roeck, Thomas Gleixner
In-Reply-To: <20180514223810.21197-1-saeedm@mellanox.com>

On 05/14/2018 03:38 PM, Saeed Mahameed wrote:
> Avoid using the kernel's irq_descriptor and return IRQ vector affinity
> directly from the driver.
> 
> This fixes the following build break when CONFIG_SMP=n
> 
> include/linux/mlx5/driver.h: In function ‘mlx5_get_vector_affinity_hint’:
> include/linux/mlx5/driver.h:1299:13: error:
>         ‘struct irq_desc’ has no member named ‘affinity_hint’
> 
> Fixes: 6082d9c9c94a ("net/mlx5: Fix mlx5_get_vector_affinity function")
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> CC: Randy Dunlap <rdunlap@infradead.org>
> CC: Guenter Roeck <linux@roeck-us.net>
> CC: Thomas Gleixner <tglx@linutronix.de>
> Tested-by: Israel Rukshin <israelr@mellanox.com>

Reported-by: kbuild test robot <lkp@intel.com>
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>

Thanks.

> ---
> 
> For -stable v4.14
> 
>  include/linux/mlx5/driver.h | 12 +-----------
>  1 file changed, 1 insertion(+), 11 deletions(-)
> 
> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
> index 2a156c5dfadd..d703774982ca 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -1286,17 +1286,7 @@ enum {
>  static inline const struct cpumask *
>  mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector)
>  {
> -	struct irq_desc *desc;
> -	unsigned int irq;
> -	int eqn;
> -	int err;
> -
> -	err = mlx5_vector2eqn(dev, vector, &eqn, &irq);
> -	if (err)
> -		return NULL;
> -
> -	desc = irq_to_desc(irq);
> -	return desc->affinity_hint;
> +	return dev->priv.irq_info[vector].mask;
>  }
>  
>  #endif /* MLX5_DRIVER_H */
> 


-- 
~Randy

^ permalink raw reply

* Re: [PATCH v1 1/4] media: rc: introduce BPF_PROG_IR_DECODER
From: Randy Dunlap @ 2018-05-14 23:27 UTC (permalink / raw)
  To: Sean Young, linux-media, linux-kernel, Alexei Starovoitov,
	Mauro Carvalho Chehab, Daniel Borkmann, netdev, Matthias Reichl,
	Devin Heitmueller
In-Reply-To: <32a944171d5c48abf126259595b0088ce3122c91.1526331777.git.sean@mess.org>

On 05/14/2018 02:10 PM, Sean Young wrote:
> Add support for BPF_PROG_IR_DECODER. This type of BPF program can call

Kconfig file below uses IR_BPF_DECODER instead of the symbol name above.

and then patch 3 says a third choice:
The context provided to a BPF_PROG_RAWIR_DECODER is a struct ir_raw_event;

> rc_keydown() to reported decoded IR scancodes, or rc_repeat() to report
> that the last key should be repeated.
> 
> Signed-off-by: Sean Young <sean@mess.org>
> ---
>  drivers/media/rc/Kconfig          |  8 +++
>  drivers/media/rc/Makefile         |  1 +
>  drivers/media/rc/ir-bpf-decoder.c | 93 +++++++++++++++++++++++++++++++
>  include/linux/bpf_types.h         |  3 +
>  include/uapi/linux/bpf.h          | 16 +++++-
>  5 files changed, 120 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/media/rc/ir-bpf-decoder.c
> 
> diff --git a/drivers/media/rc/Kconfig b/drivers/media/rc/Kconfig
> index eb2c3b6eca7f..10ad6167d87c 100644
> --- a/drivers/media/rc/Kconfig
> +++ b/drivers/media/rc/Kconfig
> @@ -120,6 +120,14 @@ config IR_IMON_DECODER
>  	   remote control and you would like to use it with a raw IR
>  	   receiver, or if you wish to use an encoder to transmit this IR.
>  
> +config IR_BPF_DECODER
> +	bool "Enable IR raw decoder using BPF"
> +	depends on BPF_SYSCALL
> +	depends on RC_CORE=y
> +	help
> +	   Enable this option to make it possible to load custom IR
> +	   decoders written in BPF.
> +
>  endif #RC_DECODERS
>  
>  menuconfig RC_DEVICES
> diff --git a/drivers/media/rc/Makefile b/drivers/media/rc/Makefile
> index 2e1c87066f6c..12e1118430d0 100644
> --- a/drivers/media/rc/Makefile
> +++ b/drivers/media/rc/Makefile
> @@ -5,6 +5,7 @@ obj-y += keymaps/
>  obj-$(CONFIG_RC_CORE) += rc-core.o
>  rc-core-y := rc-main.o rc-ir-raw.o
>  rc-core-$(CONFIG_LIRC) += lirc_dev.o
> +rc-core-$(CONFIG_IR_BPF_DECODER) += ir-bpf-decoder.o


-- 
~Randy

^ permalink raw reply

* Re: [PATCH net-next 3/3] udp: only use paged allocation with scatter-gather
From: Willem de Bruijn @ 2018-05-14 23:30 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Network Development, David Miller, Willem de Bruijn
In-Reply-To: <a629c4fa-3666-48c2-900f-9d04d9ecfcbc@gmail.com>

On Mon, May 14, 2018 at 7:12 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>
>
> On 05/14/2018 04:07 PM, Willem de Bruijn wrote:
>> From: Willem de Bruijn <willemb@google.com>
>>
>> Paged allocation stores most payload in skb frags. This helps udp gso
>> by avoiding copying from the gso skb to segment skb in skb_segment.
>>
>> But without scatter-gather, data must be linear, so do not use paged
>> mode unless NETIF_F_SG.
>>
>> Fixes: 15e36f5b8e98 ("udp: paged allocation with gso")
>> Reported-by: Sean Tranchetti <stranche@codeaurora.org>
>> Signed-off-by: Willem de Bruijn <willemb@google.com>
>> ---
>>  net/ipv4/ip_output.c  | 2 +-
>>  net/ipv6/ip6_output.c | 2 +-
>>  2 files changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
>> index b5e21eb198d8..b38731d8a44f 100644
>> --- a/net/ipv4/ip_output.c
>> +++ b/net/ipv4/ip_output.c
>> @@ -884,7 +884,7 @@ static int __ip_append_data(struct sock *sk,
>>
>>       exthdrlen = !skb ? rt->dst.header_len : 0;
>>       mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
>> -     paged = !!cork->gso_size;
>> +     paged = cork->gso_size && (rt->dst.dev->features & NETIF_F_SG);
>>
>>       if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
>>           sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
>> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
>> index 7f4493080df6..35a940b9f208 100644
>> --- a/net/ipv6/ip6_output.c
>> +++ b/net/ipv6/ip6_output.c
>> @@ -1262,7 +1262,7 @@ static int __ip6_append_data(struct sock *sk,
>>               dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
>>       }
>>
>> -     paged = !!cork->gso_size;
>> +     paged = cork->gso_size && (rt->dst.dev->features & NETIF_F_SG);
>>       mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
>>       orig_mtu = mtu;
>>
>>
>
> As I said, this wont help for stacked device
>
> bonding might advertise NETIF_F_SG, but one slave might not.

I don't quite follow. The reported crash happens in the protocol layer,
because of this check. With pagedlen we have not allocated
sufficient space for the skb_put.

                if (!(rt->dst.dev->features&NETIF_F_SG)) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                        offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;

Are you referring to a separate potential issue in the gso layer?
If a bonding device advertises SG, but a slave does not, then
skb_segment on the slave should build linear segs? I have not
tested that.

^ permalink raw reply

* Re: [PATCH 01/14] net: sched: use rcu for action cookie update
From: kbuild test robot @ 2018-05-14 23:39 UTC (permalink / raw)
  To: Vlad Buslov
  Cc: kbuild-all, netdev, davem, jhs, xiyou.wangcong, jiri, pablo,
	kadlec, fw, ast, daniel, edumazet, vladbu, keescook, linux-kernel,
	netfilter-devel, coreteam, kliteyn
In-Reply-To: <1526308035-12484-2-git-send-email-vladbu@mellanox.com>

Hi Vlad,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on net/master]
[also build test WARNING on v4.17-rc5 next-20180514]
[cannot apply to net-next/master]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Vlad-Buslov/Modify-action-API-for-implementing-lockless-actions/20180515-025420
reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

>> net/sched/act_api.c:71:15: sparse: incorrect type in initializer (different address spaces) @@    expected struct tc_cookie [noderef] <asn:4>*__ret @@    got [noderef] <asn:4>*__ret @@
   net/sched/act_api.c:71:15:    expected struct tc_cookie [noderef] <asn:4>*__ret
   net/sched/act_api.c:71:15:    got struct tc_cookie *new_cookie
>> net/sched/act_api.c:71:13: sparse: incorrect type in assignment (different address spaces) @@    expected struct tc_cookie *old @@    got struct tc_cookie [noderef] <struct tc_cookie *old @@
   net/sched/act_api.c:71:13:    expected struct tc_cookie *old
   net/sched/act_api.c:71:13:    got struct tc_cookie [noderef] <asn:4>*[assigned] __ret
>> net/sched/act_api.c:132:48: sparse: dereference of noderef expression

vim +71 net/sched/act_api.c

    65	
    66	static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
    67					  struct tc_cookie *new_cookie)
    68	{
    69		struct tc_cookie *old;
    70	
  > 71		old = xchg(old_cookie, new_cookie);
    72		if (old)
    73			call_rcu(&old->rcu, tcf_free_cookie_rcu);
    74	}
    75	
    76	/* XXX: For standalone actions, we don't need a RCU grace period either, because
    77	 * actions are always connected to filters and filters are already destroyed in
    78	 * RCU callbacks, so after a RCU grace period actions are already disconnected
    79	 * from filters. Readers later can not find us.
    80	 */
    81	static void free_tcf(struct tc_action *p)
    82	{
    83		free_percpu(p->cpu_bstats);
    84		free_percpu(p->cpu_qstats);
    85	
    86		tcf_set_action_cookie(&p->act_cookie, NULL);
    87		if (p->goto_chain)
    88			tcf_action_goto_chain_fini(p);
    89	
    90		kfree(p);
    91	}
    92	
    93	static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p)
    94	{
    95		spin_lock_bh(&idrinfo->lock);
    96		idr_remove(&idrinfo->action_idr, p->tcfa_index);
    97		spin_unlock_bh(&idrinfo->lock);
    98		gen_kill_estimator(&p->tcfa_rate_est);
    99		free_tcf(p);
   100	}
   101	
   102	int __tcf_idr_release(struct tc_action *p, bool bind, bool strict)
   103	{
   104		int ret = 0;
   105	
   106		ASSERT_RTNL();
   107	
   108		if (p) {
   109			if (bind)
   110				p->tcfa_bindcnt--;
   111			else if (strict && p->tcfa_bindcnt > 0)
   112				return -EPERM;
   113	
   114			p->tcfa_refcnt--;
   115			if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
   116				if (p->ops->cleanup)
   117					p->ops->cleanup(p);
   118				tcf_idr_remove(p->idrinfo, p);
   119				ret = ACT_P_DELETED;
   120			}
   121		}
   122	
   123		return ret;
   124	}
   125	EXPORT_SYMBOL(__tcf_idr_release);
   126	
   127	static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
   128	{
   129		u32 cookie_len = 0;
   130	
   131		if (act->act_cookie)
 > 132			cookie_len = nla_total_size(act->act_cookie->len);
   133	
   134		return  nla_total_size(0) /* action number nested */
   135			+ nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */
   136			+ cookie_len /* TCA_ACT_COOKIE */
   137			+ nla_total_size(0) /* TCA_ACT_STATS nested */
   138			/* TCA_STATS_BASIC */
   139			+ nla_total_size_64bit(sizeof(struct gnet_stats_basic))
   140			/* TCA_STATS_QUEUE */
   141			+ nla_total_size_64bit(sizeof(struct gnet_stats_queue))
   142			+ nla_total_size(0) /* TCA_OPTIONS nested */
   143			+ nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */
   144	}
   145	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply

* Re: [PATCH net-next 3/3] udp: only use paged allocation with scatter-gather
From: Eric Dumazet @ 2018-05-14 23:45 UTC (permalink / raw)
  To: Willem de Bruijn, Eric Dumazet
  Cc: Network Development, David Miller, Willem de Bruijn
In-Reply-To: <CAF=yD-KYer3RV6hB+-5LYt6VgL3LA6OpgbCBzdmnGrCvGF=ySQ@mail.gmail.com>



On 05/14/2018 04:30 PM, Willem de Bruijn wrote:

> I don't quite follow. The reported crash happens in the protocol layer,
> because of this check. With pagedlen we have not allocated
> sufficient space for the skb_put.
> 
>                 if (!(rt->dst.dev->features&NETIF_F_SG)) {
>                         unsigned int off;
> 
>                         off = skb->len;
>                         if (getfrag(from, skb_put(skb, copy),
>                                         offset, copy, off, skb) < 0) {
>                                 __skb_trim(skb, off);
>                                 err = -EFAULT;
>                                 goto error;
>                         }
>                 } else {
>                         int i = skb_shinfo(skb)->nr_frags;
> 
> Are you referring to a separate potential issue in the gso layer?
> If a bonding device advertises SG, but a slave does not, then
> skb_segment on the slave should build linear segs? I have not
> tested that.

Given that the device attribute could change under us, we need to not
crash, even if initially we thought NETIF_F_SG was available.

Unless you want to hold RTNL in UDP xmit :)

Ideally, GSO should be always on, as we did for TCP.

Otherwise, I can guarantee syzkaller will hit again.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox