All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>,
	Jason Wang <jasowang@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 4.19 32/74] virtio_net: Differentiate sk_buff and xdp_frame on freeing
Date: Mon,  4 Feb 2019 11:36:45 +0100	[thread overview]
Message-ID: <20190204103623.538231886@linuxfoundation.org> (raw)
In-Reply-To: <20190204103619.714714157@linuxfoundation.org>

4.19-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>

[ Upstream commit 5050471d35d1316ba32dfcbb409978337eb9e75e

  I had to fold commit df133f3f9625 ("virtio_net: bulk free tx skbs")
  into this to make it work.  ]

We do not reset or free up unused buffers when enabling/disabling XDP,
so it can happen that xdp_frames are freed after disabling XDP or
sk_buffs are freed after enabling XDP on xdp tx queues.
Thus we need to handle both forms (xdp_frames and sk_buffs) regardless
of XDP setting.
One way to trigger this problem is to disable XDP when napi_tx is
enabled. In that case, virtnet_xdp_set() calls virtnet_napi_enable()
which kicks NAPI. The NAPI handler will call virtnet_poll_cleantx()
which invokes free_old_xmit_skbs() for queues which have been used by
XDP.

Note that even with this change we need to keep skipping
free_old_xmit_skbs() from NAPI handlers when XDP is enabled, because XDP
tx queues do not acquire queue locks.

- v2: Use napi_consume_skb() instead of dev_consume_skb_any()

Fixes: 4941d472bf95 ("virtio-net: do not reset during XDP set")
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Acked-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/virtio_net.c |   64 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 47 insertions(+), 17 deletions(-)

--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -57,6 +57,8 @@ module_param(napi_tx, bool, 0644);
 #define VIRTIO_XDP_TX		BIT(0)
 #define VIRTIO_XDP_REDIR	BIT(1)
 
+#define VIRTIO_XDP_FLAG	BIT(0)
+
 /* RX packet size EWMA. The average packet size is used to determine the packet
  * buffer size when refilling RX rings. As the entire RX ring may be refilled
  * at once, the weight is chosen so that the EWMA will be insensitive to short-
@@ -251,6 +253,21 @@ struct padded_vnet_hdr {
 	char padding[4];
 };
 
+static bool is_xdp_frame(void *ptr)
+{
+	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
+}
+
+static void *xdp_to_ptr(struct xdp_frame *ptr)
+{
+	return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG);
+}
+
+static struct xdp_frame *ptr_to_xdp(void *ptr)
+{
+	return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG);
+}
+
 /* Converting between virtqueue no. and kernel tx/rx queue no.
  * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
  */
@@ -461,7 +478,8 @@ static int __virtnet_xdp_xmit_one(struct
 
 	sg_init_one(sq->sg, xdpf->data, xdpf->len);
 
-	err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC);
+	err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf),
+				   GFP_ATOMIC);
 	if (unlikely(err))
 		return -ENOSPC; /* Caller handle free/refcnt */
 
@@ -481,13 +499,13 @@ static int virtnet_xdp_xmit(struct net_d
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct receive_queue *rq = vi->rq;
-	struct xdp_frame *xdpf_sent;
 	struct bpf_prog *xdp_prog;
 	struct send_queue *sq;
 	unsigned int len;
 	int drops = 0;
 	int kicks = 0;
 	int ret, err;
+	void *ptr;
 	int i;
 
 	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
@@ -506,8 +524,12 @@ static int virtnet_xdp_xmit(struct net_d
 	}
 
 	/* Free up any pending old buffers before queueing new ones. */
-	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
-		xdp_return_frame(xdpf_sent);
+	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+		if (likely(is_xdp_frame(ptr)))
+			xdp_return_frame(ptr_to_xdp(ptr));
+		else
+			napi_consume_skb(ptr, false);
+	}
 
 	for (i = 0; i < n; i++) {
 		struct xdp_frame *xdpf = frames[i];
@@ -1326,20 +1348,28 @@ static int virtnet_receive(struct receiv
 	return stats.packets;
 }
 
-static void free_old_xmit_skbs(struct send_queue *sq)
+static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
 {
-	struct sk_buff *skb;
 	unsigned int len;
 	unsigned int packets = 0;
 	unsigned int bytes = 0;
+	void *ptr;
 
-	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
-		pr_debug("Sent skb %p\n", skb);
+	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+		if (likely(!is_xdp_frame(ptr))) {
+			struct sk_buff *skb = ptr;
 
-		bytes += skb->len;
-		packets++;
+			pr_debug("Sent skb %p\n", skb);
 
-		dev_consume_skb_any(skb);
+			bytes += skb->len;
+			napi_consume_skb(skb, in_napi);
+		} else {
+			struct xdp_frame *frame = ptr_to_xdp(ptr);
+
+			bytes += frame->len;
+			xdp_return_frame(frame);
+		}
+		packets++;
 	}
 
 	/* Avoid overhead when no packets have been processed
@@ -1375,7 +1405,7 @@ static void virtnet_poll_cleantx(struct
 		return;
 
 	if (__netif_tx_trylock(txq)) {
-		free_old_xmit_skbs(sq);
+		free_old_xmit_skbs(sq, true);
 		__netif_tx_unlock(txq);
 	}
 
@@ -1459,7 +1489,7 @@ static int virtnet_poll_tx(struct napi_s
 
 	txq = netdev_get_tx_queue(vi->dev, index);
 	__netif_tx_lock(txq, raw_smp_processor_id());
-	free_old_xmit_skbs(sq);
+	free_old_xmit_skbs(sq, true);
 	__netif_tx_unlock(txq);
 
 	virtqueue_napi_complete(napi, sq->vq, 0);
@@ -1528,7 +1558,7 @@ static netdev_tx_t start_xmit(struct sk_
 	bool use_napi = sq->napi.weight;
 
 	/* Free up any pending old buffers before queueing new ones. */
-	free_old_xmit_skbs(sq);
+	free_old_xmit_skbs(sq, false);
 
 	if (use_napi && kick)
 		virtqueue_enable_cb_delayed(sq->vq);
@@ -1571,7 +1601,7 @@ static netdev_tx_t start_xmit(struct sk_
 		if (!use_napi &&
 		    unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
 			/* More just got used, free them then recheck. */
-			free_old_xmit_skbs(sq);
+			free_old_xmit_skbs(sq, false);
 			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
 				netif_start_subqueue(dev, qnum);
 				virtqueue_disable_cb(sq->vq);
@@ -2590,10 +2620,10 @@ static void free_unused_bufs(struct virt
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		struct virtqueue *vq = vi->sq[i].vq;
 		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
-			if (!is_xdp_raw_buffer_queue(vi, i))
+			if (!is_xdp_frame(buf))
 				dev_kfree_skb(buf);
 			else
-				xdp_return_frame(buf);
+				xdp_return_frame(ptr_to_xdp(buf));
 		}
 	}
 



  parent reply	other threads:[~2019-02-04 10:47 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-04 10:36 [PATCH 4.19 00/74] 4.19.20-stable review Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 01/74] Fix "net: ipv4: do not handle duplicate fragments as overlapping" Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 02/74] drm/msm/gpu: fix building without debugfs Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 03/74] ipv6: Consider sk_bound_dev_if when binding a socket to an address Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 04/74] ipv6: sr: clear IP6CB(skb) on SRH ip4ip6 encapsulation Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 05/74] ipvlan, l3mdev: fix broken l3s mode wrt local routes Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 06/74] l2tp: copy 4 more bytes to linear part if necessary Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 07/74] l2tp: fix reading optional fields of L2TPv3 Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 08/74] net: ip_gre: always reports o_key to userspace Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 09/74] net: ip_gre: use erspan key field for tunnel lookup Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 10/74] net/mlx4_core: Add masking for a few queries on HCA caps Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 11/74] netrom: switch to sock timer API Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 12/74] net/rose: fix NULL ax25_cb kernel panic Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 13/74] net: set default network namespace in init_dummy_netdev() Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 14/74] ravb: expand rx descriptor data to accommodate hw checksum Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 15/74] sctp: improve the events for sctp stream reset Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 16/74] tun: move the call to tun_set_real_num_queues Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 17/74] ucc_geth: Reset BQL queue when stopping device Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 18/74] vhost: fix OOB in get_rx_bufs() Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 19/74] net: ip6_gre: always reports o_key to userspace Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 20/74] sctp: improve the events for sctp stream adding Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 21/74] net/mlx5e: Allow MAC invalidation while spoofchk is ON Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 22/74] ip6mr: Fix notifiers call on mroute_clean_tables() Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 23/74] Revert "net/mlx5e: E-Switch, Initialize eswitch only if eswitch manager" Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 24/74] sctp: set chunk transport correctly when its a new asoc Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 25/74] sctp: set flow sport from saddr only when its 0 Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 26/74] virtio_net: Dont enable NAPI when interface is down Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 27/74] virtio_net: Dont call free_old_xmit_skbs for xdp_frames Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 28/74] virtio_net: Fix not restoring real_num_rx_queues Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 29/74] virtio_net: Fix out of bounds access of sq Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 30/74] virtio_net: Dont process redirected XDP frames when XDP is disabled Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 31/74] virtio_net: Use xdp_return_frame to free xdp_frames on destroying vqs Greg Kroah-Hartman
2019-02-04 10:36 ` Greg Kroah-Hartman [this message]
2019-02-04 10:36 ` [PATCH 4.19 33/74] CIFS: Do not count -ENODATA as failure for query directory Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 34/74] CIFS: Fix trace command logging for SMB2 reads and writes Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 35/74] CIFS: Do not consider -ENODATA as stat failure for reads Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 36/74] fs/dcache: Fix incorrect nr_dentry_unused accounting in shrink_dcache_sb() Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 37/74] iommu/vt-d: Fix memory leak in intel_iommu_put_resv_regions() Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 38/74] selftests/seccomp: Enhance per-arch ptrace syscall skip tests Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 39/74] NFS: Fix up return value on fatal errors in nfs_page_async_flush() Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 40/74] ARM: cns3xxx: Fix writing to wrong PCI config registers after alignment Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 41/74] arm64: kaslr: ensure randomized quantities are clean also when kaslr is off Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 42/74] arm64: Do not issue IPIs for user executable ptes Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 43/74] arm64: hyp-stub: Forbid kprobing of the hyp-stub Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 44/74] arm64: hibernate: Clean the __hyp_text to PoC after resume Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 45/74] gpio: altera-a10sr: Set proper output level for direction_output Greg Kroah-Hartman
2019-02-04 10:36 ` [PATCH 4.19 46/74] gpiolib: fix line event timestamps for nested irqs Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 47/74] gpio: pcf857x: Fix interrupts on multiple instances Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 48/74] gpio: sprd: Fix the incorrect data register Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 49/74] gpio: sprd: Fix incorrect irq type setting for the async EIC Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 50/74] gfs2: Revert "Fix loop in gfs2_rbm_find" Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 51/74] mmc: bcm2835: Fix DMA channel leak on probe error Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 52/74] mmc: mediatek: fix incorrect register setting of hs400_cmd_int_delay Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 53/74] ALSA: usb-audio: Add Opus #3 to quirks for native DSD support Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 54/74] ALSA: hda/realtek - Fixed hp_pin no value Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 55/74] IB/hfi1: Remove overly conservative VM_EXEC flag check Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 56/74] platform/x86: asus-nb-wmi: Map 0x35 to KEY_SCREENLOCK Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 57/74] platform/x86: asus-nb-wmi: Drop mapping of 0x33 and 0x34 scan codes Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 58/74] mmc: sdhci-iproc: handle mmc_of_parse() errors during probe Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 59/74] Btrfs: fix deadlock when allocating tree block during leaf/node split Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 60/74] btrfs: On error always free subvol_name in btrfs_mount Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 61/74] kernel/exit.c: release ptraced tasks before zap_pid_ns_processes Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 62/74] mm/hugetlb.c: teach follow_hugetlb_page() to handle FOLL_NOWAIT Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 63/74] oom, oom_reaper: do not enqueue same task twice Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 64/74] mm,memory_hotplug: fix scan_movable_pages() for gigantic hugepages Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 65/74] mm, oom: fix use-after-free in oom_kill_process Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 66/74] mm: hwpoison: use do_send_sig_info() instead of force_sig() Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 67/74] mm: migrate: dont rely on __PageMovable() of newpage after unlocking it Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 68/74] of: Convert to using %pOFn instead of device_node.name Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 69/74] of: overlay: add tests to validate kfrees from overlay removal Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 70/74] of: overlay: add missing of_node_get() in __of_attach_node_sysfs Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 71/74] of: overlay: use prop add changeset entry for property in new nodes Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 72/74] of: overlay: do not duplicate properties from overlay for " Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 73/74] md/raid5: fix out of memory during raid cache recovery Greg Kroah-Hartman
2019-02-04 10:37 ` [PATCH 4.19 74/74] cifs: Always resolve hostname before reconnecting Greg Kroah-Hartman
2019-02-04 21:49 ` [PATCH 4.19 00/74] 4.19.20-stable review Guenter Roeck
2019-02-05  6:14 ` Naresh Kamboju

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190204103623.538231886@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=jasowang@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=makita.toshiaki@lab.ntt.co.jp \
    --cc=mst@redhat.com \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.