From: Neil Horman <nhorman@tuxdriver.com>
To: netdev@vger.kernel.org
Cc: Neil Horman <nhorman@tuxdriver.com>,
Shrikrishna Khare <skhare@vmware.com>,
"VMware, Inc." <pv-drivers@vmware.com>,
"David S. Miller" <davem@davemloft.net>
Subject: [PATCH] vmxnet3: avoid calling pskb_may_pull with interrupts disabled
Date: Fri, 4 Mar 2016 13:40:48 -0500 [thread overview]
Message-ID: <1457116848-6753-1-git-send-email-nhorman@tuxdriver.com> (raw)
vmxnet3 has a function vmxnet3_parse_and_copy_hdr which, among other operations,
uses pskb_may_pull to linearize the header portion of an skb. That operation
eventually uses local_bh_disable/enable to ensure that it doesn't race with the
drivers bottom half handler. Unfortunately, vmxnet3 preforms this
parse_and_copy operation with a spinlock held and interrupts disabled. This
causes us to run afoul of the WARN_ON_ONCE(irqs_disabled()) warning in
local_bh_enable, resulting in this:
WARNING: at kernel/softirq.c:159 local_bh_enable+0x59/0x90() (Not tainted)
Hardware name: VMware Virtual Platform
Modules linked in: ipv6 ppdev parport_pc parport microcode e1000 vmware_balloon
vmxnet3 i2c_piix4 sg ext4 jbd2 mbcache sd_mod crc_t10dif sr_mod cdrom mptspi
mptscsih mptbase scsi_transport_spi pata_acpi ata_generic ata_piix vmwgfx ttm
drm_kms_helper drm i2c_core dm_mirror dm_region_hash dm_log dm_mod [last
unloaded: mperf]
Pid: 6229, comm: sshd Not tainted 2.6.32-616.el6.i686 #1
Call Trace:
[<c04624d9>] ? warn_slowpath_common+0x89/0xe0
[<c0469e99>] ? local_bh_enable+0x59/0x90
[<c046254b>] ? warn_slowpath_null+0x1b/0x20
[<c0469e99>] ? local_bh_enable+0x59/0x90
[<c07bb936>] ? skb_copy_bits+0x126/0x210
[<f8d1d9fe>] ? ext4_ext_find_extent+0x24e/0x2d0 [ext4]
[<c07bc49e>] ? __pskb_pull_tail+0x6e/0x2b0
[<f95a6164>] ? vmxnet3_xmit_frame+0xba4/0xef0 [vmxnet3]
[<c05d15a6>] ? selinux_ip_postroute+0x56/0x320
[<c0615988>] ? cfq_add_rq_rb+0x98/0x110
[<c0852df8>] ? packet_rcv+0x48/0x350
[<c07c5839>] ? dev_queue_xmit_nit+0xc9/0x140
...
Fix it by splitting vmxnet3_parse_and_copy_hdr into two functions:
vmxnet3_parse_hdr, which sets up the internal/on stack ctx datastructure, and
pulls the skb (both of which can be done without holding the spinlock with irqs
disabled
and
vmxnet3_copy_header, which just copies the skb to the tx ring under the lock
safely.
tested and shown to correct the described problem. Applies cleanly to the head
of the net tree
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Shrikrishna Khare <skhare@vmware.com>
CC: "VMware, Inc." <pv-drivers@vmware.com>
CC: "David S. Miller" <davem@davemloft.net>
---
drivers/net/vmxnet3/vmxnet3_drv.c | 73 ++++++++++++++++++++++++---------------
1 file changed, 46 insertions(+), 27 deletions(-)
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 0cbf520..17589d8 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -814,7 +814,7 @@ vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
/*
- * parse and copy relevant protocol headers:
+ * parse relevant protocol headers:
* For a tso pkt, relevant headers are L2/3/4 including options
* For a pkt requesting csum offloading, they are L2/3 and may include L4
* if it's a TCP/UDP pkt
@@ -827,15 +827,14 @@ vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
* Other effects:
* 1. related *ctx fields are updated.
* 2. ctx->copy_size is # of bytes copied
- * 3. the portion copied is guaranteed to be in the linear part
+ * 3. the portion to be copied is guaranteed to be in the linear part
*
*/
static int
-vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
- struct vmxnet3_tx_ctx *ctx,
- struct vmxnet3_adapter *adapter)
+vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
+ struct vmxnet3_tx_ctx *ctx,
+ struct vmxnet3_adapter *adapter)
{
- struct Vmxnet3_TxDataDesc *tdd;
u8 protocol = 0;
if (ctx->mss) { /* TSO */
@@ -892,16 +891,34 @@ vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
return 0;
}
+ return 1;
+err:
+ return -1;
+}
+
+/*
+ * copy relevant protocol headers to the transmit ring:
+ * For a tso pkt, relevant headers are L2/3/4 including options
+ * For a pkt requesting csum offloading, they are L2/3 and may include L4
+ * if it's a TCP/UDP pkt
+ *
+ *
+ * Note that this requires that vmxnet3_parse_hdr be called first to set the
+ * appropriate bits in ctx first
+ */
+static void
+vmxnet3_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
+ struct vmxnet3_tx_ctx *ctx,
+ struct vmxnet3_adapter *adapter)
+{
+ struct Vmxnet3_TxDataDesc *tdd;
+
tdd = tq->data_ring.base + tq->tx_ring.next2fill;
memcpy(tdd->data, skb->data, ctx->copy_size);
netdev_dbg(adapter->netdev,
"copy %u bytes to dataRing[%u]\n",
ctx->copy_size, tq->tx_ring.next2fill);
- return 1;
-
-err:
- return -1;
}
@@ -998,22 +1015,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
}
}
- spin_lock_irqsave(&tq->tx_lock, flags);
-
- if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
- tq->stats.tx_ring_full++;
- netdev_dbg(adapter->netdev,
- "tx queue stopped on %s, next2comp %u"
- " next2fill %u\n", adapter->netdev->name,
- tq->tx_ring.next2comp, tq->tx_ring.next2fill);
-
- vmxnet3_tq_stop(tq, adapter);
- spin_unlock_irqrestore(&tq->tx_lock, flags);
- return NETDEV_TX_BUSY;
- }
-
-
- ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
+ ret = vmxnet3_parse_hdr(skb, tq, &ctx, adapter);
if (ret >= 0) {
BUG_ON(ret <= 0 && ctx.copy_size != 0);
/* hdrs parsed, check against other limits */
@@ -1033,9 +1035,26 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
}
} else {
tq->stats.drop_hdr_inspect_err++;
- goto unlock_drop_pkt;
+ goto drop_pkt;
+ }
+
+ spin_lock_irqsave(&tq->tx_lock, flags);
+
+ if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
+ tq->stats.tx_ring_full++;
+ netdev_dbg(adapter->netdev,
+ "tx queue stopped on %s, next2comp %u"
+ " next2fill %u\n", adapter->netdev->name,
+ tq->tx_ring.next2comp, tq->tx_ring.next2fill);
+
+ vmxnet3_tq_stop(tq, adapter);
+ spin_unlock_irqrestore(&tq->tx_lock, flags);
+ return NETDEV_TX_BUSY;
}
+
+ vmxnet3_copy_hdr(skb, tq, &ctx, adapter);
+
/* fill tx descs related to addr & len */
if (vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter))
goto unlock_drop_pkt;
--
2.5.0
next reply other threads:[~2016-03-04 18:41 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-03-04 18:40 Neil Horman [this message]
2016-03-04 23:13 ` [PATCH] vmxnet3: avoid calling pskb_may_pull with interrupts disabled Shrikrishna Khare
2016-03-07 20:16 ` David Miller
2016-03-08 14:31 ` Neil Horman
2016-03-11 13:22 ` Tetsuo Handa
2016-03-11 19:41 ` Shrikrishna Khare
2016-03-11 23:09 ` Neil Horman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1457116848-6753-1-git-send-email-nhorman@tuxdriver.com \
--to=nhorman@tuxdriver.com \
--cc=davem@davemloft.net \
--cc=netdev@vger.kernel.org \
--cc=pv-drivers@vmware.com \
--cc=skhare@vmware.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).