Netdev List
 help / color / mirror / Atom feed
* [PATCH v3 02/25] virtio: unify config_changed handling
From: Michael S. Tsirkin @ 2014-10-12 11:46 UTC (permalink / raw)
  To: linux-kernel
  Cc: kvm, Wolfram Sang, Heiko Carstens, Sudeep Dutt, virtualization,
	linux-s390, linux-scsi, Christian Borntraeger, v9fs-developer,
	Siva Yerramreddy, Martin Schwidefsky, Paolo Bonzini, netdev,
	Ashutosh Dixit, Greg Kroah-Hartman, Amit Shah, linux390,
	Andrew Morton, David S. Miller
In-Reply-To: <1413114332-626-1-git-send-email-mst@redhat.com>

Replace duplicated code in all transports with a single wrapper in
virtio.c.

The only functional change is in virtio_mmio.c: if a buggy device sends
us an interrupt before driver is set, we previously returned IRQ_NONE,
now we return IRQ_HANDLED.

As this must not happen in practice, this does not look like a big deal.

See also commit 3fff0179e33cd7d0a688dab65700c46ad089e934
	virtio-pci: do not oops on config change if driver not loaded.
for the original motivation behind the driver check.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 include/linux/virtio.h             | 2 ++
 drivers/misc/mic/card/mic_virtio.c | 6 +-----
 drivers/s390/kvm/kvm_virtio.c      | 9 +--------
 drivers/s390/kvm/virtio_ccw.c      | 6 +-----
 drivers/virtio/virtio.c            | 9 +++++++++
 drivers/virtio/virtio_mmio.c       | 7 ++-----
 drivers/virtio/virtio_pci.c        | 6 +-----
 7 files changed, 17 insertions(+), 28 deletions(-)

diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index b46671e..3c19bd3 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -108,6 +108,8 @@ void unregister_virtio_device(struct virtio_device *dev);
 
 void virtio_break_device(struct virtio_device *dev);
 
+void virtio_config_changed(struct virtio_device *dev);
+
 /**
  * virtio_driver - operations for a virtio I/O driver
  * @driver: underlying device driver (populate name and owner).
diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c
index f14b600..e647947 100644
--- a/drivers/misc/mic/card/mic_virtio.c
+++ b/drivers/misc/mic/card/mic_virtio.c
@@ -462,16 +462,12 @@ static void mic_handle_config_change(struct mic_device_desc __iomem *d,
 	struct mic_device_ctrl __iomem *dc
 		= (void __iomem *)d + mic_aligned_desc_size(d);
 	struct mic_vdev *mvdev = (struct mic_vdev *)ioread64(&dc->vdev);
-	struct virtio_driver *drv;
 
 	if (ioread8(&dc->config_change) != MIC_VIRTIO_PARAM_CONFIG_CHANGED)
 		return;
 
 	dev_dbg(mdrv->dev, "%s %d\n", __func__, __LINE__);
-	drv = container_of(mvdev->vdev.dev.driver,
-				struct virtio_driver, driver);
-	if (drv->config_changed)
-		drv->config_changed(&mvdev->vdev);
+	virtio_config_changed(&mvdev->vdev);
 	iowrite8(1, &dc->guest_ack);
 }
 
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index a134965..6431290 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -406,15 +406,8 @@ static void kvm_extint_handler(struct ext_code ext_code,
 
 	switch (param) {
 	case VIRTIO_PARAM_CONFIG_CHANGED:
-	{
-		struct virtio_driver *drv;
-		drv = container_of(vq->vdev->dev.driver,
-				   struct virtio_driver, driver);
-		if (drv->config_changed)
-			drv->config_changed(vq->vdev);
-
+		virtio_config_changed(vq->vdev);
 		break;
-	}
 	case VIRTIO_PARAM_DEV_ADD:
 		schedule_work(&hotplug_work);
 		break;
diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c
index d2c0b44..6cbe6ef 100644
--- a/drivers/s390/kvm/virtio_ccw.c
+++ b/drivers/s390/kvm/virtio_ccw.c
@@ -940,11 +940,7 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev,
 		vring_interrupt(0, vq);
 	}
 	if (test_bit(0, &vcdev->indicators2)) {
-		drv = container_of(vcdev->vdev.dev.driver,
-				   struct virtio_driver, driver);
-
-		if (drv && drv->config_changed)
-			drv->config_changed(&vcdev->vdev);
+		virtio_config_changed(&vcdev->vdev);
 		clear_bit(0, &vcdev->indicators2);
 	}
 }
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index fed0ce1..3980687 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -239,6 +239,15 @@ void unregister_virtio_device(struct virtio_device *dev)
 }
 EXPORT_SYMBOL_GPL(unregister_virtio_device);
 
+void virtio_config_changed(struct virtio_device *dev)
+{
+	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
+
+	if (drv && drv->config_changed)
+		drv->config_changed(dev);
+}
+EXPORT_SYMBOL_GPL(virtio_config_changed);
+
 static int virtio_init(void)
 {
 	if (bus_register(&virtio_bus) != 0)
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index c600ccf..ef9a165 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -234,8 +234,6 @@ static irqreturn_t vm_interrupt(int irq, void *opaque)
 {
 	struct virtio_mmio_device *vm_dev = opaque;
 	struct virtio_mmio_vq_info *info;
-	struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
-			struct virtio_driver, driver);
 	unsigned long status;
 	unsigned long flags;
 	irqreturn_t ret = IRQ_NONE;
@@ -244,9 +242,8 @@ static irqreturn_t vm_interrupt(int irq, void *opaque)
 	status = readl(vm_dev->base + VIRTIO_MMIO_INTERRUPT_STATUS);
 	writel(status, vm_dev->base + VIRTIO_MMIO_INTERRUPT_ACK);
 
-	if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)
-			&& vdrv && vdrv->config_changed) {
-		vdrv->config_changed(&vm_dev->vdev);
+	if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)) {
+		virtio_config_changed(&vm_dev->vdev);
 		ret = IRQ_HANDLED;
 	}
 
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index add40d0..f39f4e7 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -211,12 +211,8 @@ static bool vp_notify(struct virtqueue *vq)
 static irqreturn_t vp_config_changed(int irq, void *opaque)
 {
 	struct virtio_pci_device *vp_dev = opaque;
-	struct virtio_driver *drv;
-	drv = container_of(vp_dev->vdev.dev.driver,
-			   struct virtio_driver, driver);
 
-	if (drv && drv->config_changed)
-		drv->config_changed(&vp_dev->vdev);
+	virtio_config_changed(&vp_dev->vdev);
 	return IRQ_HANDLED;
 }
 
-- 
MST

^ permalink raw reply related

* [PATCH v3 01/25] virtio_pci: fix virtio spec compliance on restore
From: Michael S. Tsirkin @ 2014-10-12 11:46 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-s390, kvm, linux-scsi, Christian Borntraeger, netdev,
	stable, virtualization, Paolo Bonzini, Amit Shah, v9fs-developer,
	David S. Miller
In-Reply-To: <1413114332-626-1-git-send-email-mst@redhat.com>

On restore, virtio pci does the following:
+ set features
+ init vqs etc - device can be used at this point!
+ set ACKNOWLEDGE,DRIVER and DRIVER_OK status bits

This is in violation of the virtio spec, which
requires the following order:
- ACKNOWLEDGE
- DRIVER
- init vqs
- DRIVER_OK

This behaviour will break with hypervisors that assume spec compliant
behaviour.  It seems like a good idea to have this patch applied to
stable branches to reduce the support butden for the hypervisors.

Cc: stable@vger.kernel.org
Cc: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_pci.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 3d1463c..add40d0 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -789,6 +789,7 @@ static int virtio_pci_restore(struct device *dev)
 	struct pci_dev *pci_dev = to_pci_dev(dev);
 	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 	struct virtio_driver *drv;
+	unsigned status = 0;
 	int ret;
 
 	drv = container_of(vp_dev->vdev.dev.driver,
@@ -799,14 +800,40 @@ static int virtio_pci_restore(struct device *dev)
 		return ret;
 
 	pci_set_master(pci_dev);
+	/* We always start by resetting the device, in case a previous
+	 * driver messed it up. */
+	vp_reset(&vp_dev->vdev);
+
+	/* Acknowledge that we've seen the device. */
+	status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
+	vp_set_status(&vp_dev->vdev, status);
+
+	/* Maybe driver failed before freeze.
+	 * Restore the failed status, for debugging. */
+	status |= vp_dev->saved_status & VIRTIO_CONFIG_S_FAILED;
+	vp_set_status(&vp_dev->vdev, status);
+
+	if (!drv)
+		return 0;
+
+	/* We have a driver! */
+	status |= VIRTIO_CONFIG_S_DRIVER;
+	vp_set_status(&vp_dev->vdev, status);
+
 	vp_finalize_features(&vp_dev->vdev);
 
-	if (drv && drv->restore)
+	if (drv->restore) {
 		ret = drv->restore(&vp_dev->vdev);
+		if (ret) {
+			status |= VIRTIO_CONFIG_S_FAILED;
+			vp_set_status(&vp_dev->vdev, status);
+			return ret;
+		}
+	}
 
 	/* Finally, tell the device we're all set */
-	if (!ret)
-		vp_set_status(&vp_dev->vdev, vp_dev->saved_status);
+	status |= VIRTIO_CONFIG_S_DRIVER_OK;
+	vp_set_status(&vp_dev->vdev, status);
 
 	return ret;
 }
-- 
MST

^ permalink raw reply related

* [PATCH v3 25/25] virtio-rng: refactor probe error handling
From: Michael S. Tsirkin @ 2014-10-12 11:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Rusty Russell, virtualization, linux-scsi, linux-s390,
	v9fs-developer, netdev, kvm, Amit Shah, Cornelia Huck,
	Christian Borntraeger, David S. Miller, Paolo Bonzini,
	Matt Mackall, Herbert Xu, Amos Kong, Sasha Levin
In-Reply-To: <1413114332-626-1-git-send-email-mst@redhat.com>

Code like
	vi->vq = NULL;
	kfree(vi)
does not make sense.

Clean it up, use goto error labels for cleanup.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/char/hw_random/virtio-rng.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 132c9cc..72295ea 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -109,8 +109,8 @@ static int probe_common(struct virtio_device *vdev)
 
 	vi->index = index = ida_simple_get(&rng_index_ida, 0, 0, GFP_KERNEL);
 	if (index < 0) {
-		kfree(vi);
-		return index;
+		err = index;
+		goto err_ida;
 	}
 	sprintf(vi->name, "virtio_rng.%d", index);
 	init_completion(&vi->have_data);
@@ -128,13 +128,16 @@ static int probe_common(struct virtio_device *vdev)
 	vi->vq = virtio_find_single_vq(vdev, random_recv_done, "input");
 	if (IS_ERR(vi->vq)) {
 		err = PTR_ERR(vi->vq);
-		vi->vq = NULL;
-		kfree(vi);
-		ida_simple_remove(&rng_index_ida, index);
-		return err;
+		goto err_find;
 	}
 
 	return 0;
+
+err_find:
+	ida_simple_remove(&rng_index_ida, index);
+err_ida:
+	kfree(vi);
+	return err;
 }
 
 static void remove_common(struct virtio_device *vdev)
-- 
MST

^ permalink raw reply related

* [PATCH v3 00/25] virtio: fix spec compliance issues
From: Michael S. Tsirkin @ 2014-10-12 11:46 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-s390, kvm, linux-scsi, Christian Borntraeger, netdev,
	virtualization, Paolo Bonzini, Amit Shah, v9fs-developer,
	David S. Miller

Rusty, please review this, and consider for this merge window.

This fixes the following virtio spec compliance issues:
1. on restore, drivers use device before setting ACKNOWLEDGE and DRIVER bits
2. on probe, drivers aren't prepared to handle config interrupts
   arriving before probe returns
3. on probe, drivers use device before DRIVER_OK it set

Note that 1 is a clear violation of virtio spec 0.9 and 1.0,
so I am proposing the fix for stable. OTOH 2 is against 1.0 rules but
is a known documented bug in many drivers, so let's fix it going
forward, but it does not seem to be worth it to backport
the changes.

An error handling bugfix for virtio-net and a fix for a
theoretical race condition in virtio scsi is also included.

2 is merely a theoretical race condition, but it seems important
to address to make sure that changes to address 3 do not introduce
instability.

I also included patch to drop scan callback use from virtio
scsi: using this callback creates asymmetry between probe
and resume, and we actually had to add code comments so
people can figure out the flow. Code flow becomes clearer
with the new API.

After this change, the only user of the scan callback is virtio rng.
It's possible to modify it trivially and then drop scan callback
from core, but I'm rather inclined to look at ways to
make some rng core changes so that we don't need to
have so many variables tracking device state.
So this is deferred for now.

Michael S. Tsirkin (24):
  virtio_pci: fix virtio spec compliance on restore
  virtio: unify config_changed handling
  virtio-pci: move freeze/restore to virtio core
  virtio: defer config changed notifications
  virtio_blk: drop config_enable
  virtio-blk: drop config_mutex
  virtio_net: drop config_enable
  virtio-net: drop config_mutex
  virtio_net: minor cleanup
  virtio: add API to enable VQs early
  virtio_net: enable VQs early
  virtio_blk: enable VQs early
  virtio_console: enable VQs early
  9p/trans_virtio: enable VQs early
  virtio_net: fix use after free on allocation failure
  virtio_scsi: move kick event out from virtscsi_init
  virtio_blk: enable VQs early on restore
  virtio_scsi: enable VQs early on restore
  virtio_console: enable VQs early on restore
  virtio_net: enable VQs early on restore
  virtio_scsi: fix race on device removal
  virtio_balloon: enable VQs early on restore
  virtio_scsi: drop scan callback
  virtio-rng: refactor probe error handling

Paolo Bonzini (1):
  virito_scsi: use freezable WQ for events

 include/linux/virtio.h              |  14 +++++
 include/linux/virtio_config.h       |  17 ++++++
 drivers/block/virtio_blk.c          |  40 ++++----------
 drivers/char/hw_random/virtio-rng.c |  15 +++---
 drivers/char/virtio_console.c       |   4 ++
 drivers/misc/mic/card/mic_virtio.c  |   6 +--
 drivers/net/virtio_net.c            |  44 +++++-----------
 drivers/s390/kvm/kvm_virtio.c       |   9 +---
 drivers/s390/kvm/virtio_ccw.c       |   6 +--
 drivers/scsi/virtio_scsi.c          |  42 +++++++++------
 drivers/virtio/virtio.c             | 102 ++++++++++++++++++++++++++++++++++++
 drivers/virtio/virtio_balloon.c     |   2 +
 drivers/virtio/virtio_mmio.c        |   7 +--
 drivers/virtio/virtio_pci.c         |  33 ++----------
 net/9p/trans_virtio.c               |   2 +
 15 files changed, 206 insertions(+), 137 deletions(-)

-- 
MST

^ permalink raw reply

* [PATCH v3 21/25] virito_scsi: use freezable WQ for events
From: Michael S. Tsirkin @ 2014-10-12 11:48 UTC (permalink / raw)
  To: linux-kernel
  Cc: Rusty Russell, virtualization, linux-scsi, linux-s390,
	v9fs-developer, netdev, kvm, Amit Shah, Cornelia Huck,
	Christian Borntraeger, David S. Miller, Paolo Bonzini,
	James E.J. Bottomley
In-Reply-To: <1413114332-626-1-git-send-email-mst@redhat.com>

From: Paolo Bonzini <pbonzini@redhat.com>

Michael S. Tsirkin noticed a race condition:
we reset device on freeze, but system WQ is still
running so it might try adding bufs to a VQ meanwhile.

To fix, switch to handling events from the freezable WQ.

Reported-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/scsi/virtio_scsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 2b565b3..501838d 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -390,7 +390,7 @@ static void virtscsi_complete_event(struct virtio_scsi *vscsi, void *buf)
 {
 	struct virtio_scsi_event_node *event_node = buf;
 
-	schedule_work(&event_node->work);
+	queue_work(system_freezable_wq, &event_node->work);
 }
 
 static void virtscsi_event_done(struct virtqueue *vq)
-- 
MST

^ permalink raw reply related

* [PATCH v3 17/25] virtio_blk: enable VQs early on restore
From: Michael S. Tsirkin @ 2014-10-12 11:47 UTC (permalink / raw)
  To: linux-kernel
  Cc: Rusty Russell, virtualization, linux-scsi, linux-s390,
	v9fs-developer, netdev, kvm, Amit Shah, Cornelia Huck,
	Christian Borntraeger, David S. Miller, Paolo Bonzini
In-Reply-To: <1413114332-626-1-git-send-email-mst@redhat.com>

virtio spec requires drivers to set DRIVER_OK before using VQs.
This is set automatically after restore returns, virtio block violated
this rule on restore by restarting queues, which might in theory
cause the VQ to be used directly within restore.

To fix, call virtio_enable_vqs_early before using starting queues.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/block/virtio_blk.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 46b04bf..1c95af5 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -804,10 +804,13 @@ static int virtblk_restore(struct virtio_device *vdev)
 	int ret;
 
 	ret = init_vq(vdev->priv);
-	if (!ret)
-		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
+	if (ret)
+		return ret;
+
+	virtio_enable_vqs_early(vdev);
 
-	return ret;
+	blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
+	return 0;
 }
 #endif
 
-- 
MST

^ permalink raw reply related

* [PATCH v3 16/25] virtio_scsi: move kick event out from virtscsi_init
From: Michael S. Tsirkin @ 2014-10-12 11:47 UTC (permalink / raw)
  To: linux-kernel
  Cc: Rusty Russell, virtualization, linux-scsi, linux-s390,
	v9fs-developer, netdev, kvm, Amit Shah, Cornelia Huck,
	Christian Borntraeger, David S. Miller, Paolo Bonzini,
	James E.J. Bottomley
In-Reply-To: <1413114332-626-1-git-send-email-mst@redhat.com>

We currently kick event within virtscsi_init,
before host is fully initialized.

This can in theory confuse guest if device
consumes the buffers immediately.

To fix,  move virtscsi_kick_event_all out to scan/restore.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/scsi/virtio_scsi.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index eee1bc0..0642ce3 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -853,7 +853,11 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
 
 static void virtscsi_scan(struct virtio_device *vdev)
 {
-	struct Scsi_Host *shost = (struct Scsi_Host *)vdev->priv;
+	struct Scsi_Host *shost = virtio_scsi_host(vdev);
+	struct virtio_scsi *vscsi = shost_priv(shost);
+
+	if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
+		virtscsi_kick_event_all(vscsi);
 
 	scsi_scan_host(shost);
 }
@@ -916,9 +920,6 @@ static int virtscsi_init(struct virtio_device *vdev,
 	virtscsi_config_set(vdev, cdb_size, VIRTIO_SCSI_CDB_SIZE);
 	virtscsi_config_set(vdev, sense_size, VIRTIO_SCSI_SENSE_SIZE);
 
-	if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
-		virtscsi_kick_event_all(vscsi);
-
 	err = 0;
 
 out:
@@ -1048,8 +1049,13 @@ static int virtscsi_restore(struct virtio_device *vdev)
 		return err;
 
 	err = register_hotcpu_notifier(&vscsi->nb);
-	if (err)
+	if (err) {
 		vdev->config->del_vqs(vdev);
+		return err;
+	}
+
+	if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
+		virtscsi_kick_event_all(vscsi);
 
 	return err;
 }
-- 
MST

^ permalink raw reply related

* [PATCH v3 13/25] virtio_console: enable VQs early
From: Michael S. Tsirkin @ 2014-10-12 11:47 UTC (permalink / raw)
  To: linux-kernel
  Cc: Rusty Russell, virtualization, linux-scsi, linux-s390,
	v9fs-developer, netdev, kvm, Amit Shah, Cornelia Huck,
	Christian Borntraeger, David S. Miller, Paolo Bonzini,
	Arnd Bergmann, Greg Kroah-Hartman
In-Reply-To: <1413114332-626-1-git-send-email-mst@redhat.com>

virtio spec requires drivers to set DRIVER_OK before using VQs.
This is set automatically after probe returns, virtio console violated this
rule by adding inbufs, which causes the VQ to be used directly within
probe.

To fix, call virtio_enable_vqs_early before using VQs.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/char/virtio_console.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index b585b47..6ebe8f6 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1449,6 +1449,8 @@ static int add_port(struct ports_device *portdev, u32 id)
 	spin_lock_init(&port->outvq_lock);
 	init_waitqueue_head(&port->waitqueue);
 
+	virtio_enable_vqs_early(portdev->vdev);
+
 	/* Fill the in_vq with buffers so the host can send us data. */
 	nr_added_bufs = fill_queue(port->in_vq, &port->inbuf_lock);
 	if (!nr_added_bufs) {
-- 
MST

^ permalink raw reply related

* Re: IPv6 FIB related crash with MACVLANs in 3.9.11+ kernel.
From: Vladislav Yasevich @ 2014-10-12 11:42 UTC (permalink / raw)
  To: Ben Greear; +Cc: Cong Wang, Hannes Frederic Sowa, Hongmei Li, netdev
In-Reply-To: <5429CE26.6000108@candelatech.com>

On Mon, Sep 29, 2014 at 5:24 PM, Ben Greear <greearb@candelatech.com> wrote:
> On 09/29/2014 01:39 PM, Cong Wang wrote:
>> On Mon, Sep 29, 2014 at 12:48 PM, Ben Greear <greearb@candelatech.com> wrote:
>>>
>>> We are going to be running up to 20 concurrent scripts that
>>> will be dumping routes & ips, and configuring ip addresses
>>> and routes during bringup.
>>>
>>> I don't have a simple stand-alone way to reproduce this,
>>> but at least when we reported the problem is was very easy
>>> for us to reproduce with our tool.
>>>
>>> Maybe 20 scripts running in parallel that randomly configured and dumped routes
>>> and ip addresses on random interfaces would do the trick?
>>>
>>
>> I think the most important question is why is this related with macvlan?
>> IPv6 is L3 while macvlan pure L2, if this is a IPv6 routing bug, it should
>> not be limited to macvlan.
>
> We saw it using mac-vlans on top of ixgbe.  Probably it can be reproduced
> elsewhere, but since we had a good test case, we didn't bother trying lots
> of other combinations.
>
> Just enabling some debug code caused the problem to be much harder
> to reproduce, so it is probably some sort of race.  Maybe lots of mac-vlans
> make it easier to hit.
>
>> What does your IPv6 routing table look like? And how do you configure those
>> macvlan interfaces?
>
> Each interface would have it's own routing table, with at least as subnet
> route.  I'm not sure we were adding a default gateway or not.
>
> The main routing table would also have some auto-created routes
> I think.
>
> It is configured using 'ip' to add and dump routes.
>

Ben

Just saw this thread.  Can you check if this commit makes a difference:
commit 40b8fe45d1f094e3babe7b2dc2b71557ab71401d
Author: Vlad Yasevich <vyasevich@gmail.com>
Date:   Mon Sep 22 16:34:17 2014 -0400

    macvtap: Fix race between device delete and open.

Thanks
-vlad

> Thanks,
> Ben
>
> --
> Ben Greear <greearb@candelatech.com>
> Candela Technologies Inc  http://www.candelatech.com
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH v2] brcmfmac: dhd_sdio.c:  Cleaning up missing null-terminate in conjunction with strncpy
From: Rickard Strandqvist @ 2014-10-12 11:42 UTC (permalink / raw)
  To: Brett Rudley, Arend van Spriel, linville-2XuSBdqkA4R54TAoqtyWWQ
  Cc: Rickard Strandqvist, Hante Meuleman, Pieter-Paul Giesberts,
	Daniel Kim, linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	brcm80211-dev-list-dY08KVG/lbpWk0Htik3J/w,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1413114134-18044-1-git-send-email-rickard_strandqvist-IW2WV5XWFqGZkjO+N0TKoMugMpMbD5Xr@public.gmane.org>

Replacing strncpy with strlcpy to avoid strings that lacks null terminate.
And changed from using strncat to strlcat to simplify code.

Signed-off-by: Rickard Strandqvist <rickard_strandqvist-IW2WV5XWFqGZkjO+N0TKoMugMpMbD5Xr@public.gmane.org>
---
 drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c |   25 ++++++++++----------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
index f55f625..d20d4e6 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
@@ -670,7 +670,6 @@ static int brcmf_sdio_get_fwnames(struct brcmf_chip *ci,
 				  struct brcmf_sdio_dev *sdiodev)
 {
 	int i;
-	uint fw_len, nv_len;
 	char end;
 
 	for (i = 0; i < ARRAY_SIZE(brcmf_fwname_data); i++) {
@@ -684,25 +683,25 @@ static int brcmf_sdio_get_fwnames(struct brcmf_chip *ci,
 		return -ENODEV;
 	}
 
-	fw_len = sizeof(sdiodev->fw_name) - 1;
-	nv_len = sizeof(sdiodev->nvram_name) - 1;
 	/* check if firmware path is provided by module parameter */
 	if (brcmf_firmware_path[0] != '\0') {
-		strncpy(sdiodev->fw_name, brcmf_firmware_path, fw_len);
-		strncpy(sdiodev->nvram_name, brcmf_firmware_path, nv_len);
-		fw_len -= strlen(sdiodev->fw_name);
-		nv_len -= strlen(sdiodev->nvram_name);
+		strlcpy(sdiodev->fw_name, brcmf_firmware_path,
+			sizeof(sdiodev->fw_name));
+		strlcpy(sdiodev->nvram_name, brcmf_firmware_path,
+			sizeof(sdiodev->nvram_name));
 
 		end = brcmf_firmware_path[strlen(brcmf_firmware_path) - 1];
 		if (end != '/') {
-			strncat(sdiodev->fw_name, "/", fw_len);
-			strncat(sdiodev->nvram_name, "/", nv_len);
-			fw_len--;
-			nv_len--;
+			strlcat(sdiodev->fw_name, "/",
+				sizeof(sdiodev->fw_name));
+			strlcat(sdiodev->nvram_name, "/",
+				sizeof(sdiodev->nvram_name));
 		}
 	}
-	strncat(sdiodev->fw_name, brcmf_fwname_data[i].bin, fw_len);
-	strncat(sdiodev->nvram_name, brcmf_fwname_data[i].nv, nv_len);
+	strlcat(sdiodev->fw_name, brcmf_fwname_data[i].bin,
+		sizeof(sdiodev->fw_name));
+	strlcat(sdiodev->nvram_name, brcmf_fwname_data[i].nv,
+		sizeof(sdiodev->nvram_name));
 
 	return 0;
 }
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v2] brcmfmac: dhd_sdio.c:  Cleaning up missing null-terminate in conjunction with strncpy
From: Rickard Strandqvist @ 2014-10-12 11:42 UTC (permalink / raw)
  To: Brett Rudley, Arend van Spriel, linville
  Cc: Rickard Strandqvist, Hante Meuleman, Pieter-Paul Giesberts,
	Daniel Kim, linux-wireless, brcm80211-dev-list, netdev,
	linux-kernel

Now when I did the patch again I just want to ensure that this is not any other errors.

Although strncpy use before was wrong, it did filled the remaining part of the string with null
characters. This is not something that is needed?


And a part that is not obvious right, so check.
When brcmf_firmware_path[0] == '\0'   we do not want to do like.

if (brcmf_firmware_path[0] != '\0') {
...
} else {
  sdiodev->fw_name[0] = '\0';
  sdiodev->nvram_name[0] = '\0';
}


Kind regards
Rickard Strandqvist


Rickard Strandqvist (1):
  net: wireless: brcm80211: brcmfmac: dhd_sdio.c:  Cleaning up missing null-terminate in conjunction with
    strncpy

 drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c |   25 ++++++++++----------
 1 file changed, 12 insertions(+), 13 deletions(-)

-- 
1.7.10.4

^ permalink raw reply

* Re: [PATCH] net: wireless: brcm80211: brcmfmac: dhd_sdio.c: Cleaning up missing null-terminate in conjunction with strncpy
From: Rickard Strandqvist @ 2014-10-12 11:05 UTC (permalink / raw)
  To: Arend van Spriel
  Cc: Brett Rudley, Hante Meuleman, John W. Linville,
	Pieter-Paul Giesberts, Daniel Kim,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	brcm80211-dev-list-dY08KVG/lbpWk0Htik3J/w, Network Development,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <543A36A0.8090300-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>

2014-10-12 10:06 GMT+02:00 Arend van Spriel <arend-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>:
> On 12-10-14 01:52, Rickard Strandqvist wrote:
>> Replacing strncpy with strlcpy to avoid strings that lacks null terminate.
>> And changed from using strncpy to strlcpy to simplify code.
>
> Looks good to me. Just two small process related remarks:
>
> - It is sufficient to prefix the patch with brcmfmac (skip net:...).
> - Send the patch to the wireless maintainer, ie. John Linville.

Hi

Sure, no problem.
Sending a new patch in a moment...

Kind regards
Rickard Strandqvist
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH V1 net-next 1/2] pgtable: Add API to query if write combining is available
From: Moshe Lazer @ 2014-10-12  9:54 UTC (permalink / raw)
  To: David Miller; +Cc: ogerlitz, jackm, talal, yevgenyp, netdev, amirv, moshel
In-Reply-To: <20141008.122450.1919301744382855426.davem@davemloft.net>


On 10/8/2014 7:24 PM, David Miller wrote:
> From: Moshe Lazer <moshel@dev.mellanox.co.il>
> Date: Wed, 08 Oct 2014 11:44:57 +0300
>
>>> #if defined(__i386__) || defined(__x86_64__)
>>> 	if (map->type == _DRM_REGISTERS && !(map->flags & _DRM_WRITE_COMBINING))
>>> 		tmp = pgprot_noncached(tmp);
>>> 	else
>>> 		tmp = pgprot_writecombine(tmp);
>>> #elif defined(__powerpc__)
>>> 	pgprot_val(tmp) |= _PAGE_NO_CACHE;
>>> 	if (map->type == _DRM_REGISTERS)
>>> 		pgprot_val(tmp) |= _PAGE_GUARDED;
>>> #elif defined(__ia64__)
>>> 	if (efi_range_is_wc(vma->vm_start, vma->vm_end -
>>> 				    vma->vm_start))
>>> 		tmp = pgprot_writecombine(tmp);
>>> 	else
>>> 		tmp = pgprot_noncached(tmp);
>>> #elif defined(__sparc__) || defined(__arm__) || defined(__mips__)
>>> 	tmp = pgprot_noncached(tmp);
>>> #endif
>> The idea was to provide an indication as for whether the arch supports
>> write-combining in general.
>> If we want to benefit from blue flame operations, we need to map the
>> blue flame registers as write combining - otherwise there is no
>> benefit. So we would like to know if write combining is supported by
>> the system or not.
>>
> You completely miss my point.  On a given architectuire it might be
> _illegal_ to map certain address ranges as write-combining without
> checks like the ones above that ia64 needs.
>
> Therefore your proposed interface is by definition insufficient.
Thanks David, I'll try to clarify my point.
For me the writecombine_available() is a way to know if the 
pgprot_writecombine() is effective or just cover call to the 
pgprot_noncached().
I want to use the writecombine_available() regardless to the mapping 
address.
For example in mlx4 query_device I want to indicate that blue-flame is 
not supported if  `writecombine_available() ==  false`.
In this case we don't have the mapping address yet.

Later on if an arch has write-combining (writecombine_available() ==  
true) when we try to map the blue-flame registers (in mlx4_ib_mmap):

     vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

     if (io_remap_pfn_range(vma, vma->vm_start,
         to_mucontext(context)->uar.pfn +
         dev->dev->caps.num_uars,
         PAGE_SIZE, vma->vm_page_prot))
             return -EAGAIN;

I can be sure that pgprot_writecombine() is not a cover for 
pgprot_noncached().
The address checks that you mentioned should be part of 
io_remap_pfn_range, this function should fail if the vma->vm_start is 
not compatible to the vma->vm_page_prot.
Please let me know if I misunderstood something.

^ permalink raw reply

* Re: [PATCH net-next RFC 1/3] virtio: support for urgent descriptors
From: Michael S. Tsirkin @ 2014-10-12  9:27 UTC (permalink / raw)
  To: Jason Wang; +Cc: kvm, netdev, linux-kernel, virtualization, linux-api
In-Reply-To: <1413011806-3813-2-git-send-email-jasowang@redhat.com>

On Sat, Oct 11, 2014 at 03:16:44PM +0800, Jason Wang wrote:
> Below should be useful for some experiments Jason is doing.
> I thought I'd send it out for early review/feedback.
> 
> event idx feature allows us to defer interrupts until
> a specific # of descriptors were used.
> Sometimes it might be useful to get an interrupt after
> a specific descriptor, regardless.
> This adds a descriptor flag for this, and an API
> to create an urgent output descriptor.
> This is still an RFC:
> we'll need a feature bit for drivers to detect this,
> but we've run out of feature bits for virtio 0.X.
> For experimentation purposes, drivers can assume
> this is set, or add a driver-specific feature bit.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> Signed-off-by: Jason Wang <jasowang@redhat.com>

I see that as compared to my original patch, you have
added a new flag: VRING_AVAIL_F_NO_URGENT_INTERRUPT
I don't think it's necessary, see below.

As such, I think this patch should be split:
- original patch adding support for urgent descriptors
- a patch adding virtqueue_enable/disable_cb_urgent(_prepare)?




> ---
>  drivers/virtio/virtio_ring.c     | 75 +++++++++++++++++++++++++++++++++++++---
>  include/linux/virtio.h           | 14 ++++++++
>  include/uapi/linux/virtio_ring.h |  5 ++-
>  3 files changed, 89 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 4d08f45a..a5188c6 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -115,6 +115,7 @@ static inline struct scatterlist *sg_next_arr(struct scatterlist *sg,
>  
>  /* Set up an indirect table of descriptors and add it to the queue. */
>  static inline int vring_add_indirect(struct vring_virtqueue *vq,
> +				     bool urgent,
>  				     struct scatterlist *sgs[],
>  				     struct scatterlist *(*next)
>  				       (struct scatterlist *, unsigned int *),
> @@ -173,6 +174,8 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq,
>  	/* Use a single buffer which doesn't continue */
>  	head = vq->free_head;
>  	vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
> +	if (urgent)
> +		vq->vring.desc[head].flags |= VRING_DESC_F_URGENT;
>  	vq->vring.desc[head].addr = virt_to_phys(desc);
>  	/* kmemleak gives a false positive, as it's hidden by virt_to_phys */
>  	kmemleak_ignore(desc);
> @@ -185,6 +188,7 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq,
>  }
>  
>  static inline int virtqueue_add(struct virtqueue *_vq,
> +			        bool urgent,
>  				struct scatterlist *sgs[],
>  				struct scatterlist *(*next)
>  				  (struct scatterlist *, unsigned int *),
> @@ -227,7 +231,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
>  	/* If the host supports indirect descriptor tables, and we have multiple
>  	 * buffers, then go indirect. FIXME: tune this threshold */
>  	if (vq->indirect && total_sg > 1 && vq->vq.num_free) {
> -		head = vring_add_indirect(vq, sgs, next, total_sg, total_out,
> +		head = vring_add_indirect(vq, urgent, sgs, next, total_sg, total_out,
>  					  total_in,
>  					  out_sgs, in_sgs, gfp);
>  		if (likely(head >= 0))
> @@ -256,6 +260,10 @@ static inline int virtqueue_add(struct virtqueue *_vq,
>  	for (n = 0; n < out_sgs; n++) {
>  		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
>  			vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
> +			if (urgent) {
> +				vq->vring.desc[head].flags |= VRING_DESC_F_URGENT;
> +				urgent = false;
> +			}
>  			vq->vring.desc[i].addr = sg_phys(sg);
>  			vq->vring.desc[i].len = sg->length;
>  			prev = i;
> @@ -265,6 +273,10 @@ static inline int virtqueue_add(struct virtqueue *_vq,
>  	for (; n < (out_sgs + in_sgs); n++) {
>  		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
>  			vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
> +			if (urgent) {
> +				vq->vring.desc[head].flags |= VRING_DESC_F_URGENT;
> +				urgent = false;
> +			}
>  			vq->vring.desc[i].addr = sg_phys(sg);
>  			vq->vring.desc[i].len = sg->length;
>  			prev = i;
> @@ -305,6 +317,8 @@ add_head:
>  
>  /**
>   * virtqueue_add_sgs - expose buffers to other end
> + * @urgent: in case virtqueue_enable_cb_delayed was called, cause an interrupt
> + *          after this descriptor was completed
>   * @vq: the struct virtqueue we're talking about.
>   * @sgs: array of terminated scatterlists.
>   * @out_num: the number of scatterlists readable by other side
> @@ -337,7 +351,7 @@ int virtqueue_add_sgs(struct virtqueue *_vq,
>  		for (sg = sgs[i]; sg; sg = sg_next(sg))
>  			total_in++;
>  	}
> -	return virtqueue_add(_vq, sgs, sg_next_chained,
> +	return virtqueue_add(_vq, false, sgs, sg_next_chained,
>  			     total_out, total_in, out_sgs, in_sgs, data, gfp);
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
> @@ -360,11 +374,35 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
>  			 void *data,
>  			 gfp_t gfp)
>  {
> -	return virtqueue_add(vq, &sg, sg_next_arr, num, 0, 1, 0, data, gfp);
> +	return virtqueue_add(vq, false, &sg, sg_next_arr, num, 0, 1, 0, data, gfp);
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
>  
>  /**
> + * virtqueue_add_outbuf - expose output buffers to other end
> + *          in case virtqueue_enable_cb_delayed was called, cause an interrupt
> + *          after this descriptor was completed
> + * @vq: the struct virtqueue we're talking about.
> + * @sgs: array of scatterlists (need not be terminated!)
> + * @num: the number of scatterlists readable by other side
> + * @data: the token identifying the buffer.
> + * @gfp: how to do memory allocations (if necessary).
> + *
> + * Caller must ensure we don't call this with other virtqueue operations
> + * at the same time (except where noted).
> + *
> + * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
> + */
> +int virtqueue_add_outbuf_urgent(struct virtqueue *vq,
> +			 struct scatterlist sg[], unsigned int num,
> +			 void *data,
> +			 gfp_t gfp)
> +{
> +	return virtqueue_add(vq, true, &sg, sg_next_arr, num, 0, 1, 0, data, gfp);
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_add_outbuf_urgent);
> +
> +/**
>   * virtqueue_add_inbuf - expose input buffers to other end
>   * @vq: the struct virtqueue we're talking about.
>   * @sgs: array of scatterlists (need not be terminated!)
> @@ -382,7 +420,7 @@ int virtqueue_add_inbuf(struct virtqueue *vq,
>  			void *data,
>  			gfp_t gfp)
>  {
> -	return virtqueue_add(vq, &sg, sg_next_arr, 0, num, 0, 1, data, gfp);
> +	return virtqueue_add(vq, false, &sg, sg_next_arr, 0, num, 0, 1, data, gfp);
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
>  
> @@ -595,6 +633,14 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
>  
> +void virtqueue_disable_cb_urgent(struct virtqueue *_vq)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +
> +	vq->vring.avail->flags |= VRING_AVAIL_F_NO_URGENT_INTERRUPT;
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_disable_cb_urgent);
> +
>  /**
>   * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
>   * @vq: the struct virtqueue we're talking about.
> @@ -626,6 +672,19 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
>  
> +unsigned virtqueue_enable_cb_prepare_urgent(struct virtqueue *_vq)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +	u16 last_used_idx;
> +
> +	START_USE(vq);
> +	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_URGENT_INTERRUPT;
> +	last_used_idx = vq->last_used_idx;
> +	END_USE(vq);
> +	return last_used_idx;
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare_urgent);
> +

You can implement virtqueue_enable_cb_prepare_urgent
simply by clearing ~VRING_AVAIL_F_NO_INTERRUPT.

The effect is same: host sends interrupts only if there
is an urgent descriptor.

>  /**
>   * virtqueue_poll - query pending used buffers
>   * @vq: the struct virtqueue we're talking about.
> @@ -662,6 +721,14 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
>  
> +bool virtqueue_enable_cb_urgent(struct virtqueue *_vq)
> +{
> +	unsigned last_used_idx = virtqueue_enable_cb_prepare_urgent(_vq);
> +
> +	return !virtqueue_poll(_vq, last_used_idx);
> +}
> +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_urgent);
> +
>  /**
>   * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
>   * @vq: the struct virtqueue we're talking about.
> diff --git a/include/linux/virtio.h b/include/linux/virtio.h
> index b46671e..68be5f2 100644
> --- a/include/linux/virtio.h
> +++ b/include/linux/virtio.h
> @@ -39,6 +39,12 @@ int virtqueue_add_outbuf(struct virtqueue *vq,
>  			 void *data,
>  			 gfp_t gfp);
>  
> +int virtqueue_add_outbuf_urgent(struct virtqueue *vq,
> +				struct scatterlist sg[], unsigned int num,
> +				void *data,
> +				gfp_t gfp);
> +
> +
>  int virtqueue_add_inbuf(struct virtqueue *vq,
>  			struct scatterlist sg[], unsigned int num,
>  			void *data,
> @@ -61,10 +67,18 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
>  
>  void virtqueue_disable_cb(struct virtqueue *vq);
>  
> +void virtqueue_disable_cb_urgent(struct virtqueue *vq);
> +
>  bool virtqueue_enable_cb(struct virtqueue *vq);
>  
> +bool virtqueue_enable_cb_urgent(struct virtqueue *vq);
> +
> +bool virtqueue_enable_cb_urgent(struct virtqueue *vq);
> +
>  unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq);
>  
> +unsigned virtqueue_enable_cb_prepare_urgent(struct virtqueue *vq);
> +
>  bool virtqueue_poll(struct virtqueue *vq, unsigned);
>  
>  bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
> diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h
> index a99f9b7..daf5bb0 100644
> --- a/include/uapi/linux/virtio_ring.h
> +++ b/include/uapi/linux/virtio_ring.h
> @@ -39,6 +39,9 @@
>  #define VRING_DESC_F_WRITE	2
>  /* This means the buffer contains a list of buffer descriptors. */
>  #define VRING_DESC_F_INDIRECT	4
> +/* This means the descriptor should cause an interrupt
> + * ignoring avail event idx. */
> +#define VRING_DESC_F_URGENT	8
>  
>  /* The Host uses this in used->flags to advise the Guest: don't kick me when
>   * you add a buffer.  It's unreliable, so it's simply an optimization.  Guest
> @@ -48,7 +51,7 @@
>   * when you consume a buffer.  It's unreliable, so it's simply an
>   * optimization.  */
>  #define VRING_AVAIL_F_NO_INTERRUPT	1
> -
> +#define VRING_AVAIL_F_NO_URGENT_INTERRUPT	2
>  /* We support indirect buffer descriptors */
>  #define VIRTIO_RING_F_INDIRECT_DESC	28
>  
> -- 
> 1.8.3.1

^ permalink raw reply

* Re: [PATCH] net: wireless: brcm80211: brcmfmac: dhd_sdio.c:  Cleaning up missing null-terminate in conjunction with strncpy
From: Arend van Spriel @ 2014-10-12  8:06 UTC (permalink / raw)
  To: Rickard Strandqvist
  Cc: Brett Rudley, Hante Meuleman, John W. Linville,
	Pieter-Paul Giesberts, Daniel Kim, linux-wireless,
	brcm80211-dev-list, netdev, linux-kernel
In-Reply-To: <1413071551-15372-1-git-send-email-rickard_strandqvist@spectrumdigital.se>

On 12-10-14 01:52, Rickard Strandqvist wrote:
> Replacing strncpy with strlcpy to avoid strings that lacks null terminate.
> And changed from using strncpy to strlcpy to simplify code.

Looks good to me. Just two small process related remarks:

- It is sufficient to prefix the patch with brcmfmac (skip net:...).
- Send the patch to the wireless maintainer, ie. John Linville.

Acked-by: Arend van Spriel <arend@broadcom.com>
> Signed-off-by: Rickard Strandqvist <rickard_strandqvist@spectrumdigital.se>
> ---
>  drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c |   25 ++++++++++----------
>  1 file changed, 12 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
> index f55f625..d20d4e6 100644
> --- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
> +++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
> @@ -670,7 +670,6 @@ static int brcmf_sdio_get_fwnames(struct brcmf_chip *ci,
>  				  struct brcmf_sdio_dev *sdiodev)
>  {
>  	int i;
> -	uint fw_len, nv_len;
>  	char end;
>  
>  	for (i = 0; i < ARRAY_SIZE(brcmf_fwname_data); i++) {
> @@ -684,25 +683,25 @@ static int brcmf_sdio_get_fwnames(struct brcmf_chip *ci,
>  		return -ENODEV;
>  	}
>  
> -	fw_len = sizeof(sdiodev->fw_name) - 1;
> -	nv_len = sizeof(sdiodev->nvram_name) - 1;
>  	/* check if firmware path is provided by module parameter */
>  	if (brcmf_firmware_path[0] != '\0') {
> -		strncpy(sdiodev->fw_name, brcmf_firmware_path, fw_len);
> -		strncpy(sdiodev->nvram_name, brcmf_firmware_path, nv_len);
> -		fw_len -= strlen(sdiodev->fw_name);
> -		nv_len -= strlen(sdiodev->nvram_name);
> +		strlcpy(sdiodev->fw_name, brcmf_firmware_path,
> +			sizeof(sdiodev->fw_name));
> +		strlcpy(sdiodev->nvram_name, brcmf_firmware_path,
> +			sizeof(sdiodev->nvram_name));
>  
>  		end = brcmf_firmware_path[strlen(brcmf_firmware_path) - 1];
>  		if (end != '/') {
> -			strncat(sdiodev->fw_name, "/", fw_len);
> -			strncat(sdiodev->nvram_name, "/", nv_len);
> -			fw_len--;
> -			nv_len--;
> +			strlcat(sdiodev->fw_name, "/",
> +				sizeof(sdiodev->fw_name));
> +			strlcat(sdiodev->nvram_name, "/",
> +				sizeof(sdiodev->nvram_name));
>  		}
>  	}
> -	strncat(sdiodev->fw_name, brcmf_fwname_data[i].bin, fw_len);
> -	strncat(sdiodev->nvram_name, brcmf_fwname_data[i].nv, nv_len);
> +	strlcat(sdiodev->fw_name, brcmf_fwname_data[i].bin,
> +		sizeof(sdiodev->fw_name));
> +	strlcat(sdiodev->nvram_name, brcmf_fwname_data[i].nv,
> +		sizeof(sdiodev->nvram_name));
>  
>  	return 0;
>  }
> 

^ permalink raw reply

* Re: [PATCH net 2/3] net: sctp: fix panic on duplicate ASCONF chunks
From: Vladislav Yasevich @ 2014-10-12  7:15 UTC (permalink / raw)
  To: Neil Horman
  Cc: Daniel Borkmann, David Miller, linux-sctp@vger.kernel.org,
	netdev@vger.kernel.org
In-Reply-To: <20141012014233.GA16360@localhost.localdomain>

I prefer we stay away from WARN-ONs, but pr_debug might be useful.

-vlad

On Sat, Oct 11, 2014 at 9:42 PM, Neil Horman <nhorman@tuxdriver.com> wrote:
> On Sat, Oct 11, 2014 at 12:02:31AM +0200, Daniel Borkmann wrote:
>> On 10/10/2014 05:39 PM, Neil Horman wrote:
>> ...
>> >Is it worth adding a WARN_ON, to indicate that two ASCONF chunks have been
>> >received with duplicate serials?
>>
>> Don't think so, as this would be triggerable from outside.
>>
> WARN_ON_ONCE then, per serial number?
> Neil
>
>> Thanks,
>> Daniel
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>

^ permalink raw reply

* Re: [PATCH net 2/3] net: sctp: fix panic on duplicate ASCONF chunks
From: Neil Horman @ 2014-10-12  1:42 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: davem, linux-sctp, netdev, Vlad Yasevich
In-Reply-To: <54385777.7090907@redhat.com>

On Sat, Oct 11, 2014 at 12:02:31AM +0200, Daniel Borkmann wrote:
> On 10/10/2014 05:39 PM, Neil Horman wrote:
> ...
> >Is it worth adding a WARN_ON, to indicate that two ASCONF chunks have been
> >received with duplicate serials?
> 
> Don't think so, as this would be triggerable from outside.
> 
WARN_ON_ONCE then, per serial number?
Neil

> Thanks,
> Daniel
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* [PATCH] net: wireless: brcm80211: brcmfmac: dhd_sdio.c:  Cleaning up missing null-terminate in conjunction with strncpy
From: Rickard Strandqvist @ 2014-10-11 23:52 UTC (permalink / raw)
  To: Brett Rudley, Arend van Spriel
  Cc: Rickard Strandqvist, Hante Meuleman, John W. Linville,
	Pieter-Paul Giesberts, Daniel Kim, linux-wireless,
	brcm80211-dev-list, netdev, linux-kernel

Replacing strncpy with strlcpy to avoid strings that lacks null terminate.
And changed from using strncpy to strlcpy to simplify code.

Signed-off-by: Rickard Strandqvist <rickard_strandqvist@spectrumdigital.se>
---
 drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c |   25 ++++++++++----------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
index f55f625..d20d4e6 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c
@@ -670,7 +670,6 @@ static int brcmf_sdio_get_fwnames(struct brcmf_chip *ci,
 				  struct brcmf_sdio_dev *sdiodev)
 {
 	int i;
-	uint fw_len, nv_len;
 	char end;
 
 	for (i = 0; i < ARRAY_SIZE(brcmf_fwname_data); i++) {
@@ -684,25 +683,25 @@ static int brcmf_sdio_get_fwnames(struct brcmf_chip *ci,
 		return -ENODEV;
 	}
 
-	fw_len = sizeof(sdiodev->fw_name) - 1;
-	nv_len = sizeof(sdiodev->nvram_name) - 1;
 	/* check if firmware path is provided by module parameter */
 	if (brcmf_firmware_path[0] != '\0') {
-		strncpy(sdiodev->fw_name, brcmf_firmware_path, fw_len);
-		strncpy(sdiodev->nvram_name, brcmf_firmware_path, nv_len);
-		fw_len -= strlen(sdiodev->fw_name);
-		nv_len -= strlen(sdiodev->nvram_name);
+		strlcpy(sdiodev->fw_name, brcmf_firmware_path,
+			sizeof(sdiodev->fw_name));
+		strlcpy(sdiodev->nvram_name, brcmf_firmware_path,
+			sizeof(sdiodev->nvram_name));
 
 		end = brcmf_firmware_path[strlen(brcmf_firmware_path) - 1];
 		if (end != '/') {
-			strncat(sdiodev->fw_name, "/", fw_len);
-			strncat(sdiodev->nvram_name, "/", nv_len);
-			fw_len--;
-			nv_len--;
+			strlcat(sdiodev->fw_name, "/",
+				sizeof(sdiodev->fw_name));
+			strlcat(sdiodev->nvram_name, "/",
+				sizeof(sdiodev->nvram_name));
 		}
 	}
-	strncat(sdiodev->fw_name, brcmf_fwname_data[i].bin, fw_len);
-	strncat(sdiodev->nvram_name, brcmf_fwname_data[i].nv, nv_len);
+	strlcat(sdiodev->fw_name, brcmf_fwname_data[i].bin,
+		sizeof(sdiodev->fw_name));
+	strlcat(sdiodev->nvram_name, brcmf_fwname_data[i].nv,
+		sizeof(sdiodev->nvram_name));
 
 	return 0;
 }
-- 
1.7.10.4

^ permalink raw reply related

* Re: Netlink mmap tx security?
From: David Miller @ 2014-10-11 23:09 UTC (permalink / raw)
  To: luto; +Cc: torvalds, kaber, netdev
In-Reply-To: <CALCETrWfQe5H2Ht7cjCQLfUw+XUcRvga_H93esaWpAp37=noZg@mail.gmail.com>

From: Andy Lutomirski <luto@amacapital.net>
Date: Sat, 11 Oct 2014 15:29:17 -0700

> On May 12, 2014 3:08 PM, "Andy Lutomirski" <luto@amacapital.net> wrote:
>>
>> [moving to netdev -- this is much lower impact than I thought, since
>> you can't set up a netlink mmap ring without global CAP_NET_ADMIN]
> 
> Did anything ever happen here?  Despite not being a privilege
> escalation in the normal sense, it's still a bug, and it's still a
> fairly easy bypass of module signatures.

We definitely still need to address this, and since Patrick has still
not responded on this issue after being given 5 months to do so, the
only reasonable path is to get rid of this feature altogether.

Thanks for reminding me.

^ permalink raw reply

* Re: [bisected] e341694e3eb5 netlink_lookup() rcu conversion causes latencies
From: David Miller @ 2014-10-11 23:08 UTC (permalink / raw)
  To: tgraf
  Cc: eric.dumazet, heiko.carstens, sasha.levin, paulmck, nikolay,
	netdev, linux-kernel, braunu
In-Reply-To: <20141011222514.GA14186@casper.infradead.org>

From: Thomas Graf <tgraf@suug.ch>
Date: Sat, 11 Oct 2014 23:25:14 +0100

> I think the issue here is obvious and a fix is on the way to move
> the insertion and removal to a worker to no longer require the
> synchronize_rcu().
> 
> What bothers me is that the synchronize_rcu() should only occur
> on expand/shrink and not for every table update. The default table
> size is 64.

Not true, every netlink socket release incurs a synchronize_net()
now, because we added such a call to netlink_release().

I specifically brought this up to as a possible problem when the
changes went in...

^ permalink raw reply

* Re: Netlink mmap tx security?
From: Andy Lutomirski @ 2014-10-11 22:29 UTC (permalink / raw)
  To: Linus Torvalds, David S. Miller, Patrick McHardy,
	Network Development
In-Reply-To: <CALCETrVuREGVKwrni61DEg=Tgjc7x41WYtrEiDMqMvwVFXfybw@mail.gmail.com>

On May 12, 2014 3:08 PM, "Andy Lutomirski" <luto@amacapital.net> wrote:
>
> [moving to netdev -- this is much lower impact than I thought, since
> you can't set up a netlink mmap ring without global CAP_NET_ADMIN]

Did anything ever happen here?  Despite not being a privilege
escalation in the normal sense, it's still a bug, and it's still a
fairly easy bypass of module signatures.


--Andy

>
> On Wed, May 7, 2014 at 5:46 PM, Andy Lutomirski <luto@amacapital.net> wrote:
> > Disclaimer: I haven't tried to write a proof-of-concept, so I could be
> > wrong here.
> >
> >     /* Netlink messages are validated by the receiver before processing.
> >      * In order to avoid userspace changing the contents of the message
> >      * after validation, the socket and the ring may only be used by a
> >      * single process, otherwise we fall back to copying.
> >      */
> >     if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 ||
> >         atomic_read(&nlk->mapped) > 1)
> >         excl = false;
> >
> > How is this possibly safe?  I think it's broken for at least three reasons:
> >
> > 1. Shouldn't that be atomic_long(read(&f_count) > 1))?
> >
> > 2. threads
> >
> > 3. process_vm_writev
> >
> > I wouldn't be surprised if RDMA and AIO also break this assumption.
> >
> > Does anything rely on mmapped netlink tx being fast?  If not, can this
> > code just be deleted?
> >
> > For that matter, does anything rely on mmapped netlink tx at all?
> >
> > (On a non-KASLR, non-SMEP system, this probably gives reliable kernel
> > code execution.)
> >
> > For that matter, netlink_mmap_sendmsg also appears to vulnerable to a
> > TOCTOU attack via hdr.
> >

^ permalink raw reply

* Re: [bisected] e341694e3eb5 netlink_lookup() rcu conversion causes latencies
From: Thomas Graf @ 2014-10-11 22:25 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Heiko Carstens, Sasha Levin, paulmck, Nikolay Aleksandrov,
	David S. Miller, netdev, linux-kernel, Ursula Braun
In-Reply-To: <1413055964.9362.50.camel@edumazet-glaptop2.roam.corp.google.com>

On 10/11/14 at 12:32pm, Eric Dumazet wrote:
> On Sat, 2014-10-11 at 10:36 +0200, Heiko Carstens wrote:
> > Hi all,
> > 
> > it just came to my attention that commit e341694e3eb5
> > "netlink: Convert netlink_lookup() to use RCU protected hash table"
> > causes network latencies for me on s390.
> > 
> > The testcase is quite simple and 100% reproducible on s390:
> > 
> > Simply login via ssh to a remote system which has the above mentioned
> > patch applied. Any action like pressing return now has significant
> > latencies. Or in other words, working via such a connection becomes
> > a pain ;)
> > 
> > I haven't debugged it, however I assume the problem is that a) the
> > commit introduces a synchronize_net() call und b) s390 kernels
> > usually get compiled with CONFIG_HZ_100 while most other architectures
> > use CONFIG_HZ_1000.
> > If I change the kernel config to CONFIG_HZ_1000 the problem goes away,
> > however I don't consider this a fix...
> > 
> > Another reason why this hasn't been observed on x86 may or may not be
> > that we haven't implemented CONFIG_HAVE_CONTEXT_TRACKING on s390 (yet).
> > But that's just guessing...
> 
> CC Paul and Sasha

I think the issue here is obvious and a fix is on the way to move
the insertion and removal to a worker to no longer require the
synchronize_rcu().

What bothers me is that the synchronize_rcu() should only occur
on expand/shrink and not for every table update. The default table
size is 64.

^ permalink raw reply

* Re: [Nios2-dev] [PATCH net-next 0/2] Altera TSE with no PHY
From: Walter Lozano @ 2014-10-11 22:24 UTC (permalink / raw)
  To: Matthew Gerlach
  Cc: netdev@vger.kernel.org, f.fainelli@gmail.com,
	tobias.klauser@gmail.com, vbridgers2013@gmail.com,
	nios2-dev@lists.rocketboards.org, davem@davemloft.net,
	guido@vanguardiasur.com.ar
In-Reply-To: <1412974007001.77871@opensource.altera.com>

Hi Matthew,

Thanks for your testing. I'm glad to hear that I didn't break
anything. Most of my testing
was done with a custom board with a SFP transceiver.

Regards,

Walter

On Fri, Oct 10, 2014 at 5:46 PM, Matthew Gerlach
<mgerlach@opensource.altera.com> wrote:
> Hi Walter,
>
> I've examined your patch, and it makes sense to me.  I tested your patch again the TSE example design, and it did not break anything.
>
> Thanks,
> Matthew Gerlach
> ________________________________________
> From: nios2-dev-bounces@lists.rocketboards.org <nios2-dev-bounces@lists.rocketboards.org> on behalf of Walter Lozano <walter@vanguardiasur.com.ar>
> Sent: Friday, October 3, 2014 11:08 AM
> To: netdev@vger.kernel.org
> Cc: f.fainelli@gmail.com; tobias.klauser@gmail.com; vbridgers2013@gmail.com; nios2-dev@lists.rocketboards.org; davem@davemloft.net; guido@vanguardiasur.com.ar
> Subject: [Nios2-dev] [PATCH net-next 0/2] Altera TSE with no PHY
>
> In some scenarios there is no PHY chip present, for example in optical links.
> This serie of patches moves PHY get addr and MDIO create to a new function and
> avoids PHY and MDIO probing in these cases.
>
> Walter Lozano (2):
>   Altera TSE: Move PHY get addr and MDIO create
>   Altera TSE: Add support for no PHY
>
>  drivers/net/ethernet/altera/altera_tse_main.c |   65 +++++++++++++++++--------
>  1 file changed, 44 insertions(+), 21 deletions(-)
>
> --
> 1.7.10.4
>
> _______________________________________________
> Nios2-dev mailing list
> Nios2-dev@lists.rocketboards.org
> http://lists.rocketboards.org/cgi-bin/mailman/listinfo/nios2-dev



-- 
Walter Lozano, VanguardiaSur
www.vanguardiasur.com.ar

^ permalink raw reply

* [PATCH net] tcp: fix tcp_ack() performance problem
From: Eric Dumazet @ 2014-10-11 22:17 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, Willem de Bruijn, Neal Cardwell, Yuchung Cheng,
	Van Jacobson

From: Eric Dumazet <edumazet@google.com>

We worked hard to improve tcp_ack() performance, by not accessing
skb_shinfo() in fast path (cd7d8498c9a5 tcp: change tcp_skb_pcount()
location)

We still have one spurious access because of ACK timestamping,
added in commit e1c8a607b281 ("net-timestamp: ACK timestamp for
bytestreams")

By checking if sk_tsflags has SOF_TIMESTAMPING_TX_ACK set,
we can avoid two cache line misses for the common case.

While we are at it, add two prefetchw() :

One in tcp_ack() to bring skb at the head of write queue.

One in tcp_clean_rtx_queue() loop to bring following skb,
as we will delete skb from the write queue and dirty skb->next->prev.

Add a couple of [un]likely() clauses.

After this patch, tcp_ack() is no longer the most consuming
function in tcp stack.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Van Jacobson <vanj@google.com>
---
Google-Bug-Id : 9694859

 net/ipv4/tcp_input.c |   36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 00a41499d52c..a12b455928e5 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -68,6 +68,7 @@
 #include <linux/module.h>
 #include <linux/sysctl.h>
 #include <linux/kernel.h>
+#include <linux/prefetch.h>
 #include <net/dst.h>
 #include <net/tcp.h>
 #include <net/inet_common.h>
@@ -3029,6 +3030,21 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
 	return packets_acked;
 }
 
+static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
+			   u32 prior_snd_una)
+{
+	const struct skb_shared_info *shinfo;
+
+	/* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */
+	if (likely(!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)))
+		return;
+
+	shinfo = skb_shinfo(skb);
+	if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
+	    between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1))
+		__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+}
+
 /* Remove acknowledged frames from the retransmission queue. If our packet
  * is before the ack sequence we can discard it as it's confirmed to have
  * arrived at the other end.
@@ -3052,14 +3068,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 	first_ackt.v64 = 0;
 
 	while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
-		struct skb_shared_info *shinfo = skb_shinfo(skb);
 		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 		u8 sacked = scb->sacked;
 		u32 acked_pcount;
 
-		if (unlikely(shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
-		    between(shinfo->tskey, prior_snd_una, tp->snd_una - 1))
-			__skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+		tcp_ack_tstamp(sk, skb, prior_snd_una);
 
 		/* Determine how many packets and what bytes were acked, tso and else */
 		if (after(scb->end_seq, tp->snd_una)) {
@@ -3073,10 +3086,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 			fully_acked = false;
 		} else {
+			/* Speedup tcp_unlink_write_queue() and next loop */
+			prefetchw(skb->next);
 			acked_pcount = tcp_skb_pcount(skb);
 		}
 
-		if (sacked & TCPCB_RETRANS) {
+		if (unlikely(sacked & TCPCB_RETRANS)) {
 			if (sacked & TCPCB_SACKED_RETRANS)
 				tp->retrans_out -= acked_pcount;
 			flag |= FLAG_RETRANS_DATA_ACKED;
@@ -3107,7 +3122,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		 * connection startup slow start one packet too
 		 * quickly.  This is severely frowned upon behavior.
 		 */
-		if (!(scb->tcp_flags & TCPHDR_SYN)) {
+		if (likely(!(scb->tcp_flags & TCPHDR_SYN))) {
 			flag |= FLAG_DATA_ACKED;
 		} else {
 			flag |= FLAG_SYN_ACKED;
@@ -3119,9 +3134,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
 		tcp_unlink_write_queue(skb, sk);
 		sk_wmem_free_skb(sk, skb);
-		if (skb == tp->retransmit_skb_hint)
+		if (unlikely(skb == tp->retransmit_skb_hint))
 			tp->retransmit_skb_hint = NULL;
-		if (skb == tp->lost_skb_hint)
+		if (unlikely(skb == tp->lost_skb_hint))
 			tp->lost_skb_hint = NULL;
 	}
 
@@ -3132,7 +3147,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		flag |= FLAG_SACK_RENEGING;
 
 	skb_mstamp_get(&now);
-	if (first_ackt.v64) {
+	if (likely(first_ackt.v64)) {
 		seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
 		ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
 	}
@@ -3394,6 +3409,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	int acked = 0; /* Number of packets newly acked */
 	long sack_rtt_us = -1L;
 
+	/* We very likely will need to access write queue head. */
+	prefetchw(sk->sk_write_queue.next);
+
 	/* If the ack is older than previous acks
 	 * then we can probably ignore it.
 	 */

^ permalink raw reply related

* [GIT] Networking
From: David Miller @ 2014-10-11 21:59 UTC (permalink / raw)
  To: torvalds; +Cc: akpm, netdev, linux-kernel


This set fixes a bunch of fallout from the changes that went in
during this merge window, particularly:

1) Fix fsl_pq_mdio (Claudiu Manoil) and fm10k (Pranith Kumar) build
   failures.

2) Several networking drivers do atomic_set() on page counts where
   that's not exactly legal.  From Eric Dumazet.

3) Make __skb_flow_get_ports() work cleanly with unaligned data,
   from Alexander Duyck.

4) Fix some kernel-doc buglets in rfkill and netlabel, from Fabian
   Frederick.

5) Unbalanced enable_irq_wake usage in bcmgenet and systemport
   drivers, from Florian Fainelli.

6) pxa168_eth needs to depend on HAS_DMA, from Geert Uytterhoeven.

7) Multi-dequeue in the qdisc layer severely bypasses the fairness
   limits the previous code used to enforce, reintroduce in a way that
   at the same time doesn't compromise bulk dequeue opportunities.
   From Jesper Dangaard Brouer.

8) macvlan receive path unnecessarily hops through a softirq by
   using netif_rx() instead of netif_receive_skb().  From Jason
   Baron.

Please pull, thanks a lot!

The following changes since commit 35a9ad8af0bb0fa3525e6d0d20e32551d226f38e:

  Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next (2014-10-08 21:40:54 -0400)

are available in the git repository at:


  git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git master

for you to fetch changes up to 01d2d484e49e9bc0ed9b5fdaf345a0e2bf35ffed:

  Merge branch 'bcmgenet_systemport' (2014-10-10 15:39:22 -0400)

----------------------------------------------------------------

Alexander Duyck (1):
      flow-dissector: Fix alignment issue in __skb_flow_get_ports

Alexei Starovoitov (1):
      net: bpf: fix bpf syscall dependence on anon_inodes

Andrea Merello (1):
      rtl818x_pci: fix response rate may be incorrect.

Claudiu Manoil (8):
      net/fsl_pq_mdio: Fix asm/ucc.h compile error for ARM
      net/fsl_pq_mdio: Use ioread/iowrite32be() portable accessors
      net/fsl_pq_mdio: Replace spin_event_timeout() with arch independent
      gianfar: Include missing headers for ARM builds
      gianfar: Exclude PPC specific errata handling from ARM builds
      gianfar: Make MAC addr setup endian safe, cleanup
      gianfar: Replace spin_event_timeout() with arch independent
      gianfar: Replace eieio with wmb for non-PPC archs

David S. Miller (9):
      Merge branch 'gianfar'
      Merge branch 'cxgb4'
      Merge branch 'r8152'
      Merge tag 'master-2014-10-08' of git://git.kernel.org/.../linville/wireless-next
      Merge git://git.kernel.org/.../pablo/nf-next
      Merge branch 'xgene'
      Merge branch 'macvlan'
      Merge branch 'net-drivers-pgcnt'
      Merge branch 'bcmgenet_systemport'

Eric Dumazet (5):
      fm10k: fix race accessing page->_count
      igb: fix race accessing page->_count
      ixgbe: fix race accessing page->_count
      mlx4: fix race accessing page->_count
      net: fix races in page->_count manipulation

Fabian Frederick (2):
      net: rfkill: kernel-doc warning fixes
      netlabel: kernel-doc warning fix

Florian Fainelli (3):
      net: bcmgenet: fix off-by-one in incrementing read pointer
      net: bcmgenet: avoid unbalanced enable_irq_wake calls
      net: systemport: avoid unbalanced enable_irq_wake calls

Geert Uytterhoeven (1):
      net: pxa168_eth: PXA168_ETH should depend on HAS_DMA

Hariprasad Shenai (3):
      cxgb4/cxgb4vf: Updated the LSO transfer length in CPL_TX_PKT_LSO for T5
      cxgb4vf: Add 40G support for cxgb4vf driver
      cxgb4: Wait for device to get ready before reading any register

Iyappan Subramanian (6):
      MAINTAINERS: Update APM X-Gene section
      Documentation: dts: Update section header for APM X-Gene
      dtb: Add 10GbE node to APM X-Gene SoC device tree
      drivers: net: xgene: Preparing for adding 10GbE support
      drivers: net: xgene: Add 10GbE support
      drivers: net: xgene: Add 10GbE ethtool support

Jesper Dangaard Brouer (1):
      net_sched: restore qdisc quota fairness limits after bulk dequeue

John W. Linville (1):
      Merge branch 'master' of git://git.kernel.org/.../linville/wireless

LEROY Christophe (1):
      net: fs_enet: error: 'SCCE_ENET_TXF' undeclared

Larry Finger (1):
      rtlwifi: Fix possible unaligned array in ether_addr_copy()

Li RongQing (2):
      Documentation: replace __sk_run_filter with __bpf_prog_run
      net: filter: fix the comments

Marek Puzyniak (1):
      ath9k_htc: avoid kernel panic in ath9k_hw_reset

Masanari Iida (1):
      net: Missing @ before descriptions cause make xmldocs warning

Pablo Neira Ayuso (2):
      netfilter: kill nf_send_reset6() from include/net/netfilter/ipv6/nf_reject.h
      netfilter: fix wrong arithmetics regarding NFT_REJECT_ICMPX_MAX

Pranith Kumar (1):
      networking: fm10k: Fix build failure

Sascha Hauer (1):
      net/phy: micrel: Add clock support for KSZ8021/KSZ8031

Sujith Manoharan (3):
      ath: Fix smatch warning
      ath9k: Fix crash in MCC mode
      ath9k: Fix sequence number assignment

Vince Bridgers (1):
      stmmac: correct mc_filter local variable in set_filter and set_mac_addr call

hayeswang (3):
      r8152: autoresume before setting feature
      r8152: adjust usb_autopm_xxx
      r8152: add mutex for hw settings

jbaron@akamai.com (2):
      macvlan: pass 'bool' type to macvlan_count_rx()
      macvlan: optimize the receive path

 Documentation/devicetree/bindings/net/apm-xgene-enet.txt |   4 +-
 Documentation/devicetree/bindings/net/micrel.txt         |   6 +
 Documentation/networking/filter.txt                      |   4 +-
 MAINTAINERS                                              |   1 -
 arch/arm64/boot/dts/apm-mustang.dts                      |   4 +
 arch/arm64/boot/dts/apm-storm.dtsi                       |  29 ++++-
 drivers/net/ethernet/apm/xgene/Makefile                  |   3 +-
 drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c      |  28 ++++-
 drivers/net/ethernet/apm/xgene/xgene_enet_hw.c           |  44 ++++---
 drivers/net/ethernet/apm/xgene/xgene_enet_hw.h           |  30 ++---
 drivers/net/ethernet/apm/xgene/xgene_enet_main.c         |  86 ++++++++++----
 drivers/net/ethernet/apm/xgene/xgene_enet_main.h         |  24 +++-
 drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c        | 331 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h        |  57 +++++++++
 drivers/net/ethernet/broadcom/bcmsysport.c               |   3 +-
 drivers/net/ethernet/broadcom/genet/bcmgenet.c           |   9 +-
 drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c       |   4 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h               |   2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c          |   6 +-
 drivers/net/ethernet/chelsio/cxgb4/sge.c                 |   5 +-
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c               |  17 +--
 drivers/net/ethernet/chelsio/cxgb4/t4_msg.h              |   1 +
 drivers/net/ethernet/chelsio/cxgb4/t4_regs.h             |   5 +-
 drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c      |  12 +-
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c               |   5 +-
 drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h       |   6 +
 drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c           |  10 +-
 drivers/net/ethernet/freescale/fs_enet/mac-fcc.c         |   2 +-
 drivers/net/ethernet/freescale/fs_enet/mac-scc.c         |   2 +-
 drivers/net/ethernet/freescale/fsl_pq_mdio.c             |  56 +++++----
 drivers/net/ethernet/freescale/gianfar.c                 |  68 ++++++-----
 drivers/net/ethernet/freescale/gianfar.h                 |  31 +++++
 drivers/net/ethernet/intel/Kconfig                       |   1 +
 drivers/net/ethernet/intel/fm10k/fm10k_main.c            |   7 +-
 drivers/net/ethernet/intel/igb/igb_main.c                |   7 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c            |   8 +-
 drivers/net/ethernet/marvell/Kconfig                     |   3 +-
 drivers/net/ethernet/mellanox/mlx4/en_rx.c               |   6 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c     |   4 +-
 drivers/net/macvlan.c                                    |  21 ++--
 drivers/net/phy/micrel.c                                 |  31 ++++-
 drivers/net/usb/r8152.c                                  |  98 +++++++++++++---
 drivers/net/wireless/ath/ath9k/ath9k.h                   |   4 +-
 drivers/net/wireless/ath/ath9k/beacon.c                  |  12 +-
 drivers/net/wireless/ath/ath9k/htc_drv_init.c            |   1 +
 drivers/net/wireless/ath/ath9k/main.c                    |   2 +-
 drivers/net/wireless/ath/ath9k/tx99.c                    |   8 +-
 drivers/net/wireless/ath/ath9k/xmit.c                    |  34 ++++--
 drivers/net/wireless/ath/main.c                          |   8 +-
 drivers/net/wireless/rtl818x/rtl8180/dev.c               |  36 ++++--
 drivers/net/wireless/rtlwifi/wifi.h                      |   2 +-
 include/linux/micrel_phy.h                               |   1 +
 include/net/netfilter/ipv6/nf_reject.h                   | 157 +------------------------
 include/uapi/linux/netfilter/nf_tables.h                 |   2 +-
 net/Kconfig                                              |   1 +
 net/core/filter.c                                        |   9 +-
 net/core/flow_dissector.c                                |  36 +++---
 net/core/skbuff.c                                        |  35 ++++--
 net/netfilter/nft_reject.c                               |  10 +-
 net/netlabel/netlabel_kapi.c                             |   1 -
 net/rfkill/core.c                                        |   4 +-
 net/sched/sch_generic.c                                  |  20 ++--
 62 files changed, 1017 insertions(+), 447 deletions(-)
 create mode 100644 drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
 create mode 100644 drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox