Netdev List
 help / color / mirror / Atom feed
* [PATCH 12/18] virtio_test: support used_event index
From: Michael S. Tsirkin @ 2011-05-04 20:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Krishna Kumar, Carsten Otte, lguest, Shirley Ma, kvm, linux-s390,
	netdev, habanero, Heiko Carstens, linux-kernel, virtualization,
	steved, Christian Borntraeger, Tom Lendacky, Martin Schwidefsky,
	linux390
In-Reply-To: <cover.1304541918.git.mst@redhat.com>

Add ability to test the new used_event feature,
enable by default.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tools/virtio/virtio_test.c |   18 ++++++++++++++++--
 1 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index 9e65e6d..157ec68 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -210,18 +210,29 @@ const struct option longopts[] = {
 		.val = 'i',
 	},
 	{
+		.name = "used-event-idx",
+		.val = 'U',
+	},
+	{
+		.name = "no-used-event-idx",
+		.val = 'u',
+	},
+	{
 	}
 };
 
 static void help()
 {
-	fprintf(stderr, "Usage: virtio_test [--help] [--no-indirect]\n");
+	fprintf(stderr, "Usage: virtio_test [--help]"
+		" [--no-indirect] "
+		" [--no-used-event-idx]\n");
 }
 
 int main(int argc, char **argv)
 {
 	struct vdev_info dev;
-	unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC;
+	unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
+		(1ULL << VIRTIO_RING_F_USED_EVENT_IDX);
 	int o;
 
 	for (;;) {
@@ -238,6 +249,9 @@ int main(int argc, char **argv)
 		case 'i':
 			features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
 			break;
+		case 'u':
+			features &= ~(1ULL << VIRTIO_RING_F_USED_EVENT_IDX);
+			break;
 		default:
 			assert(0);
 			break;
-- 
1.7.5.53.gc233e

^ permalink raw reply related

* [PATCH 13/18] virtio_test: avail_event index support
From: Michael S. Tsirkin @ 2011-05-04 20:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Rusty Russell, Carsten Otte, Christian Borntraeger, linux390,
	Martin Schwidefsky, Heiko Carstens, Shirley Ma, lguest,
	linux-kernel, virtualization, netdev, linux-s390, kvm,
	Krishna Kumar, Tom Lendacky, steved, habanero
In-Reply-To: <cover.1304541918.git.mst@redhat.com>

Add ability to test the new avail_event feature,
enable by default.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tools/virtio/virtio_test.c |   17 +++++++++++++++--
 1 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index 157ec68..8adf55d 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -202,6 +202,14 @@ const struct option longopts[] = {
 		.val = 'h',
 	},
 	{
+		.name = "avail-event-idx",
+		.val = 'A',
+	},
+	{
+		.name = "no-avail-event-idx",
+		.val = 'a',
+	},
+	{
 		.name = "indirect",
 		.val = 'I',
 	},
@@ -224,7 +232,8 @@ const struct option longopts[] = {
 static void help()
 {
 	fprintf(stderr, "Usage: virtio_test [--help]"
-		" [--no-indirect] "
+		" [--no-indirect]"
+		" [--no-avail-event-idx]"
 		" [--no-used-event-idx]\n");
 }
 
@@ -232,7 +241,8 @@ int main(int argc, char **argv)
 {
 	struct vdev_info dev;
 	unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
-		(1ULL << VIRTIO_RING_F_USED_EVENT_IDX);
+		(1ULL << VIRTIO_RING_F_USED_EVENT_IDX) |
+		(1ULL << VIRTIO_RING_F_AVAIL_EVENT_IDX);
 	int o;
 
 	for (;;) {
@@ -243,6 +253,9 @@ int main(int argc, char **argv)
 		case '?':
 			help();
 			exit(2);
+		case 'a':
+			features &= ~(1ULL << VIRTIO_RING_F_AVAIL_EVENT_IDX);
+			break;
 		case 'h':
 			help();
 			goto done;
-- 
1.7.5.53.gc233e

^ permalink raw reply related

* [PATCH 14/18] virtio: add api for delayed callbacks
From: Michael S. Tsirkin @ 2011-05-04 20:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Krishna Kumar, Carsten Otte, lguest, Shirley Ma, kvm, linux-s390,
	netdev, habanero, Heiko Carstens, linux-kernel, virtualization,
	steved, Christian Borntraeger, Tom Lendacky, Martin Schwidefsky,
	linux390
In-Reply-To: <cover.1304541918.git.mst@redhat.com>

Add an API that tells the other side that callbacks
should be delayed until a lot of work has been done.
Implement using the new used_event feature.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_ring.c |   27 +++++++++++++++++++++++++++
 include/linux/virtio.h       |    9 +++++++++
 2 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 262dfe6..3a70d70 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -397,6 +397,33 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
 
+bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	int bufs;
+
+	START_USE(vq);
+
+	/* We optimistically turn back on interrupts, then check if there was
+	 * more to do. */
+	/* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
+	 * either clear the flags bit or point the event index at the next
+	 * entry. Always do both to keep code simple. */
+	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+	/* TODO: tune this threshold */
+	bufs = (vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
+	vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
+	virtio_mb();
+	if (unlikely(vq->vring.used->idx - vq->last_used_idx > bufs)) {
+		END_USE(vq);
+		return false;
+	}
+
+	END_USE(vq);
+	return true;
+}
+EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
+
 void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 718336b..5151fd1 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -51,6 +51,13 @@ struct virtqueue {
  *	This re-enables callbacks; it returns "false" if there are pending
  *	buffers in the queue, to detect a possible race between the driver
  *	checking for more work, and enabling callbacks.
+ * virtqueue_enable_cb_delayed: restart callbacks after disable_cb.
+ *	vq: the struct virtqueue we're talking about.
+ *	This re-enables callbacks but hints to the other side to delay
+ *	interrupts until most of the available buffers have been processed;
+ *	it returns "false" if there are many pending buffers in the queue,
+ *	to detect a possible race between the driver checking for more work,
+ *	and enabling callbacks.
  * virtqueue_detach_unused_buf: detach first unused buffer
  * 	vq: the struct virtqueue we're talking about.
  * 	Returns NULL or the "data" token handed to add_buf
@@ -86,6 +93,8 @@ void virtqueue_disable_cb(struct virtqueue *vq);
 
 bool virtqueue_enable_cb(struct virtqueue *vq);
 
+bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
+
 void *virtqueue_detach_unused_buf(struct virtqueue *vq);
 
 /**
-- 
1.7.5.53.gc233e

^ permalink raw reply related

* [PATCH 16/18] virtio_ring: Add capacity check API
From: Michael S. Tsirkin @ 2011-05-04 20:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Krishna Kumar, Carsten Otte, lguest, Shirley Ma, kvm, linux-s390,
	netdev, habanero, Heiko Carstens, linux-kernel, virtualization,
	steved, Christian Borntraeger, Tom Lendacky, Martin Schwidefsky,
	linux390
In-Reply-To: <cover.1304541918.git.mst@redhat.com>

>From: Shirley Ma <mashirle@us.ibm.com>

Signed-off-by: Shirley Ma <xma@us.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

I'm not sure who wrote this first anymore :)
But it's a simple patch.

 drivers/virtio/virtio_ring.c |    8 ++++++++
 include/linux/virtio.h       |    5 +++++
 2 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 3a70d70..57bf9d5 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -365,6 +365,14 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 }
 EXPORT_SYMBOL_GPL(virtqueue_get_buf);
 
+int virtqueue_get_capacity(struct virtqueue *_vq)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+
+	return vq->num_free;
+}
+EXPORT_SYMBOL_GPL(virtqueue_get_capacity);
+
 void virtqueue_disable_cb(struct virtqueue *_vq)
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 5151fd1..944ebcd 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -42,6 +42,9 @@ struct virtqueue {
  *	vq: the struct virtqueue we're talking about.
  *	len: the length written into the buffer
  *	Returns NULL or the "data" token handed to add_buf.
+ * virtqueue_get_capacity: get the current capacity of the queue
+ *	vq: the struct virtqueue we're talking about.
+ *	Returns remaining capacity of the queue.
  * virtqueue_disable_cb: disable callbacks
  *	vq: the struct virtqueue we're talking about.
  *	Note that this is not necessarily synchronous, hence unreliable and only
@@ -89,6 +92,8 @@ void virtqueue_kick(struct virtqueue *vq);
 
 void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
 
+int virtqueue_get_capacity(struct virtqueue *vq);
+
 void virtqueue_disable_cb(struct virtqueue *vq);
 
 bool virtqueue_enable_cb(struct virtqueue *vq);
-- 
1.7.5.53.gc233e

^ permalink raw reply related

* [PATCH 17/18] virtio_net: fix TX capacity checks using new API
From: Michael S. Tsirkin @ 2011-05-04 20:53 UTC (permalink / raw)
  To: linux-kernel
  Cc: Krishna Kumar, Carsten Otte, lguest, Shirley Ma, kvm, linux-s390,
	netdev, habanero, Heiko Carstens, linux-kernel, virtualization,
	steved, Christian Borntraeger, Tom Lendacky, Martin Schwidefsky,
	linux390
In-Reply-To: <cover.1304541918.git.mst@redhat.com>

virtio net uses the number of sg entries to
check for TX ring capacity freed. But this
gives incorrect results when indirect buffers
are used. Use the new capacity API instead.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c |    9 ++++-----
 1 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f685324..f33c92b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -509,19 +509,17 @@ again:
 	return received;
 }
 
-static unsigned int free_old_xmit_skbs(struct virtnet_info *vi)
+static void free_old_xmit_skbs(struct virtnet_info *vi)
 {
 	struct sk_buff *skb;
-	unsigned int len, tot_sgs = 0;
+	unsigned int len;
 
 	while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) {
 		pr_debug("Sent skb %p\n", skb);
 		vi->dev->stats.tx_bytes += skb->len;
 		vi->dev->stats.tx_packets++;
-		tot_sgs += skb_vnet_hdr(skb)->num_sg;
 		dev_kfree_skb_any(skb);
 	}
-	return tot_sgs;
 }
 
 static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
@@ -611,7 +609,8 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		netif_stop_queue(dev);
 		if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) {
 			/* More just got used, free them then recheck. */
-			capacity += free_old_xmit_skbs(vi);
+			free_old_xmit_skbs(vi);
+			capacity = virtqueue_get_capacity(vi->svq);
 			if (capacity >= 2+MAX_SKB_FRAGS) {
 				netif_start_queue(dev);
 				virtqueue_disable_cb(vi->svq);
-- 
1.7.5.53.gc233e

^ permalink raw reply related

* [PATCH 18/18] virtio_net: limit xmit polling
From: Michael S. Tsirkin @ 2011-05-04 20:53 UTC (permalink / raw)
  To: linux-kernel
  Cc: Krishna Kumar, Carsten Otte, lguest, Shirley Ma, kvm, linux-s390,
	netdev, habanero, Heiko Carstens, linux-kernel, virtualization,
	steved, Christian Borntraeger, Tom Lendacky, Martin Schwidefsky,
	linux390
In-Reply-To: <cover.1304541918.git.mst@redhat.com>

Current code might introduce a lot of latency variation
if there are many pending bufs at the time we
attempt to transmit a new one. This is bad for
real-time applications and can't be good for TCP either.

Free up just enough to both clean up all buffers
eventually and to be able to xmit the next packet.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c |   18 +++++++++++-------
 1 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index f33c92b..9982bd7 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -509,17 +509,23 @@ again:
 	return received;
 }
 
-static void free_old_xmit_skbs(struct virtnet_info *vi)
+static bool free_old_xmit_skbs(struct virtnet_info *vi, int capacity)
 {
 	struct sk_buff *skb;
 	unsigned int len;
+	bool c;
+	/* We try to free up at least 2 skbs per one sent, so that we'll get
+	 * all of the memory back if they are used fast enough. */
+	int n = 2;
 
-	while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) {
+	while ((c = virtqueue_get_capacity(vi->svq) >= capacity) && --n > 0 &&
+	       (skb = virtqueue_get_buf(vi->svq, &len)) != NULL) {
 		pr_debug("Sent skb %p\n", skb);
 		vi->dev->stats.tx_bytes += skb->len;
 		vi->dev->stats.tx_packets++;
 		dev_kfree_skb_any(skb);
 	}
+	return c;
 }
 
 static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
@@ -574,8 +580,8 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct virtnet_info *vi = netdev_priv(dev);
 	int capacity;
 
-	/* Free up any pending old buffers before queueing new ones. */
-	free_old_xmit_skbs(vi);
+	/* Free enough pending old buffers to enable queueing new ones. */
+	free_old_xmit_skbs(vi, 2+MAX_SKB_FRAGS);
 
 	/* Try to transmit */
 	capacity = xmit_skb(vi, skb);
@@ -609,9 +615,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		netif_stop_queue(dev);
 		if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) {
 			/* More just got used, free them then recheck. */
-			free_old_xmit_skbs(vi);
-			capacity = virtqueue_get_capacity(vi->svq);
-			if (capacity >= 2+MAX_SKB_FRAGS) {
+			if (!likely(free_old_xmit_skbs(vi, 2+MAX_SKB_FRAGS))) {
 				netif_start_queue(dev);
 				virtqueue_disable_cb(vi->svq);
 			}
-- 
1.7.5.53.gc233e

^ permalink raw reply related

* [PATCH 15/18] virtio_net: delay TX callbacks
From: Michael S. Tsirkin @ 2011-05-04 20:52 UTC (permalink / raw)
  To: linux-kernel
  Cc: Rusty Russell, Carsten Otte, Christian Borntraeger, linux390,
	Martin Schwidefsky, Heiko Carstens, Shirley Ma, lguest,
	linux-kernel, virtualization, netdev, linux-s390, kvm,
	Krishna Kumar, Tom Lendacky, steved, habanero
In-Reply-To: <cover.1304541918.git.mst@redhat.com>

Ask for delayed callbacks on TX ring full, to give the
other side more of a chance to make progress.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 0cb0b06..f685324 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -609,7 +609,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * before it gets out of hand.  Naturally, this wastes entries. */
 	if (capacity < 2+MAX_SKB_FRAGS) {
 		netif_stop_queue(dev);
-		if (unlikely(!virtqueue_enable_cb(vi->svq))) {
+		if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) {
 			/* More just got used, free them then recheck. */
 			capacity += free_old_xmit_skbs(vi);
 			if (capacity >= 2+MAX_SKB_FRAGS) {
-- 
1.7.5.53.gc233e


^ permalink raw reply related

* Re: [PATCH] tcp_cubic: limit delayed_ack ratio to prevent divide error
From: Brandeburg, Jesse @ 2011-05-04 20:53 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: David Miller, Sangtae Ha, Injong Rhee, Valdis.Kletnieks@vt.edu,
	rdunlap@xenotime.net, lkml@techboom.com, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <20110504130456.425dee68@nehalam>



On Wed, 4 May 2011, Stephen Hemminger wrote:

> TCP Cubic keeps a metric that estimates the amount of delayed
> acknowledgements to use in adjusting the window. If an abnormally
> large number of packets are acknowledged at once, then the update
> could wrap and reach zero. This kind of ACK could only
> happen when there was a large window and huge number of
> ACK's were lost.
> 
> This patch limits the value of delayed ack ratio. The choice of 32
> is just a conservative value since normally it should be range of 
> 1 to 4 packets.
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

patch seems fine, but please credit the reporter (lkml@techboom.com) with 
reporting the issue with logs, maybe even with Reported-by: and some kind 
of reference to the panic message or the email thread in the text or 
header?


^ permalink raw reply

* Re: [PATCH v4 1/1] can: add pruss CAN driver.
From: Oliver Hartkopp @ 2011-05-04 20:55 UTC (permalink / raw)
  To: Wolfgang Grandegger
  Cc: sachi-EvXpCiN+lbve9wHmmfpqLFaTQe2KTcn/,
	davinci-linux-open-source-VycZQUHpC/PFrsHnngEfi1aTQe2KTcn/,
	Arnd Bergmann, Subhasish Ghosh, nsekhar-l0cyMroinI0, open list,
	CAN NETWORK DRIVERS, Marc Kleine-Budde,
	Netdev-u79uwXL29TY76Z2rM5mHXA, m-watkins-l0cyMroinI0,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r
In-Reply-To: <4DC17A31.8070409-5Yr1BZd7O62+XT7JhA+gdA@public.gmane.org>

On 04.05.2011 18:09, Wolfgang Grandegger wrote:

> On 05/04/2011 05:57 PM, Kurt Van Dijck wrote:

>> When doing so, I'd vote for an unlimited(by software) list of hardware filters (id/mask).
>> The hardware must abort when no more filters are available.
> 
> Sounds good and not even to complicated. For the SJA1000 we would just
> allow to set the global mask.

Yes. "unlimited(by software)" was a bit misleading at first reading, as we
should not filter IDs by software in the irq handler. But to create a API that
supports as much HW filters as the hardware provides is a good idea.

>> I think that when using hardware filters, knowing the actual device
>> with it's amount of hardware filters is the least of your problems.
>> Userspace applications that suddenly stop working properly due to
>> hw filters (i.e. some traffic not coming in anymore) will be a major
>> source of bugreports.
> 
> Well, hardware filtering will be off by default and must explicitly be
> set by the user, like for the bitrate setting.

To be correct: By the admin.

The setting of CAN HW filters has a system-wide effect to all users on the
local host. The same effect as we have for the setting of the bitrate. This is
the major difference to the user-configurable per-socket CAN-ID filters that
are provided e.g. by the CAN_RAW socket.

As the current netlink configuration interface for CAN interfaces is not
accessible for standard users also this would be the right place to extend the
netlink interface.

Regards,
Oliver

^ permalink raw reply

* Re: [net-next-2.6 0/9][pull request] Intel Wired LAN Driver Update
From: David Miller @ 2011-05-04 20:56 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, gospo, bphilips
In-Reply-To: <1304537441-2056-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Wed,  4 May 2011 12:30:32 -0700

> The following series contains updates to e100, igb, igbvf, ixgb and ixgbe.
> 
> The following are changes since commit e67f88dd12f610da98ca838822f2c9b4e7c6100e:
>   net: dont hold rtnl mutex during netlink dump callbacks
> and are available in the git repository at:
>   master.kernel.org:/pub/scm/linux/kernel/git/jkirsher/net-next-2.6 master

Pulled, thanks Jeff.

^ permalink raw reply

* Re: [net-next-2.6 0/9][pull request] Intel Wired LAN Driver Update
From: David Miller @ 2011-05-04 20:59 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, gospo, bphilips
In-Reply-To: <20110504.135621.232749554.davem@davemloft.net>


BTW, Jeff, can I get some kind of time frame on when the ethtool_ops->phys_id
conversions/removals for e100, e1000, and igb will be coming my way?

It's starting to be on the order of weeks that I've been waiting for
this so that I can apply Stephen Hemminger's patch which gets rid of
the old ->phys_id interfaces entirely.

Thanks.

^ permalink raw reply

* Re: [PATCH 1/2] libertas: Convert lbs_pr_<level> to pr_<level>
From: Joe Perches @ 2011-05-04 21:01 UTC (permalink / raw)
  To: Dan Williams
  Cc: John W. Linville, libertas-dev, linux-wireless, netdev,
	linux-kernel
In-Reply-To: <1304537949.1097.8.camel@dcbw.foobar.com>

On Wed, 2011-05-04 at 14:39 -0500, Dan Williams wrote:
> On Mon, 2011-05-02 at 16:49 -0700, Joe Perches wrote:
> > Use the standard pr_<level> functions eases grep a bit.
> > Added a few missing terminating newlines to messages.
> > Coalesced long formats.
> Is there any reason to not put the pr_fmt() definition into 'defs.h'
> instead of C&P at the top of every file?  I don't really care either way
> but that seems cleaner since almost all the libertas files are going to
> use logging.

It has to be before any #include <linux/kernel.h>
or any other #include that uses it.

At some point, all of the uses of

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

should be removed as I intend to have that
or an equivalent become the default.



^ permalink raw reply

* Re: 2.6.38.2, kernel panic, probably related to framentation handling
From: David Miller @ 2011-05-04 21:05 UTC (permalink / raw)
  To: eric.dumazet; +Cc: denys, netdev
In-Reply-To: <1304539346.32152.81.camel@edumazet-laptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 04 May 2011 22:02:26 +0200

> [PATCH] net: ip_expire() must revalidate route
> 
> Commit 4a94445c9a5c (net: Use ip_route_input_noref() in input path)
> added a bug in IP defragmentation handling, in case timeout is fired.
> 
> When a frame is defragmented, we use last skb dst field when building
> final skb. Its dst is valid, since we are in rcu read section.
> 
> But if a timeout occurs, we take first queued fragment to build one ICMP
> TIME EXCEEDED message. Problem is all queued skb have weak dst pointers,
> since we escaped RCU critical section after their queueing. icmp_send()
> might dereference a now freed (and possibly reused) part of memory.
> 
> Calling skb_dst_drop() and ip_route_input_noref() to revalidate route is
> the only possible choice.
> 
> Reported-by: Denys Fedoryshchenko <denys@visp.net.lb>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---

Applied to net-2.6 and queued up for -stable, thanks Eric!

^ permalink raw reply

* Re: [ethtool PATCH 4/4] v5 Add RX packet classification interface
From: Alexander Duyck @ 2011-05-04 21:07 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: Dimitris Michailidis, davem@davemloft.net, Kirsher, Jeffrey T,
	netdev@vger.kernel.org
In-Reply-To: <1304534738.2926.74.camel@bwh-desktop>

On 5/4/2011 11:45 AM, Ben Hutchings wrote:
> On Wed, 2011-05-04 at 11:21 -0700, Alexander Duyck wrote:
> [...]
>> Honestly what I would prefer to see is a seperate call added such as an
>> ETHTOOL_GRSCLSRLLOC that we could pass the flow specifier to and perhaps
>> include first/last location call in that and then let the driver return
>> where it wants to drop the rule.
>
> This must not be done as a separate operation because it's racy (in fact
> that's an inherent problem with the rule manager).  In the sfc driver
> (and probably others in future) filters could be inserted for RFS at any
> time.
>
>> That way we can avoid having to create
>> an overly complicated rule manager that can handle all the bizarre rule
>> ordering options that I am sure all the different network devices support.
>
> Right, the rule manager can't implement that.
>
>> The only reason I am not implementing this now is because there aren't
>> any drivers in place that would currently use it.  I figure once cxgb
>> has a means in place of supporting flow classifier rules then Dimitris
>> can add the necessary code to ethtool and the kernel to allow the driver
>> to specify rule locations.  I would prefer to avoid adding features
>> based on speculation of what will be needed and would like to be able to
>> actually see how the features will be used.
>
> If you are going to implement the same interface in ixgbe as in niu
> (modulo bugs), then I have no objection to going ahead with this and
> then adding the option for driver-assigned locations later.
>
> Please can you confirm that the location specified for
> ETHTOOL_SRXCLSRLINS will indeed be used as a priority in case of
> overlapping filters?
>
> Ben.
>

The ixgbe approach should be nearly identical in terms of how the 
priorities are based on the location of the filters.  The original 
version from Santwona had the rule manager breaking the rules up into 7 
sections so that rules that specified fewer fields would be near the end 
of the list.  I'm pretty sure that was all due to priorities from what I 
could see in the niu driver since the filters that covered wider ranges 
were being made lower priority to be matched last.

In terms of overloading the get count call, that probably would be the 
best route in terms of changing rule manager behavior.  The only thing I 
am having a hard time seeing is how the rule manager would be able to 
distinguish between low priority and high priority filter rules, or is 
this something that new keywords would be added to the parser for?

I just put out version 6 of the patches.  Essentially I have reduced the 
size of the rule manager to being used only on insertion without any 
rule location specified.  The one thing to keep in mind with this rule 
manager is that the rule at table size - 1 is always going to be the 
lowest priority rule.  So if it was reserved for unspecified rules it 
would be easy to use something like that to achieve an "auto-select" 
location that the driver could then reassign as rules were added to it.

Thanks,

Alex


^ permalink raw reply

* Re: [net-next-2.6 PATCH] can: make struct can_proto const
From: David Miller @ 2011-05-04 21:08 UTC (permalink / raw)
  To: socketcan-fJ+pQTUTwRTk1uMJSBkQmQ
  Cc: socketcan-core-0fE9KPoRgkgATYTw5x5z8w,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <4DC1B86A.40308-fJ+pQTUTwRTk1uMJSBkQmQ@public.gmane.org>

From: Oliver Hartkopp <socketcan-fJ+pQTUTwRTk1uMJSBkQmQ@public.gmane.org>
Date: Wed, 04 May 2011 22:34:50 +0200

> On 04.05.2011 06:40, Kurt Van Dijck wrote:
>> commit 53914b67993c724cec585863755c9ebc8446e83b had the
>> same message. That commit did put everything in place but
>> did not make can_proto const itself.
>> 
>> Signed-off-by: Kurt Van Dijck <kurt.van.dijck-/BeEPy95v10@public.gmane.org>
>> 
> 
> Acked-by: Oliver Hartkopp <socketcan-fJ+pQTUTwRTk1uMJSBkQmQ@public.gmane.org>

Applied.

^ permalink raw reply

* Re: [net-next-2.6 PATCH] can: rename can_try_module_get to can_get_proto
From: David Miller @ 2011-05-04 21:08 UTC (permalink / raw)
  To: socketcan-fJ+pQTUTwRTk1uMJSBkQmQ
  Cc: socketcan-core-0fE9KPoRgkgATYTw5x5z8w,
	netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <4DC1B89A.3080904-fJ+pQTUTwRTk1uMJSBkQmQ@public.gmane.org>

From: Oliver Hartkopp <socketcan-fJ+pQTUTwRTk1uMJSBkQmQ@public.gmane.org>
Date: Wed, 04 May 2011 22:35:38 +0200

> On 04.05.2011 06:42, Kurt Van Dijck wrote:
>> can: rename can_try_module_get to can_get_proto
>> 
>> can_try_module_get does return a struct can_proto.
>> The name explains what is done in so much detail that a caller
>> may not notice that a struct can_proto is locked/unlocked.
>> 
>> Signed-off-by: Kurt Van Dijck <kurt.van.dijck-/BeEPy95v10@public.gmane.org>
> 
> Acked-by: Oliver Hartkopp <socketcan-fJ+pQTUTwRTk1uMJSBkQmQ@public.gmane.org>

Applied.

^ permalink raw reply

* Re: [net-next-2.6 0/9][pull request] Intel Wired LAN Driver Update
From: Jeff Kirsher @ 2011-05-04 21:14 UTC (permalink / raw)
  To: David Miller
  Cc: netdev@vger.kernel.org, gospo@redhat.com, bphilips@novell.com
In-Reply-To: <20110504.135904.229767743.davem@davemloft.net>

[-- Attachment #1: Type: text/plain, Size: 550 bytes --]

On Wed, 2011-05-04 at 13:59 -0700, David Miller wrote:
> BTW, Jeff, can I get some kind of time frame on when the ethtool_ops->phys_id
> conversions/removals for e100, e1000, and igb will be coming my way?
> 
> It's starting to be on the order of weeks that I've been waiting for
> this so that I can apply Stephen Hemminger's patch which gets rid of
> the old ->phys_id interfaces entirely.
> 
> Thanks.

I have the remaining patches in testing currently.  Should have the
remaining three patches to you by this weekend (if not sooner).

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 490 bytes --]

^ permalink raw reply

* Re: [net-next-2.6 0/9][pull request] Intel Wired LAN Driver Update
From: David Miller @ 2011-05-04 21:16 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, gospo, bphilips
In-Reply-To: <1304543684.2998.6.camel@jtkirshe-MOBL1>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Wed, 04 May 2011 14:14:44 -0700

> On Wed, 2011-05-04 at 13:59 -0700, David Miller wrote:
>> BTW, Jeff, can I get some kind of time frame on when the ethtool_ops->phys_id
>> conversions/removals for e100, e1000, and igb will be coming my way?
>> 
>> It's starting to be on the order of weeks that I've been waiting for
>> this so that I can apply Stephen Hemminger's patch which gets rid of
>> the old ->phys_id interfaces entirely.
>> 
>> Thanks.
> 
> I have the remaining patches in testing currently.  Should have the
> remaining three patches to you by this weekend (if not sooner).

Thanks!

^ permalink raw reply

* [PDFv2] virtio-spec: 64 bit features, used/avail event
From: Michael S. Tsirkin @ 2011-05-04 21:17 UTC (permalink / raw)
  To: linux-kernel, qemu-devel, kvm
  Cc: Krishna Kumar, Carsten Otte, lguest, Shirley Ma, linux-s390,
	netdev, habanero, Rusty Russell, Heiko Carstens, virtualization,
	steved, Christian Borntraeger, Tom Lendacky, Martin Schwidefsky,
	linux390
In-Reply-To: <20110504203256.GA20819@redhat.com>

People asked for a pdf for a new spec, so here it is:
http://userweb.kernel.org/~mst/virtio-spec-event-idx-v2.pdf

Guest and host implementation can be found here:
git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git vhost-net-next-event-idx-v1
git://git.kernel.org/pub/scm/linux/kernel/git/mst/qemu-kvm.git virtio-net-event-idx-v1

Description reposted below:

I'm working on a patchset (to follow shortly)
that modified the notificatin hand-off in virtio to be basically
like Xen: each side published an index, the other side only triggers
an event when it crosses that index value
(Xen event indexes start at 1, ours start at 0 for
backward-compatiblity, but that's minor).

Especially for testing, it is very convenient to have
separate feature bits for this change in used and available
ring; since we've run out of bits in the 32 bit field,
I added another 32 bit and bit 31 enables that.

I started with using both flags and indexes in parallel,
but switched to doing either-or: this means we do
not need to tweak memory access ordering as index access just
replaces flags access.

A note on naming: the index replacing avail->flags is named
used_event, the index replacing used->flags is named
avail_event to stress the fact that these actually
point into the other side of the ring:
event is triggered when avail->idx == used->avail_event + 1
and when used->idx == avail->used_event + 1, respectively.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

-- 
MST

^ permalink raw reply

* Re: [PATCH 0/4] [RFC] virtio-net: Improve small packet performance
From: Michael S. Tsirkin @ 2011-05-04 21:23 UTC (permalink / raw)
  To: Krishna Kumar2; +Cc: davem, eric.dumazet, kvm, netdev, rusty
In-Reply-To: <OFAEA8C363.C13C507B-ON65257886.0051F71F-65257886.0052217F@in.ibm.com>

On Wed, May 04, 2011 at 08:29:44PM +0530, Krishna Kumar2 wrote:
> "Michael S. Tsirkin" <mst@redhat.com> wrote on 05/04/2011 08:16:22 PM:
> 
> > > A. virtio:
> > >    - Provide a API to get available number of slots.
> > >
> > > B. virtio-net:
> > >    - Remove stop/start txq's and associated callback.
> > >    - Pre-calculate the number of slots needed to transmit
> > >      the skb in xmit_skb and bail out early if enough space
> > >      is not available. My testing shows that 2.5-3% of
> > >      packets are benefited by using this API.
> > >    - Do not drop skbs but instead return TX_BUSY like other
> > >      drivers.
> > >    - When returning EBUSY, set a per-txq variable to indicate
> > >      to dev_queue_xmit() whether to restart xmits on this txq.
> > >
> > > C. net/sched/sch_generic.c:
> > >    Since virtio-net now returns EBUSY, the skb is requeued to
> > >    gso_skb. This allows adding the addional check for restart
> > >    xmits in just the slow-path (the first re-queued packet
> > >    case of dequeue_skb, where it checks for gso_skb) before
> > >    deciding whether to call the driver or not.
> > >
> > > Patch was also tested between two servers with Emulex OneConnect
> > > 10G cards to confirm there is no regression. Though the patch is
> > > an attempt to improve only small packet performance, there was
> > > improvement for 1K, 2K and also 16K both in BW and SD. Results
> > > from Guest -> Remote Host (BW in Mbps) for 1K and 16K I/O sizes:
> > >
> > > ________________________________________________________
> > >          I/O Size: 1K
> > > #   BW1   BW2 (%)      SD1   SD2 (%)
> > > ________________________________________________________
> > > 1   1226   3313 (170.2)   6.6   1.9 (-71.2)
> > > 2   3223   7705 (139.0)   18.0   7.1 (-60.5)
> > > 4   7223   8716 (20.6)   36.5   29.7 (-18.6)
> > > 8   8689   8693 (0)   131.5   123.0 (-6.4)
> > > 16   8059   8285 (2.8)   578.3   506.2 (-12.4)
> > > 32   7758   7955 (2.5)   2281.4   2244.2 (-1.6)
> > > 64   7503   7895 (5.2)   9734.0   9424.4 (-3.1)
> > > 96   7496   7751 (3.4)   21980.9   20169.3 (-8.2)
> > > 128   7389   7741 (4.7)   40467.5   34995.5 (-13.5)
> > > ________________________________________________________
> > > Summary:   BW: 16.2%   SD: -10.2%
> > >
> > > ________________________________________________________
> > >          I/O Size: 16K
> > > #   BW1   BW2 (%)      SD1   SD2 (%)
> > > ________________________________________________________
> > > 1   6684   7019 (5.0)   1.1   1.1 (0)
> > > 2   7674   7196 (-6.2)   5.0   4.8 (-4.0)
> > > 4   7358   8032 (9.1)   21.3   20.4 (-4.2)
> > > 8   7393   8015 (8.4)   82.7   82.0 (-.8)
> > > 16   7958   8366 (5.1)   283.2   310.7 (9.7)
> > > 32   7792   8113 (4.1)   1257.5   1363.0 (8.3)
> > > 64   7673   8040 (4.7)   5723.1   5812.4 (1.5)
> > > 96   7462   7883 (5.6)   12731.8   12119.8 (-4.8)
> > > 128   7338   7800 (6.2)   21331.7   21094.7 (-1.1)
> > > ________________________________________________________
> > > Summary:   BW: 4.6%   SD: -1.5%
> > >
> > > Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
> > > ---
> >
> > So IIUC, we delay transmit by an arbitrary value and hope
> > that the host is done with the packets by then?
> 
> Not "hope" exactly. If the device is not ready, then
> the packet is requeued. The main idea is to avoid
> drops/stop/starts, etc.

Yes, I see that, definitely. I guess it's a win if the
interrupt takes at least a jiffy to arrive anyway,
and a loss if not. Is there some reason interrupts
might be delayed until the next jiffy?

> > Interesting.
> >
> > I am currently testing an approach where
> > we tell the host explicitly to interrupt us only after
> > a large part of the queue is empty.
> > With 256 entries in a queue, we should get 1 interrupt per
> > on the order of 100 packets which does not seem like a lot.
> >
> > I can post it, mind testing this?
> 
> Sure.
> 
> - KK

Just posted. Would appreciate feedback.

-- 
MST

^ permalink raw reply

* Re: [ethtool PATCH 4/4] v5 Add RX packet classification interface
From: Ben Hutchings @ 2011-05-04 21:54 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: Dimitris Michailidis, davem@davemloft.net, Kirsher, Jeffrey T,
	netdev@vger.kernel.org
In-Reply-To: <4DC1C018.5090709@intel.com>

On Wed, 2011-05-04 at 14:07 -0700, Alexander Duyck wrote:
> On 5/4/2011 11:45 AM, Ben Hutchings wrote:
[...]
> > Please can you confirm that the location specified for
> > ETHTOOL_SRXCLSRLINS will indeed be used as a priority in case of
> > overlapping filters?
> >
> > Ben.
> >
> 
> The ixgbe approach should be nearly identical in terms of how the 
> priorities are based on the location of the filters.

OK, good.

> The original 
> version from Santwona had the rule manager breaking the rules up into 7 
> sections so that rules that specified fewer fields would be near the end 
> of the list.  I'm pretty sure that was all due to priorities from what I 
> could see in the niu driver since the filters that covered wider ranges 
> were being made lower priority to be matched last.

That would make sense.

> In terms of overloading the get count call, that probably would be the 
> best route in terms of changing rule manager behavior.  The only thing I 
> am having a hard time seeing is how the rule manager would be able to 
> distinguish between low priority and high priority filter rules, or is 
> this something that new keywords would be added to the parser for?

Right, there would have to be keywords to specify that.

> I just put out version 6 of the patches.  Essentially I have reduced the 
> size of the rule manager to being used only on insertion without any 
> rule location specified.  The one thing to keep in mind with this rule 
> manager is that the rule at table size - 1 is always going to be the 
> lowest priority rule.  So if it was reserved for unspecified rules it 
> would be easy to use something like that to achieve an "auto-select" 
> location that the driver could then reassign as rules were added to it.

I don't think any location value within the physical table size should
select this special behaviour.  The special location values for
auto-select (with whatever priority) should be distinct from all the
physical location values.

I still need to review your patches but it sounds like they will be
ready to apply.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

* Re: [PATCH 05/18] virtio: used event index interface
From: Tom Lendacky @ 2011-05-04 21:56 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-kernel, Rusty Russell, Carsten Otte, Christian Borntraeger,
	linux390, Martin Schwidefsky, Heiko Carstens, Shirley Ma, lguest,
	virtualization, netdev, linux-s390, kvm, Krishna Kumar, steved,
	habanero
In-Reply-To: <aacced20b8109a615ee24c1bde6d9f5702850111.1304541919.git.mst@redhat.com>

On Wednesday, May 04, 2011 03:51:09 PM Michael S. Tsirkin wrote:
> Define a new feature bit for the guest to utilize a used_event index
> (like Xen) instead if a flag bit to enable/disable interrupts.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  include/linux/virtio_ring.h |    9 +++++++++
>  1 files changed, 9 insertions(+), 0 deletions(-)
> 
> diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
> index e4d144b..f5c1b75 100644
> --- a/include/linux/virtio_ring.h
> +++ b/include/linux/virtio_ring.h
> @@ -29,6 +29,10 @@
>  /* We support indirect buffer descriptors */
>  #define VIRTIO_RING_F_INDIRECT_DESC	28
> 
> +/* The Guest publishes the used index for which it expects an interrupt
> + * at the end of the avail ring. Host should ignore the avail->flags
> field. */ +#define VIRTIO_RING_F_USED_EVENT_IDX	29
> +
>  /* Virtio ring descriptors: 16 bytes.  These can chain together via
> "next". */ struct vring_desc {
>  	/* Address (guest-physical). */
> @@ -83,6 +87,7 @@ struct vring {
>   *	__u16 avail_flags;
>   *	__u16 avail_idx;
>   *	__u16 available[num];
> + *	__u16 used_event_idx;
>   *
>   *	// Padding to the next align boundary.
>   *	char pad[];
> @@ -93,6 +98,10 @@ struct vring {
>   *	struct vring_used_elem used[num];
>   * };
>   */
> +/* We publish the used event index at the end of the available ring.
> + * It is at the end for backwards compatibility. */
> +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num])
> +
>  static inline void vring_init(struct vring *vr, unsigned int num, void *p,
>  			      unsigned long align)
>  {

You should update the vring_size procedure to account for the extra field at 
the end of the available ring by change the "(2 + num)" to "(3 + num)":
    return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)

Tom

^ permalink raw reply

* Lockdep splat for rt8169
From: Ben Greear @ 2011-05-04 21:56 UTC (permalink / raw)
  To: netdev; +Cc: Francois Romieu

This is from un-modified 39-rc6, with the slub cmpxcg patch posted today on lkml.

Seems to be the first post 2.6.38 kernel that will boot stable
on this system!

I previously reported the timer issue, but perhaps the lock
debugging will help.

[ INFO: inconsistent lock state ]
2.6.39-rc6+ #22
---------------------------------
inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage.
udevd/2410 [HC1[1]:SC0[0]:HE0:SE1] takes:
  (/home/greearb/git/linux-2.6/net/core/link_watch.c:35){?.-...}, at: [<c0445bb6>] del_timer_sync+0x0/0xa7
{HARDIRQ-ON-W} state was registered at:
   [<c046293d>] __lock_acquire+0x2b5/0xb77
   [<c046329f>] lock_acquire+0xa0/0xc4
   [<c044548a>] run_timer_softirq+0x142/0x232
   [<c043fbaa>] __do_softirq+0xb1/0x17c
irq event stamp: 138
hardirqs last  enabled at (137): [<c04b01b9>] get_page_from_freelist+0x28c/0x3c9
hardirqs last disabled at (138): [<c07f5927>] common_interrupt+0x27/0x40
softirqs last  enabled at (0): [<c0438c53>] copy_process+0x301/0xf1b
softirqs last disabled at (0): [<  (null)>]   (null)

other info that might help us debug this:
2 locks held by udevd/2410:
  #0:  (&sig->cred_guard_mutex){+.+.+.}, at: [<c04ea205>] prepare_bprm_creds+0x25/0x5a
  #1:  (&(&tp->lock)->rlock){-.-...}, at: [<f88c34ca>] __rtl8169_check_link_status+0x25/0xb4 [r8169]

stack backtrace:
Pid: 2410, comm: udevd Not tainted 2.6.39-rc6+ #22
Call Trace:
  [<c04617dc>] valid_state+0x131/0x144
  [<c04618de>] mark_lock+0xef/0x1de
  [<c0461fa1>] ? print_irq_inversion_bug+0xf0/0xf0
  [<c04628cf>] __lock_acquire+0x247/0xb77
  [<c0445bb6>] ? get_next_timer_interrupt+0x1d2/0x1d2
  [<c046329f>] lock_acquire+0xa0/0xc4
  [<c0445bb6>] ? get_next_timer_interrupt+0x1d2/0x1d2
  [<c0445bef>] del_timer_sync+0x39/0xa7
  [<c0445bb6>] ? get_next_timer_interrupt+0x1d2/0x1d2
  [<c07579ae>] linkwatch_schedule_work+0x6d/0x88
  [<c0757a76>] linkwatch_fire_event+0xad/0xb2
  [<c075ee8c>] netif_carrier_on+0x28/0x39
  [<f88c34f9>] __rtl8169_check_link_status+0x54/0xb4 [r8169]
  [<f88c3e22>] rtl8169_interrupt+0x1f4/0x298 [r8169]
  [<c0483fe0>] handle_irq_event_percpu+0x58/0x17b
  [<c0484134>] handle_irq_event+0x31/0x48
  [<c0485c2f>] ? handle_percpu_irq+0x40/0x40
  [<c0485cbe>] handle_edge_irq+0x8f/0xb1
  <IRQ>  [<c0403afe>] ? do_IRQ+0x3c/0x95
  [<c07f592e>] ? common_interrupt+0x2e/0x40
  [<c04b00d8>] ? get_page_from_freelist+0x1ab/0x3c9
  [<c04b0bd6>] ? __alloc_pages_nodemask+0x60e/0x66f
  [<c05b6061>] ? blk_finish_plug+0x12/0x2d
  [<c04254b1>] ? pte_alloc_one+0x1c/0x37
  [<c04c1fb7>] ? __pte_alloc+0x1d/0xf3
  [<c04c425f>] ? handle_mm_fault+0xee/0x150
  [<c04c4518>] ? __get_user_pages+0x257/0x39b
  [<c04c46d5>] ? get_user_pages+0x39/0x40
  [<c04eb064>] ? get_arg_page+0x35/0x8e
  [<c05cfc9c>] ? strnlen_user+0x20/0x3e
  [<c04eb1a8>] ? copy_strings+0xeb/0x1b3
  [<c04eb291>] ? copy_strings_kernel+0x21/0x30
  [<c04eb674>] ? do_execve+0x11d/0x22d
  [<c040830a>] ? sys_execve+0x31/0x54
  [<c07f5452>] ? ptregs_execve+0x12/0x20
  [<c07f535c>] ? sysenter_do_call+0x12/0x38
------------[ cut here ]------------
WARNING: at /home/greearb/git/linux-2.6/kernel/timer.c:1012 del_timer_sync+0x90/0xa7()
Hardware name: To Be Filled By O.E.M.
Modules linked in: veth 8021q garp stp llc fuse macvlan pktgen coretemp hwmon nfs lockd fscache auth_rpcgss nfs_acl sunrpc ipv6 uinput arc4 ecb 
snd_hda_codec_realtek ath9k snd_hda_intel snd_hda_codec mac80211 snd_hwdep snd_seq ath9k_common snd_seq_device snd_pcm microcode ath9k_hw ath cfg80211 snd_timer 
iTCO_wdt i2c_i801 pcspkr snd serio_raw iTCO_vendor_support r8169 soundcore snd_page_alloc mii i915 drm_kms_helper drm i2c_algo_bit video [last unloaded: 
scsi_wait_scan]
Pid: 2410, comm: udevd Not tainted 2.6.39-rc6+ #22
Call Trace:
  [<c043a0e2>] warn_slowpath_common+0x6a/0x7f
  [<c0445c46>] ? del_timer_sync+0x90/0xa7
  [<c043a10b>] warn_slowpath_null+0x14/0x18
  [<c0445c46>] del_timer_sync+0x90/0xa7
  [<c07579ae>] linkwatch_schedule_work+0x6d/0x88
  [<c0757a76>] linkwatch_fire_event+0xad/0xb2
  [<c075ee8c>] netif_carrier_on+0x28/0x39
  [<f88c34f9>] __rtl8169_check_link_status+0x54/0xb4 [r8169]
  [<f88c3e22>] rtl8169_interrupt+0x1f4/0x298 [r8169]
  [<c0483fe0>] handle_irq_event_percpu+0x58/0x17b
  [<c0484134>] handle_irq_event+0x31/0x48
  [<c0485c2f>] ? handle_percpu_irq+0x40/0x40
  [<c0485cbe>] handle_edge_irq+0x8f/0xb1
  <IRQ>  [<c0403afe>] ? do_IRQ+0x3c/0x95
  [<c07f592e>] ? common_interrupt+0x2e/0x40
  [<c04b00d8>] ? get_page_from_freelist+0x1ab/0x3c9
  [<c04b0bd6>] ? __alloc_pages_nodemask+0x60e/0x66f
  [<c05b6061>] ? blk_finish_plug+0x12/0x2d
  [<c04254b1>] ? pte_alloc_one+0x1c/0x37
  [<c04c1fb7>] ? __pte_alloc+0x1d/0xf3
  [<c04c425f>] ? handle_mm_fault+0xee/0x150
  [<c04c4518>] ? __get_user_pages+0x257/0x39b
  [<c04c46d5>] ? get_user_pages+0x39/0x40
  [<c04eb064>] ? get_arg_page+0x35/0x8e
  [<c05cfc9c>] ? strnlen_user+0x20/0x3e
  [<c04eb1a8>] ? copy_strings+0xeb/0x1b3
  [<c04eb291>] ? copy_strings_kernel+0x21/0x30
  [<c04eb674>] ? do_execve+0x11d/0x22d
  [<c040830a>] ? sys_execve+0x31/0x54
  [<c07f5452>] ? ptregs_execve+0x12/0x20
  [<c07f535c>] ? sysenter_do_call+0x12/0x38
---[ end trace 5bb67ffe27b66e2e ]---

-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com


^ permalink raw reply

* Re: [PATCH 09/18] virtio: use avail_event index
From: Tom Lendacky @ 2011-05-04 21:58 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: linux-kernel, Rusty Russell, Carsten Otte, Christian Borntraeger,
	linux390, Martin Schwidefsky, Heiko Carstens, Shirley Ma, lguest,
	virtualization, netdev, linux-s390, kvm, Krishna Kumar, steved,
	habanero
In-Reply-To: <8bba6a0a8eee17e741c5155b04ff1b1c9f34bf94.1304541919.git.mst@redhat.com>


On Wednesday, May 04, 2011 03:51:47 PM Michael S. Tsirkin wrote:
> Use the new avail_event feature to reduce the number
> of exits from the guest.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>  drivers/virtio/virtio_ring.c |   39
> ++++++++++++++++++++++++++++++++++++++- 1 files changed, 38 insertions(+),
> 1 deletions(-)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 3a3ed75..262dfe6 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -82,6 +82,15 @@ struct vring_virtqueue
>  	/* Host supports indirect buffers */
>  	bool indirect;
> 
> +	/* Host publishes avail event idx */
> +	bool event;
> +
> +	/* Is kicked_avail below valid? */
> +	bool kicked_avail_valid;
> +
> +	/* avail idx value we already kicked. */
> +	u16 kicked_avail;
> +
>  	/* Number of free buffers */
>  	unsigned int num_free;
>  	/* Head of free buffer list. */
> @@ -228,6 +237,12 @@ add_head:
>  	 * new available array entries. */
>  	virtio_wmb();
>  	vq->vring.avail->idx++;
> +	/* If the driver never bothers to kick in a very long while,
> +	 * avail index might wrap around. If that happens, invalidate
> +	 * kicked_avail index we stored. TODO: make sure all drivers
> +	 * kick at least once in 2^16 and remove this. */
> +	if (unlikely(vq->vring.avail->idx == vq->kicked_avail))
> +		vq->kicked_avail_valid = true;

vq->kicked_avail_valid should be set to false here.

Tom

> 
>  	pr_debug("Added buffer head %i to %p\n", head, vq);
>  	END_USE(vq);
> @@ -236,6 +251,23 @@ add_head:
>  }
>  EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp);
> 
> +
> +static bool vring_notify(struct vring_virtqueue *vq)
> +{
> +	u16 old, new;
> +	bool v;
> +	if (!vq->event)
> +		return !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY);
> +
> +	v = vq->kicked_avail_valid;
> +	old = vq->kicked_avail;
> +	new = vq->kicked_avail = vq->vring.avail->idx;
> +	vq->kicked_avail_valid = true;
> +	if (unlikely(!v))
> +		return true;
> +	return vring_need_event(vring_avail_event(&vq->vring), new, old);
> +}
> +
>  void virtqueue_kick(struct virtqueue *_vq)
>  {
>  	struct vring_virtqueue *vq = to_vvq(_vq);
> @@ -244,7 +276,7 @@ void virtqueue_kick(struct virtqueue *_vq)
>  	/* Need to update avail index before checking if we should notify */
>  	virtio_mb();
> 
> -	if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
> +	if (vring_notify(vq))
>  		/* Prod other side to tell it about changes. */
>  		vq->notify(&vq->vq);
> 
> @@ -437,6 +469,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
>  	vq->vq.name = name;
>  	vq->notify = notify;
>  	vq->broken = false;
> +	vq->kicked_avail_valid = false;
> +	vq->kicked_avail = 0;
>  	vq->last_used_idx = 0;
>  	list_add_tail(&vq->vq.list, &vdev->vqs);
>  #ifdef DEBUG
> @@ -444,6 +478,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
>  #endif
> 
>  	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
> +	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_AVAIL_EVENT_IDX);
> 
>  	/* No callback?  Tell other side not to bother us. */
>  	if (!callback)
> @@ -482,6 +517,8 @@ void vring_transport_features(struct virtio_device
> *vdev) break;
>  		case VIRTIO_RING_F_USED_EVENT_IDX:
>  			break;
> +		case VIRTIO_RING_F_AVAIL_EVENT_IDX:
> +			break;
>  		default:
>  			/* We don't understand this bit. */
>  			clear_bit(i, vdev->features);

^ permalink raw reply

* Re: [PATCH] net: ehea: fix wrongly-reported supported modes
From: Kleber Sacilotto de Souza @ 2011-05-04 21:59 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: netdev
In-Reply-To: <1304458460.2873.18.camel@bwh-desktop>

On Tue, 2011-05-03 at 22:34 +0100, Ben Hutchings wrote:
> On Tue, 2011-05-03 at 21:16 +0100, Ben Hutchings wrote:
> > On Tue, 2011-05-03 at 16:42 -0300, Kleber Sacilotto de Souza wrote:
> > > Currently EHEA reports to ethtool as supporting 10000baseT_Full and
> > > FIBRE independent of the hardware configuration. However, these
> > > capabilities should be reported only if the physical port and
> > > the medium support them, which is the case where the physical port
> > > is connected at 10Gb.
> > >
> > > Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
> > > ---
> > >  drivers/net/ehea/ehea_ethtool.c |   21 ++++++++++++++-------
> > >  1 files changed, 14 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/drivers/net/ehea/ehea_ethtool.c b/drivers/net/ehea/ehea_ethtool.c
> > > index 3e2e734..04716c2 100644
> > > --- a/drivers/net/ehea/ehea_ethtool.c
> > > +++ b/drivers/net/ehea/ehea_ethtool.c
> > > @@ -55,15 +55,22 @@ static int ehea_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
> > >  		cmd->duplex = -1;
> > >  	}
> > > 
> > > -	cmd->supported = (SUPPORTED_10000baseT_Full | SUPPORTED_1000baseT_Full
> > > -		       | SUPPORTED_100baseT_Full |  SUPPORTED_100baseT_Half
> > > -		       | SUPPORTED_10baseT_Full | SUPPORTED_10baseT_Half
> > > -		       | SUPPORTED_Autoneg | SUPPORTED_FIBRE);
> > > +	cmd->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full
> > > +		       | SUPPORTED_100baseT_Half | SUPPORTED_10baseT_Full
> > > +		       | SUPPORTED_10baseT_Half | SUPPORTED_Autoneg);
> > > 
> > > -	cmd->advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_Autoneg
> > > -			 | ADVERTISED_FIBRE);
> > > +	cmd->advertising = ADVERTISED_Autoneg;
> > > +
> > > +	if (cmd->speed == SPEED_10000) {
> > > +		cmd->supported |= (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE);
> > > +		cmd->advertising |= (ADVERTISED_10000baseT_Full | ADVERTISED_FIBRE);
> > > +		cmd->port = PORT_FIBRE;
> > > +	} else {
> > > +		cmd->supported |= SUPPORTED_TP;
> > > +		cmd->advertising |= (ADVERTISED_1000baseT_Full | ADVERTISED_TP);
> > > +		cmd->port = PORT_TP;
> > > +	}
> > 
> > This doesn't make any sense.  If the current speed is 10G, then the
> > driver also claims to support speeds of 10M, 100M, 1G and 10G.  But then
>                                                                ^
>                                                            on fibre
> 
> > if the speed actually is <10G, the driver claims to support TP.  What's
> > going on here?

You are right. This patch was based on very vague hardware specs that
wasn't making much sense. I have more details about the hardware now, I
will send another patch soon.

> > 
> > (Also, claiming to support BASE-T modes on non-TP media is bogus, though
> > I understand why people are doing it.)
> > 
> > Ben.
> > 
> > > -	cmd->port = PORT_FIBRE;
> > >  	cmd->autoneg = port->autoneg == 1 ? AUTONEG_ENABLE : AUTONEG_DISABLE;
> > > 
> > >  	return 0;
> > 
> 

-- 
Kleber S. Souza
IBM Linux Technology Center


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox