Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH][Revised Log] PPPoE: Fix flush/close races.
From: Michal Ostrowski @ 2009-10-26 20:06 UTC (permalink / raw)
  To: linux-ppp, netdev, Cyrill Gorcunov, Denys Fedoryschenko
In-Reply-To: <1256587405-7073-1-git-send-email-mostrows@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1081 bytes --]

Be more careful about the state of pointers during tear-down.
The "pppoe_dev" field can only be looked at safely while holding socket locks.
This subsequently allows for the flush_lock to be killed.

We depend on the PPPOX_CONNECTED state to tell us that those fields are
valid, so whoever clears that state (pppox_unbind_sock()) is responsible for
the dev_put() call.

We also have to ensure that we delete_item() on all sockets before they are
cleaned up.

The need for these changes has been exposed by scenarios wherein namespace
bindings of ethernet devices change while there are ongoing PPPoE sessions,
which resulted in oopses due to unusual socket connection termination paths,
exposing these issues.

Signed-off-by: Michal Ostrowski <mostrows@gmail.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com>
Reported-by: Denys Fedoryschenko <denys@visp.net.lb>
Tested-by: Denys Fedoryschenko <denys@visp.net.lb>
---
 drivers/net/pppoe.c |  129 +++++++++++++++++++++++++++------------------------
 1 files changed, 68 insertions(+), 61 deletions(-)

[-- Attachment #2: 0001-PPPoE-Fix-flush-close-races.patch --]
[-- Type: text/x-patch, Size: 7871 bytes --]

diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index 7cbf6f9..2559991 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -111,9 +111,6 @@ struct pppoe_net {
 	rwlock_t hash_lock;
 };
 
-/* to eliminate a race btw pppoe_flush_dev and pppoe_release */
-static DEFINE_SPINLOCK(flush_lock);
-
 /*
  * PPPoE could be in the following stages:
  * 1) Discovery stage (to obtain remote MAC and Session ID)
@@ -303,45 +300,48 @@ static void pppoe_flush_dev(struct net_device *dev)
 	write_lock_bh(&pn->hash_lock);
 	for (i = 0; i < PPPOE_HASH_SIZE; i++) {
 		struct pppox_sock *po = pn->hash_table[i];
+		struct sock *sk;
 
-		while (po != NULL) {
-			struct sock *sk;
-			if (po->pppoe_dev != dev) {
+		while (po) {
+			while (po && po->pppoe_dev != dev) {
 				po = po->next;
-				continue;
 			}
+
+			if (!po)
+				break;
+
 			sk = sk_pppox(po);
-			spin_lock(&flush_lock);
-			po->pppoe_dev = NULL;
-			spin_unlock(&flush_lock);
-			dev_put(dev);
 
 			/* We always grab the socket lock, followed by the
-			 * hash_lock, in that order.  Since we should
-			 * hold the sock lock while doing any unbinding,
-			 * we need to release the lock we're holding.
-			 * Hold a reference to the sock so it doesn't disappear
-			 * as we're jumping between locks.
+			 * hash_lock, in that order.  Since we should hold the
+			 * sock lock while doing any unbinding, we need to
+			 * release the lock we're holding.  Hold a reference to
+			 * the sock so it doesn't disappear as we're jumping
+			 * between locks.
 			 */
 
 			sock_hold(sk);
-
 			write_unlock_bh(&pn->hash_lock);
 			lock_sock(sk);
 
-			if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
+			if (po->pppoe_dev == dev
+			    && sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
 				pppox_unbind_sock(sk);
 				sk->sk_state = PPPOX_ZOMBIE;
 				sk->sk_state_change(sk);
+				po->pppoe_dev = NULL;
+				dev_put(dev);
 			}
 
 			release_sock(sk);
 			sock_put(sk);
 
-			/* Restart scan at the beginning of this hash chain.
-			 * While the lock was dropped the chain contents may
-			 * have changed.
+			/* Restart the process from the start of the current
+			 * hash chain. We dropped locks so the world may have
+			 * change from underneath us.
 			 */
+
+			BUG_ON(pppoe_pernet(dev_net(dev)) == NULL);
 			write_lock_bh(&pn->hash_lock);
 			po = pn->hash_table[i];
 		}
@@ -388,11 +388,16 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
 	struct pppox_sock *po = pppox_sk(sk);
 	struct pppox_sock *relay_po;
 
+	/* Backlog receive. Semantics of backlog rcv preclude any code from
+	 * executing in lock_sock()/release_sock() bounds; meaning sk->sk_state
+	 * can't change.
+	 */
+
 	if (sk->sk_state & PPPOX_BOUND) {
 		ppp_input(&po->chan, skb);
 	} else if (sk->sk_state & PPPOX_RELAY) {
-		relay_po = get_item_by_addr(dev_net(po->pppoe_dev),
-						&po->pppoe_relay);
+		relay_po = get_item_by_addr(sock_net(sk),
+					    &po->pppoe_relay);
 		if (relay_po == NULL)
 			goto abort_kfree;
 
@@ -447,6 +452,10 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
 		goto drop;
 
 	pn = pppoe_pernet(dev_net(dev));
+
+	/* Note that get_item does a sock_hold(), so sk_pppox(po)
+	 * is known to be safe.
+	 */
 	po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex);
 	if (!po)
 		goto drop;
@@ -561,6 +570,7 @@ static int pppoe_release(struct socket *sock)
 	struct sock *sk = sock->sk;
 	struct pppox_sock *po;
 	struct pppoe_net *pn;
+	struct net *net = NULL;
 
 	if (!sk)
 		return 0;
@@ -571,44 +581,28 @@ static int pppoe_release(struct socket *sock)
 		return -EBADF;
 	}
 
+	po = pppox_sk(sk);
+
+	if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
+		dev_put(po->pppoe_dev);
+		po->pppoe_dev = NULL;
+	}
+
 	pppox_unbind_sock(sk);
 
 	/* Signal the death of the socket. */
 	sk->sk_state = PPPOX_DEAD;
 
-	/*
-	 * pppoe_flush_dev could lead to a race with
-	 * this routine so we use flush_lock to eliminate
-	 * such a case (we only need per-net specific data)
-	 */
-	spin_lock(&flush_lock);
-	po = pppox_sk(sk);
-	if (!po->pppoe_dev) {
-		spin_unlock(&flush_lock);
-		goto out;
-	}
-	pn = pppoe_pernet(dev_net(po->pppoe_dev));
-	spin_unlock(&flush_lock);
+	net = sock_net(sk);
+	pn = pppoe_pernet(net);
 
 	/*
 	 * protect "po" from concurrent updates
 	 * on pppoe_flush_dev
 	 */
-	write_lock_bh(&pn->hash_lock);
+	delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote,
+		    po->pppoe_ifindex);
 
-	po = pppox_sk(sk);
-	if (stage_session(po->pppoe_pa.sid))
-		__delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote,
-				po->pppoe_ifindex);
-
-	if (po->pppoe_dev) {
-		dev_put(po->pppoe_dev);
-		po->pppoe_dev = NULL;
-	}
-
-	write_unlock_bh(&pn->hash_lock);
-
-out:
 	sock_orphan(sk);
 	sock->sk = NULL;
 
@@ -625,8 +619,9 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 	struct sock *sk = sock->sk;
 	struct sockaddr_pppox *sp = (struct sockaddr_pppox *)uservaddr;
 	struct pppox_sock *po = pppox_sk(sk);
-	struct net_device *dev;
+	struct net_device *dev = NULL;
 	struct pppoe_net *pn;
+	struct net *net = NULL;
 	int error;
 
 	lock_sock(sk);
@@ -652,12 +647,14 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 	/* Delete the old binding */
 	if (stage_session(po->pppoe_pa.sid)) {
 		pppox_unbind_sock(sk);
+		pn = pppoe_pernet(sock_net(sk));
+		delete_item(pn, po->pppoe_pa.sid,
+			    po->pppoe_pa.remote, po->pppoe_ifindex);
 		if (po->pppoe_dev) {
-			pn = pppoe_pernet(dev_net(po->pppoe_dev));
-			delete_item(pn, po->pppoe_pa.sid,
-				po->pppoe_pa.remote, po->pppoe_ifindex);
 			dev_put(po->pppoe_dev);
+			po->pppoe_dev = NULL;
 		}
+
 		memset(sk_pppox(po) + 1, 0,
 		       sizeof(struct pppox_sock) - sizeof(struct sock));
 		sk->sk_state = PPPOX_NONE;
@@ -666,16 +663,15 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 	/* Re-bind in session stage only */
 	if (stage_session(sp->sa_addr.pppoe.sid)) {
 		error = -ENODEV;
-		dev = dev_get_by_name(sock_net(sk), sp->sa_addr.pppoe.dev);
+		net = sock_net(sk);
+		dev = dev_get_by_name(net, sp->sa_addr.pppoe.dev);
 		if (!dev)
-			goto end;
+			goto err_put;
 
 		po->pppoe_dev = dev;
 		po->pppoe_ifindex = dev->ifindex;
-		pn = pppoe_pernet(dev_net(dev));
-		write_lock_bh(&pn->hash_lock);
+		pn = pppoe_pernet(net);
 		if (!(dev->flags & IFF_UP)) {
-			write_unlock_bh(&pn->hash_lock);
 			goto err_put;
 		}
 
@@ -683,6 +679,7 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 		       &sp->sa_addr.pppoe,
 		       sizeof(struct pppoe_addr));
 
+		write_lock_bh(&pn->hash_lock);
 		error = __set_item(pn, po);
 		write_unlock_bh(&pn->hash_lock);
 		if (error < 0)
@@ -696,8 +693,11 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 		po->chan.ops = &pppoe_chan_ops;
 
 		error = ppp_register_net_channel(dev_net(dev), &po->chan);
-		if (error)
+		if (error) {
+			delete_item(pn, po->pppoe_pa.sid,
+				    po->pppoe_pa.remote, po->pppoe_ifindex);
 			goto err_put;
+		}
 
 		sk->sk_state = PPPOX_CONNECTED;
 	}
@@ -915,6 +915,14 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
 	struct pppoe_hdr *ph;
 	int data_len = skb->len;
 
+	/* The higher-level PPP code (ppp_unregister_channel()) ensures the PPP
+	 * xmit operations conclude prior to an unregistration call.  Thus
+	 * sk->sk_state cannot change, so we don't need to do lock_sock().
+	 * But, we also can't do a lock_sock since that introduces a potential
+	 * deadlock as we'd reverse the lock ordering used when calling
+	 * ppp_unregister_channel().
+	 */
+
 	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
 		goto abort;
 
@@ -944,7 +952,6 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
 			po->pppoe_pa.remote, NULL, data_len);
 
 	dev_queue_xmit(skb);
-
 	return 1;
 
 abort:

^ permalink raw reply related

* Re: [PATCH] PPPoE: Fix flush/close races.
From: Denys Fedoryschenko @ 2009-10-26 20:05 UTC (permalink / raw)
  To: Cyrill Gorcunov; +Cc: Michal Ostrowski, linux-ppp, netdev, Eric Dumazet
In-Reply-To: <20091026195933.GC5321@lenovo>

On Monday 26 October 2009 21:59:33 Cyrill Gorcunov wrote:
> [Michal Ostrowski - Mon, Oct 26, 2009 at 02:51:52PM -0500]
>
> | Be more careful about the state of pointers during tear-down.
> | The "pppoe_dev" field can only be looked at safely while holding socket
> | locks. This subsequently allows for the flush_lock to be killed.
> |
> | We depend on the PPPOX_CONNECTED state to tell us that those fields
> | are valid, so whoever clears that state (pppox_unbind_sock()) is
> | responsible for the dev_put() call.
> |
> | We also have to ensure that we delete_item() on all sockets before they
> | are cleaned up.
> |
> | The need for these changes has been exposed by scenarios wherein
> | namespace bindings of ethernet devices change while there are ongoing
> | PPPoE sessions, which resulted in oopses due to unusual socket connection
> | termination paths, exposing these issues.
> |
> | Signed-off-by: Michal Ostrowski <mostrows@gmail.com>
> | Reviewed-by: Cyril Gorcunov <gorcunov@gmail.com>
>
> ...
>
> Thanks a lot Michal!
>
> I think we should add as well
>
> Reported-by: Denys Fedoryschenko <denys@visp.net.lb>
> Tested-by: Denys Fedoryschenko <denys@visp.net.lb>
>
> 	-- Cyrill

Yes, till now everything working perfectly. Confirming :-)

^ permalink raw reply

* Re: [PATCH] PPPoE: Fix flush/close races.
From: Cyrill Gorcunov @ 2009-10-26 19:59 UTC (permalink / raw)
  To: Michal Ostrowski; +Cc: linux-ppp, netdev, Denys Fedoryschenko, Eric Dumazet
In-Reply-To: <e6d1cecd0910261251w721b8258n7dfc1bac9d01af8b@mail.gmail.com>

[Michal Ostrowski - Mon, Oct 26, 2009 at 02:51:52PM -0500]
| Be more careful about the state of pointers during tear-down.
| The "pppoe_dev" field can only be looked at safely while holding socket locks.
| This subsequently allows for the flush_lock to be killed.
| 
| We depend on the PPPOX_CONNECTED state to tell us that those fields are
| valid, so whoever clears that state (pppox_unbind_sock()) is responsible for
| the dev_put() call.
| 
| We also have to ensure that we delete_item() on all sockets before they are
| cleaned up.
| 
| The need for these changes has been exposed by scenarios wherein namespace
| bindings of ethernet devices change while there are ongoing PPPoE sessions,
| which resulted in oopses due to unusual socket connection termination paths,
| exposing these issues.
| 
| Signed-off-by: Michal Ostrowski <mostrows@gmail.com>
| Reviewed-by: Cyril Gorcunov <gorcunov@gmail.com>
...

Thanks a lot Michal!

I think we should add as well

Reported-by: Denys Fedoryschenko <denys@visp.net.lb>
Tested-by: Denys Fedoryschenko <denys@visp.net.lb>

	-- Cyrill

^ permalink raw reply

* [PATCH] PPPoE: Fix flush/close races.
From: Michal Ostrowski @ 2009-10-26 19:51 UTC (permalink / raw)
  To: linux-ppp, netdev, Cyrill Gorcunov
In-Reply-To: <1256586498-6230-1-git-send-email-mostrows@gmail.com>

[-- Attachment #1: Type: text/plain, Size: 975 bytes --]

Be more careful about the state of pointers during tear-down.
The "pppoe_dev" field can only be looked at safely while holding socket locks.
This subsequently allows for the flush_lock to be killed.

We depend on the PPPOX_CONNECTED state to tell us that those fields are
valid, so whoever clears that state (pppox_unbind_sock()) is responsible for
the dev_put() call.

We also have to ensure that we delete_item() on all sockets before they are
cleaned up.

The need for these changes has been exposed by scenarios wherein namespace
bindings of ethernet devices change while there are ongoing PPPoE sessions,
which resulted in oopses due to unusual socket connection termination paths,
exposing these issues.

Signed-off-by: Michal Ostrowski <mostrows@gmail.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 drivers/net/pppoe.c |  129 +++++++++++++++++++++++++++------------------------
 1 files changed, 68 insertions(+), 61 deletions(-)

[-- Attachment #2: 0001-PPPoE-Fix-flush-close-races.patch --]
[-- Type: text/x-patch, Size: 7871 bytes --]

diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index 7cbf6f9..2559991 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -111,9 +111,6 @@ struct pppoe_net {
 	rwlock_t hash_lock;
 };
 
-/* to eliminate a race btw pppoe_flush_dev and pppoe_release */
-static DEFINE_SPINLOCK(flush_lock);
-
 /*
  * PPPoE could be in the following stages:
  * 1) Discovery stage (to obtain remote MAC and Session ID)
@@ -303,45 +300,48 @@ static void pppoe_flush_dev(struct net_device *dev)
 	write_lock_bh(&pn->hash_lock);
 	for (i = 0; i < PPPOE_HASH_SIZE; i++) {
 		struct pppox_sock *po = pn->hash_table[i];
+		struct sock *sk;
 
-		while (po != NULL) {
-			struct sock *sk;
-			if (po->pppoe_dev != dev) {
+		while (po) {
+			while (po && po->pppoe_dev != dev) {
 				po = po->next;
-				continue;
 			}
+
+			if (!po)
+				break;
+
 			sk = sk_pppox(po);
-			spin_lock(&flush_lock);
-			po->pppoe_dev = NULL;
-			spin_unlock(&flush_lock);
-			dev_put(dev);
 
 			/* We always grab the socket lock, followed by the
-			 * hash_lock, in that order.  Since we should
-			 * hold the sock lock while doing any unbinding,
-			 * we need to release the lock we're holding.
-			 * Hold a reference to the sock so it doesn't disappear
-			 * as we're jumping between locks.
+			 * hash_lock, in that order.  Since we should hold the
+			 * sock lock while doing any unbinding, we need to
+			 * release the lock we're holding.  Hold a reference to
+			 * the sock so it doesn't disappear as we're jumping
+			 * between locks.
 			 */
 
 			sock_hold(sk);
-
 			write_unlock_bh(&pn->hash_lock);
 			lock_sock(sk);
 
-			if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
+			if (po->pppoe_dev == dev
+			    && sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
 				pppox_unbind_sock(sk);
 				sk->sk_state = PPPOX_ZOMBIE;
 				sk->sk_state_change(sk);
+				po->pppoe_dev = NULL;
+				dev_put(dev);
 			}
 
 			release_sock(sk);
 			sock_put(sk);
 
-			/* Restart scan at the beginning of this hash chain.
-			 * While the lock was dropped the chain contents may
-			 * have changed.
+			/* Restart the process from the start of the current
+			 * hash chain. We dropped locks so the world may have
+			 * change from underneath us.
 			 */
+
+			BUG_ON(pppoe_pernet(dev_net(dev)) == NULL);
 			write_lock_bh(&pn->hash_lock);
 			po = pn->hash_table[i];
 		}
@@ -388,11 +388,16 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb)
 	struct pppox_sock *po = pppox_sk(sk);
 	struct pppox_sock *relay_po;
 
+	/* Backlog receive. Semantics of backlog rcv preclude any code from
+	 * executing in lock_sock()/release_sock() bounds; meaning sk->sk_state
+	 * can't change.
+	 */
+
 	if (sk->sk_state & PPPOX_BOUND) {
 		ppp_input(&po->chan, skb);
 	} else if (sk->sk_state & PPPOX_RELAY) {
-		relay_po = get_item_by_addr(dev_net(po->pppoe_dev),
-						&po->pppoe_relay);
+		relay_po = get_item_by_addr(sock_net(sk),
+					    &po->pppoe_relay);
 		if (relay_po == NULL)
 			goto abort_kfree;
 
@@ -447,6 +452,10 @@ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev,
 		goto drop;
 
 	pn = pppoe_pernet(dev_net(dev));
+
+	/* Note that get_item does a sock_hold(), so sk_pppox(po)
+	 * is known to be safe.
+	 */
 	po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex);
 	if (!po)
 		goto drop;
@@ -561,6 +570,7 @@ static int pppoe_release(struct socket *sock)
 	struct sock *sk = sock->sk;
 	struct pppox_sock *po;
 	struct pppoe_net *pn;
+	struct net *net = NULL;
 
 	if (!sk)
 		return 0;
@@ -571,44 +581,28 @@ static int pppoe_release(struct socket *sock)
 		return -EBADF;
 	}
 
+	po = pppox_sk(sk);
+
+	if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
+		dev_put(po->pppoe_dev);
+		po->pppoe_dev = NULL;
+	}
+
 	pppox_unbind_sock(sk);
 
 	/* Signal the death of the socket. */
 	sk->sk_state = PPPOX_DEAD;
 
-	/*
-	 * pppoe_flush_dev could lead to a race with
-	 * this routine so we use flush_lock to eliminate
-	 * such a case (we only need per-net specific data)
-	 */
-	spin_lock(&flush_lock);
-	po = pppox_sk(sk);
-	if (!po->pppoe_dev) {
-		spin_unlock(&flush_lock);
-		goto out;
-	}
-	pn = pppoe_pernet(dev_net(po->pppoe_dev));
-	spin_unlock(&flush_lock);
+	net = sock_net(sk);
+	pn = pppoe_pernet(net);
 
 	/*
 	 * protect "po" from concurrent updates
 	 * on pppoe_flush_dev
 	 */
-	write_lock_bh(&pn->hash_lock);
+	delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote,
+		    po->pppoe_ifindex);
 
-	po = pppox_sk(sk);
-	if (stage_session(po->pppoe_pa.sid))
-		__delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote,
-				po->pppoe_ifindex);
-
-	if (po->pppoe_dev) {
-		dev_put(po->pppoe_dev);
-		po->pppoe_dev = NULL;
-	}
-
-	write_unlock_bh(&pn->hash_lock);
-
-out:
 	sock_orphan(sk);
 	sock->sk = NULL;
 
@@ -625,8 +619,9 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 	struct sock *sk = sock->sk;
 	struct sockaddr_pppox *sp = (struct sockaddr_pppox *)uservaddr;
 	struct pppox_sock *po = pppox_sk(sk);
-	struct net_device *dev;
+	struct net_device *dev = NULL;
 	struct pppoe_net *pn;
+	struct net *net = NULL;
 	int error;
 
 	lock_sock(sk);
@@ -652,12 +647,14 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 	/* Delete the old binding */
 	if (stage_session(po->pppoe_pa.sid)) {
 		pppox_unbind_sock(sk);
+		pn = pppoe_pernet(sock_net(sk));
+		delete_item(pn, po->pppoe_pa.sid,
+			    po->pppoe_pa.remote, po->pppoe_ifindex);
 		if (po->pppoe_dev) {
-			pn = pppoe_pernet(dev_net(po->pppoe_dev));
-			delete_item(pn, po->pppoe_pa.sid,
-				po->pppoe_pa.remote, po->pppoe_ifindex);
 			dev_put(po->pppoe_dev);
+			po->pppoe_dev = NULL;
 		}
+
 		memset(sk_pppox(po) + 1, 0,
 		       sizeof(struct pppox_sock) - sizeof(struct sock));
 		sk->sk_state = PPPOX_NONE;
@@ -666,16 +663,15 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 	/* Re-bind in session stage only */
 	if (stage_session(sp->sa_addr.pppoe.sid)) {
 		error = -ENODEV;
-		dev = dev_get_by_name(sock_net(sk), sp->sa_addr.pppoe.dev);
+		net = sock_net(sk);
+		dev = dev_get_by_name(net, sp->sa_addr.pppoe.dev);
 		if (!dev)
-			goto end;
+			goto err_put;
 
 		po->pppoe_dev = dev;
 		po->pppoe_ifindex = dev->ifindex;
-		pn = pppoe_pernet(dev_net(dev));
-		write_lock_bh(&pn->hash_lock);
+		pn = pppoe_pernet(net);
 		if (!(dev->flags & IFF_UP)) {
-			write_unlock_bh(&pn->hash_lock);
 			goto err_put;
 		}
 
@@ -683,6 +679,7 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 		       &sp->sa_addr.pppoe,
 		       sizeof(struct pppoe_addr));
 
+		write_lock_bh(&pn->hash_lock);
 		error = __set_item(pn, po);
 		write_unlock_bh(&pn->hash_lock);
 		if (error < 0)
@@ -696,8 +693,11 @@ static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr,
 		po->chan.ops = &pppoe_chan_ops;
 
 		error = ppp_register_net_channel(dev_net(dev), &po->chan);
-		if (error)
+		if (error) {
+			delete_item(pn, po->pppoe_pa.sid,
+				    po->pppoe_pa.remote, po->pppoe_ifindex);
 			goto err_put;
+		}
 
 		sk->sk_state = PPPOX_CONNECTED;
 	}
@@ -915,6 +915,14 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
 	struct pppoe_hdr *ph;
 	int data_len = skb->len;
 
+	/* The higher-level PPP code (ppp_unregister_channel()) ensures the PPP
+	 * xmit operations conclude prior to an unregistration call.  Thus
+	 * sk->sk_state cannot change, so we don't need to do lock_sock().
+	 * But, we also can't do a lock_sock since that introduces a potential
+	 * deadlock as we'd reverse the lock ordering used when calling
+	 * ppp_unregister_channel().
+	 */
+
 	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
 		goto abort;
 
@@ -944,7 +952,6 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
 			po->pppoe_pa.remote, NULL, data_len);
 
 	dev_queue_xmit(skb);
-
 	return 1;
 
 abort:

^ permalink raw reply related

* Re: [PATCH] virtio-net: fix data corruption with OOM
From: Michael S. Tsirkin @ 2009-10-26 19:34 UTC (permalink / raw)
  To: Rusty Russell; +Cc: virtualization, kvm, netdev
In-Reply-To: <20091026184243.GA26473@redhat.com>

On Mon, Oct 26, 2009 at 08:42:43PM +0200, Michael S. Tsirkin wrote:
> On Mon, Oct 26, 2009 at 12:11:51PM +1030, Rusty Russell wrote:
> > On Mon, 26 Oct 2009 03:33:40 am Michael S. Tsirkin wrote:
> > > virtio net used to unlink skbs from send queues on error,
> > > but ever since 48925e372f04f5e35fec6269127c62b2c71ab794
> > > we do not do this. This causes guest data corruption and crashes
> > > with vhost since net core can requeue the skb or free it without
> > > it being taken off the list.
> > > 
> > > This patch fixes this by queueing the skb after successful
> > > transmit.
> > 
> > I originally thought that this was racy: as soon as we do add_buf, we need to
> > make sure we're ready for the callback (for virtio_pci, it's ->kick, but we
> > shouldn't rely on that).
> 
> Modified the guest slightly, and I am getting crashes again.
> I didn't have time to debug this, but based on previous experience,
> I reverted 48925e372f04f5e35fec6269127c62b2c71ab794,
> and the crash went away.
> Rusty, what do you say we just revert 48925e372f04f5e35fec6269127c62b2c71ab794
> for now?

Hmm. Can't reproduce the crash anymore.
There is a small chance that the problem was my error,
so I guess I should try to reproduce and debug this,
after all.

^ permalink raw reply

* 2.6.32-rc5-git3: Reported regressions 2.6.30 -> 2.6.31
From: Rafael J. Wysocki @ 2009-10-26 19:26 UTC (permalink / raw)
  To: Linux Kernel Mailing List
  Cc: Andrew Morton, Linus Torvalds, Natalie Protasevich,
	Kernel Testers List, Network Development, Linux ACPI,
	Linux PM List, Linux SCSI List, Linux Wireless List, DRI

This message contains a list of some regressions introduced between 2.6.30 and
2.6.31, for which there are no fixes in the mainline I know of.  If any of them
have been fixed already, please let me know.

If you know of any other unresolved regressions introduced between 2.6.30
and 2.6.31, please let me know as well and I'll add them to the list.
Also, please let me know if any of the entries below are invalid.

Each entry from the list will be sent additionally in an automatic reply to
this message with CCs to the people involved in reporting and handling the
issue.


Listed regressions statistics:

  Date          Total  Pending  Unresolved
  ----------------------------------------
  2009-10-26      170       37          32
  2009-10-12      161       45          35
  2009-10-02      151       49          42
  2009-09-06      123       34          27
  2009-08-26      108       33          26
  2009-08-20      102       32          29
  2009-08-10       89       27          24
  2009-08-02       76       36          28
  2009-07-27       70       51          43
  2009-07-07       35       25          21
  2009-06-29       22       22          15


Unresolved regressions
----------------------

Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14476
Subject		: Unable to handle kernel paging request in nfs_write_mapping
Submitter	: Stephan von Krawczynski <skraw@ithnet.com>
Date		: 2009-10-14 9:53 (13 days old)
References	: http://marc.info/?l=linux-kernel&m=125551421405656&w=4
Handled-By	: Trond Myklebust <Trond.Myklebust@netapp.com>


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14474
Subject		: restorecond going crazy on 2.6.31.4 - inotify regression?
Submitter	: Robert Hancock <hancockrwd@gmail.com>
Date		: 2009-10-16 0:03 (11 days old)
References	: http://marc.info/?l=linux-kernel&m=125565159520489&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14446
Subject		: battery status info broken/useless in 2.6.32-rc3 - MSI PR200 (possibly others, too)
Submitter	: Eddy Petrișor <eddy.petrisor+linbug@gmail.com>
Date		: 2009-10-20 08:25 (7 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14417
Subject		: [Regression] Wireless driver iwlagn+iwlcore doesn't work after resume (needs reloading)
Submitter	: Eddy Petrișor <eddy.petrisor+linbug@gmail.com>
Date		: 2009-10-16 11:07 (11 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14402
Subject		: Atheros ath9k module is not working with 2.6.31.1 on an Acer Extensa 7630EZ
Submitter	: Bernhard <berndl81@gmx.at>
Date		: 2009-10-14 11:17 (13 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14400
Subject		: disable/enable wlan broken with ath5k
Submitter	: Daniel Bumke <danielbumke@gmail.com>
Date		: 2009-10-13 12:35 (14 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14391
Subject		: use after free of struct powernow_k8_data
Submitter	: Michal Schmidt <mschmidt@redhat.com>
Date		: 2009-09-24 14:51 (33 days old)
References	: http://marc.info/?l=linux-kernel&m=125380383515615&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14388
Subject		: keyboard under X with 2.6.31
Submitter	: Frédéric L. W. Meunier <fredlwm@gmail.com>
Date		: 2009-10-07 20:19 (20 days old)
First-Bad-Commit: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=e043e42bdb66885b3ac10d27a01ccb9972e2b0a3
References	: http://marc.info/?l=linux-kernel&m=125494753228217&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14385
Subject		: DMAR regression in 2.6.31 leads to ext4 corruption?
Submitter	: Andy Isaacson <adi@hexapodia.org>
Date		: 2009-10-08 23:56 (19 days old)
References	: http://marc.info/?l=linux-kernel&m=125504643703877&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14294
Subject		: kernel BUG at drivers/ide/ide-disk.c:187
Submitter	: Santiago Garcia Mantinan <manty@manty.net>
Date		: 2009-09-30 11:05 (27 days old)
References	: http://marc.info/?l=linux-kernel&m=125430926311466&w=4
Handled-By	: David Miller <davem@davemloft.net>


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14267
Subject		: Disassociating atheros wlan
Submitter	: Kristoffer Ericson <kristoffer.ericson@gmail.com>
Date		: 2009-09-24 10:16 (33 days old)
References	: http://marc.info/?l=linux-kernel&m=125378723723384&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14265
Subject		: ifconfig: page allocation failure. order:5, mode:0x8020 w/ e100
Submitter	: Karol Lewandowski <karol.k.lewandowski@gmail.com>
Date		: 2009-09-15 12:05 (42 days old)
References	: http://marc.info/?l=linux-kernel&m=125301636509517&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14257
Subject		: Not able to boot on 32 bit System
Submitter	: Rishikesh <risrajak@linux.vnet.ibm.com>
Date		: 2009-09-21 15:25 (36 days old)
References	: http://marc.info/?l=linux-kernel&m=125354604314412&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14256
Subject		: kernel BUG at fs/ext3/super.c:435
Submitter	: Mikael Pettersson <mikpe@it.uu.se>
Date		: 2009-09-21 7:29 (36 days old)
References	: http://marc.info/?l=linux-kernel&m=125351816109264&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14252
Subject		: WARNING: at include/linux/skbuff.h:1382 w/ e1000
Submitter	: Stephan von Krawczynski <skraw@ithnet.com>
Date		: 2009-09-20 11:26 (37 days old)
References	: http://marc.info/?l=linux-kernel&m=125344599006033&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14249
Subject		: BUG: oops in gss_validate on 2.6.31
Submitter	: Bastian Blank <bastian@waldi.eu.org>
Date		: 2009-09-16 10:29 (41 days old)
References	: http://marc.info/?l=linux-kernel&m=125309700417283&w=4
Handled-By	: Trond Myklebust <trond.myklebust@fys.uio.no>


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14248
Subject		: 2.6.31 wireless: WARNING: at net/wireless/ibss.c:34
Submitter	: Jurriaan <thunder8@xs4all.nl>
Date		: 2009-09-13 7:32 (44 days old)
References	: http://marc.info/?l=linux-kernel&m=125282721113553&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14204
Subject		: MCE prevent booting on my computer(pentium iii @500Mhz)
Submitter	: GNUtoo <GNUtoo@no-log.org>
Date		: 2009-09-21 20:36 (36 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14181
Subject		: b43 causes panic at ifconfig down / shutdown
Submitter	: Jeremy Huddleston <jeremyhu@freedesktop.org>
Date		: 2009-09-15 18:34 (42 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14157
Subject		: end_request: I/O error, dev cciss/cXdX, sector 0
Submitter	:  <jiri.harcarik@gmail.com>
Date		: 2009-09-11 07:42 (46 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14141
Subject		: order 2 page allocation failures in iwlagn
Submitter	: Frans Pop <elendil@planet.nl>
Date		: 2009-09-06 7:40 (51 days old)
First-Bad-Commit: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=2ff05b2b4eac2e63d345fc731ea151a060247f53
References	: http://marc.info/?l=linux-kernel&m=125222287419691&w=4
		  http://lkml.org/lkml/2009/10/2/86
		  http://lkml.org/lkml/2009/10/5/24
		  http://lkml.indiana.edu/hypermail/linux/kernel/0910.1/01395.html
Handled-By	: Pekka Enberg <penberg@cs.helsinki.fi>


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14114
Subject		: Tuning a saa7134 based card is broken in kernel 2.6.31-rc7
Submitter	: Tsvety Petrov <Tsvetoslav.Petrov@itron.com>
Date		: 2009-09-03 21:06 (54 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14090
Subject		: WARNING: at fs/notify/inotify/inotify_user.c:394
Submitter	: Joerg Platte <bugzilla@jako.ping.de>
Date		: 2009-08-30 15:21 (58 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14058
Subject		: Oops in fsnotify
Submitter	: Grant Wilson <grant.wilson@zen.co.uk>
Date		: 2009-08-20 15:48 (68 days old)
References	: http://marc.info/?l=linux-kernel&m=125078450923133&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=13987
Subject		: Received NMI interrupt at resume
Submitter	: Christian Casteyde <casteyde.christian@free.fr>
Date		: 2009-08-15 07:55 (73 days old)
First-Bad-Commit: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=f41f3f373dd72344c65d801d6381fe83ef3a2c54


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=13943
Subject		: WARNING: at net/mac80211/mlme.c:2292 with ath5k
Submitter	: Fabio Comolli <fabio.comolli@gmail.com>
Date		: 2009-08-06 20:15 (82 days old)
References	: http://marc.info/?l=linux-kernel&m=124958978600600&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=13941
Subject		: x86 Geode issue
Submitter	: Martin-Éric Racine <q-funk@iki.fi>
Date		: 2009-08-03 12:58 (85 days old)
References	: http://marc.info/?l=linux-kernel&m=124930434732481&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=13906
Subject		: Huawei E169 GPRS connection causes Ooops
Submitter	: Clemens Eisserer <linuxhippy@gmail.com>
Date		: 2009-08-04 09:02 (84 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=13836
Subject		: suspend script fails, related to stdout?
Submitter	: Tomas M. <tmezzadra@gmail.com>
Date		: 2009-07-17 21:24 (102 days old)
References	: http://marc.info/?l=linux-kernel&m=124785853811667&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=13809
Subject		: oprofile: possible circular locking dependency detected
Submitter	: Jerome Marchand <jmarchan@redhat.com>
Date		: 2009-07-22 13:35 (97 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=13733
Subject		: 2.6.31-rc2: irq 16: nobody cared
Submitter	: Niel Lambrechts <niel.lambrechts@gmail.com>
Date		: 2009-07-06 18:32 (113 days old)
References	: http://marc.info/?l=linux-kernel&m=124690524027166&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=13645
Subject		: NULL pointer dereference at (null) (level2_spare_pgt)
Submitter	: poornima nayak <mpnayak@linux.vnet.ibm.com>
Date		: 2009-06-17 17:56 (132 days old)
References	: http://lkml.org/lkml/2009/6/17/194


Regressions with patches
------------------------

Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14340
Subject		: speedstep-ich driver not working in 2.6.31
Submitter	:  <dave.mueller@gmx.ch>
Date		: 2009-10-07 08:16 (20 days old)
Handled-By	: Eric Pielbug <e.a.b.piel@tudelft.nl>
		  Rusty Russell <rusty@rustcorp.com.au>
Patch		: http://patchwork.kernel.org/patch/54672/


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14258
Subject		: Memory leak in SCSI initialization
Submitter	: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date		: 2009-09-22 4:18 (35 days old)
References	: http://marc.info/?l=linux-kernel&m=125359311312243&w=4
Handled-By	: Michael Ellerman <michael@ellerman.id.au>
		  James Bottomley <James.Bottomley@suse.de>
Patch		: http://patchwork.kernel.org/patch/51412/


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14253
Subject		: Oops in drivers/base/firmware_class
Submitter	: Lars Ericsson <Lars_Ericsson@telia.com>
Date		: 2009-09-16 20:44 (41 days old)
References	: http://lkml.org/lkml/2009/9/16/461
Handled-By	: Frederik Deweerdt <frederik.deweerdt@xprog.eu>
Patch		: http://patchwork.kernel.org/patch/49914/


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14137
Subject		: usb console regressions
Submitter	: Jason Wessel <jason.wessel@windriver.com>
Date		: 2009-09-05 21:08 (52 days old)
References	: http://marc.info/?l=linux-kernel&m=125218501310512&w=4
Handled-By	: Jason Wessel <jason.wessel@windriver.com>
Patch		: http://patchwork.kernel.org/patch/45953/
		  http://patchwork.kernel.org/patch/45952/


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14017
Subject		: _end symbol missing from Symbol.map
Submitter	: Hannes Reinecke <hare@suse.de>
Date		: 2009-08-13 6:45 (75 days old)
First-Bad-Commit: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=091e52c3551d3031343df24b573b770b4c6c72b6
References	: http://marc.info/?l=linux-kernel&m=125014649102253&w=4
Handled-By	: Hannes Reinecke <hare@suse.de>
Patch		: http://marc.info/?l=linux-kernel&m=125014649102253&w=4


For details, please visit the bug entries and follow the links given in
references.

As you can see, there is a Bugzilla entry for each of the listed regressions.
There also is a Bugzilla entry used for tracking the regressions introduced
between 2.6.30 and 2.6.31, unresolved as well as resolved, at:

http://bugzilla.kernel.org/show_bug.cgi?id=13615

Please let me know if there are any Bugzilla entries that should be added to
the list in there.

Thanks,
Rafael

--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* performance regression in virtio-net in 2.6.32-rc4
From: Michael S. Tsirkin @ 2009-10-26 18:48 UTC (permalink / raw)
  To: Rusty Russell; +Cc: virtualization, kvm, netdev

Hi!
I noticed a performance regression in virtio net: going from
2.6.31 to 2.6.32-rc4 I see this, for guest to host communication:

[mst@tuck ~]$ ssh robin sh streamtest1
TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 11.0.0.3
(11.0.0.3) port 0 AF_INET : demo
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.20    7806.48

[mst@tuck ~]$ ssh robin sh streamtest1
TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 11.0.0.3
(11.0.0.3) port 0 AF_INET : demo
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    10.00    6814.60

Note: I had to revert 48925e372f04f5e35fec6269127c62b2c71ab794,
and I applied a patch
	virtio-pci: fix per-vq MSI-X request logic
which fixes a bug introduced by f68d24082e22ccee3077d11aeb6dc5354f0ca7f1.

Any tips on debugging this?

-- 
MST

^ permalink raw reply

* 2.6.32-rc5-git3: Reported regressions from 2.6.31
From: Rafael J. Wysocki @ 2009-10-26 18:45 UTC (permalink / raw)
  To: Linux Kernel Mailing List
  Cc: Adrian Bunk, Andrew Morton, Linus Torvalds, Natalie Protasevich,
	Kernel Testers List, Network Development, Linux ACPI,
	Linux PM List, Linux SCSI List, Linux Wireless List, DRI

This message contains a list of some regressions from 2.6.31, for which there
are no fixes in the mainline I know of.  If any of them have been fixed already,
please let me know.

If you know of any other unresolved regressions from 2.6.31, please let me know
either and I'll add them to the list.  Also, please let me know if any of the
entries below are invalid.

Each entry from the list will be sent additionally in an automatic reply to
this message with CCs to the people involved in reporting and handling the
issue.


Listed regressions statistics:

  Date          Total  Pending  Unresolved
  ----------------------------------------
  2009-10-26       66       42          37
  2009-10-12       48       31          27
  2009-10-02       22       15           9


Unresolved regressions
----------------------

Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14485
Subject		: System lockup running "cat /sys/kernel/debug/dri/0/i915_regs"
Submitter	: Miles Lane <miles.lane-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-26 4:00 (1 days old)
References	: http://marc.info/?l=linux-kernel&m=125652968117713&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14484
Subject		: no video output after suspend
Submitter	: Riccardo Magliocchetti <riccardo.magliocchetti-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-25 20:57 (2 days old)
References	: http://marc.info/?l=linux-kernel&m=125650430123713&w=4
Handled-By	: Jesse Barnes <jbarnes-Y1mF5jBUw70BENJcbMCuUQ@public.gmane.org>


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14483
Subject		: WARNING: at drivers/base/sys.c:353 __sysdev_resume+0x54/0xca()
Submitter	: Justin Mattock <justinmattock-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-25 19:58 (2 days old)
References	: http://marc.info/?l=linux-kernel&m=125650070420168&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14482
Subject		: kernel BUG at fs/dcache.c:670 +lvm +md +ext3
Submitter	: Alexander Clouter <alex-L4GPcECwBoDe9xe1eoZjHA@public.gmane.org>
Date		: 2009-10-23 10:30 (4 days old)
References	: http://lkml.org/lkml/2009/10/23/50


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14481
Subject		: umount blocked for more than 120 seconds after USB drive removal
Submitter	: Robert Hancock <hancockrwd-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-21 5:26 (6 days old)
References	: http://marc.info/?l=linux-kernel&m=125610280532245&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14479
Subject		: nfs oops
Submitter	: Egon Alter <egon.alter-hi6Y0CQ0nG0@public.gmane.org>
Date		: 2009-10-19 16:03 (8 days old)
References	: http://marc.info/?l=linux-kernel&m=125596822630410&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14477
Subject		: possible circular locking dependency in ISDN PPP
Submitter	: Tilman Schmidt <tilman-ZTO5kqT2PaM@public.gmane.org>
Date		: 2009-10-18 22:16 (9 days old)
References	: http://marc.info/?l=linux-kernel&m=125590423416087&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14473
Subject		: ATA related kernel warning after resume
Submitter	: Tino Keitel <tino.keitel-rAwCM5oiXHA@public.gmane.org>
Date		: 2009-10-14 6:55 (13 days old)
References	: http://marc.info/?l=linux-kernel&m=125550466624678&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14472
Subject		: EXT4 corruption
Submitter	: Shawn Starr <shawn.starr-bJEeYj9oJeDQT0dZR+AlfA@public.gmane.org>
Date		: 2009-10-13 2:07 (14 days old)
References	: http://marc.info/?l=linux-kernel&m=125539997508256&w=4
Handled-By	: Theodore Tso <tytso-3s7WtUTddSA@public.gmane.org>


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14467
Subject		: Linker errors on ia64 with NR_CPUS=4096
Submitter	: Jeff Mahoney <jeffm-IBi9RG/b67k@public.gmane.org>
Date		: 2009-10-18 22:28 (9 days old)
First-Bad-Commit: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=34d76c41554a05425613d16efebb3069c4c545f0
References	: http://marc.info/?l=linux-kernel&m=125590493116720&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14466
Subject		: EFI boot on x86 fails in .32
Submitter	: Matthew Garrett <mjg59-1xO5oi07KQx4cg9Nei1l7Q@public.gmane.org>
Date		: 2009-10-20 0:34 (7 days old)
First-Bad-Commit: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=7bd867dfb4e0357e06a3211ab2bd0e714110def3
References	: http://marc.info/?l=linux-kernel&m=125599887314290&w=4
Handled-By	: Feng Tang <feng.tang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14442
Subject		: resume after hibernate: /dev/sdb drops and returns as /dev/sde
Submitter	: Duncan <1i5t5.duncan-j9pdmedNgrk@public.gmane.org>
Date		: 2009-10-20 01:52 (7 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14430
Subject		: sync() hangs in bdi_sched_wait
Submitter	: Petr Vandrovec <petr-vPk2MGR0e28uaRcfnNAh7A@public.gmane.org>
Date		: 2009-10-17 19:14 (10 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14415
Subject		: Reboot on kernel load
Submitter	: Brian Beardall <brian-sVkzCUl/XCrR7s880joybQ@public.gmane.org>
Date		: 2009-10-15 23:57 (12 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14408
Subject		: sysctl check failed
Submitter	: Peter Teoh <htmldeveloper-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-14 22:59 (13 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14406
Subject		: uvcvideo stopped work on Toshiba
Submitter	: okias <d.okias-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-14 19:08 (13 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14390
Subject		: "bind" a device to a driver doesn't not work anymore
Submitter	: Éric Piel <Eric.Piel-VkQ1JFuSMpfAbQlEx87xDw@public.gmane.org>
Date		: 2009-10-11 0:04 (16 days old)
References	: http://marc.info/?l=linux-kernel&m=125521979921241&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14389
Subject		: Build system issue
Submitter	: Peter Zijlstra <peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>
Date		: 2009-10-09 8:58 (18 days old)
First-Bad-Commit: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=575543347b5baed0ca927cb90ba8807396fe9cc9
References	: http://marc.info/?l=linux-kernel&m=125507914909152&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14387
Subject		: deadlock with fallocate
Submitter	: Thomas Neumann <tneumann-Rn4VEauK+AKRv+LV9MX5uipxlwaOVQ5f@public.gmane.org>
Date		: 2009-10-07 3:00 (20 days old)
References	: http://marc.info/?l=linux-kernel&m=125488495526471&w=4
Handled-By	: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14384
Subject		: tbench regression with 2.6.32-rc1
Submitter	: Zhang, Yanmin <yanmin_zhang-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Date		: 2009-10-09 9:51 (18 days old)
First-Bad-Commit: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=59abf02644c45f1591e1374ee7bb45dc757fcb88
References	: http://marc.info/?l=linux-kernel&m=125508216713138&w=4
Handled-By	: Peter Zijlstra <a.p.zijlstra-/NLkJaSkS4VmR6Xm/wNWPw@public.gmane.org>


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14383
Subject		: hackbench regression with kernel 2.6.32-rc1
Submitter	: Zhang, Yanmin <yanmin_zhang-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Date		: 2009-10-09 9:19 (18 days old)
First-Bad-Commit: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=29cd8bae396583a2ee9a3340db8c5102acf9f6fd
References	: http://marc.info/?l=linux-kernel&m=125508007510274&w=4
Handled-By	: Peter Zijlstra <a.p.zijlstra-/NLkJaSkS4VmR6Xm/wNWPw@public.gmane.org>


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14381
Subject		: iwlagn lost connection after s2ram (with warnings)
Submitter	: Carlos R. Mafra <crmafra2-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-07 14:20 (20 days old)
References	: http://marc.info/?l=linux-kernel&m=125492569119947&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14378
Subject		: Problems with net/core/skbuff.c
Submitter	: Massimo Cetra <mcetra-BBpJ+9iBSNKonA0d6jMUrA@public.gmane.org>
Date		: 2009-10-08 14:51 (19 days old)
References	: http://marc.info/?l=linux-kernel&m=125501488220358&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14376
Subject		: Kernel NULL pointer dereference/ kvm subsystem
Submitter	: Don Dupuis <dondster-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-06 14:38 (21 days old)
References	: http://marc.info/?l=linux-kernel&m=125484025021737&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14373
Subject		: Task blocked for more than 120 seconds
Submitter	: Zeno Davatz <zdavatz-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-02 10:16 (25 days old)
References	: http://marc.info/?l=linux-kernel&m=125447858618412&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14372
Subject		: ath5k wireless not working after suspend-resume - eeepc
Submitter	: Fabio Comolli <fabio.comolli-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-03 15:36 (24 days old)
References	: http://lkml.org/lkml/2009/10/3/91


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14355
Subject		: USB serial regression after 2.6.31.1 with Huawei E169 GSM modem
Submitter	: Benjamin Herrenschmidt <benh-XVmvHMARGAS8U2dJNN8I7kB+6BGkLq7r@public.gmane.org>
Date		: 2009-10-10 03:07 (17 days old)
References	: http://marc.info/?l=linux-kernel&m=125513456327542&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14354
Subject		: Bad corruption with 2.6.32-rc1 and upwards
Submitter	: Holger Freyther <zecke-MQnelBtSfJRAfugRpC6u6w@public.gmane.org>
Date		: 2009-10-09 15:42 (18 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14353
Subject		: BUG: sleeping function called from invalid context at kernel/mutex.c:280
Submitter	: Miles Lane <miles.lane-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-05 3:39 (22 days old)
References	: http://marc.info/?l=linux-kernel&m=125471432208671&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14352
Subject		: WARNING: at net/mac80211/scan.c:267
Submitter	: Maciej Rutecki <maciej.rutecki-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-08 00:30 (19 days old)
References	: http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2089#c7


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14334
Subject		: pcmcia suspend regression from 2.6.31.1 to 2.6.31.2 - Dell Inspiron 600m
Submitter	: Jose Marino <braket-PkbjNfxxIARBDgjK7y7TUQ@public.gmane.org>
Date		: 2009-10-06 15:44 (21 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14331
Subject		: Radeon XPRESS 200M: System hang with radeon DRI and Fedora 10 userspace unless DRI=off
Submitter	: Alex Villacis Lasso <avillaci-x0m+Mc+nT7uljOmnV8AmnkElSqmLX1BE@public.gmane.org>
Date		: 2009-10-06 00:29 (21 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14299
Subject		: oops in wireless, iwl3945 related?
Submitter	: Pavel Machek <pavel-+ZI9xUNit7I@public.gmane.org>
Date		: 2009-09-29 17:12 (28 days old)
References	: http://marc.info/?l=linux-kernel&m=125424439725743&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14298
Subject		: warning at manage.c:361 (set_irq_wake), matrix-keypad related?
Submitter	: Pavel Machek <pavel-+ZI9xUNit7I@public.gmane.org>
Date		: 2009-09-30 20:07 (27 days old)
References	: http://marc.info/?l=linux-kernel&m=125434130703538&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14297
Subject		: console resume broken since ba15ab0e8d
Submitter	: Sascha Hauer <s.hauer-bIcnvbaLZ9MEGnE8C9+IrQ@public.gmane.org>
Date		: 2009-09-30 15:11 (27 days old)
References	: http://marc.info/?l=linux-kernel&m=125432349404060&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14296
Subject		: spitz boots but suspend/resume is broken
Submitter	: Pavel Machek <pavel-+ZI9xUNit7I@public.gmane.org>
Date		: 2009-09-30 12:06 (27 days old)
References	: http://marc.info/?l=linux-kernel&m=125431244516449&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14277
Subject		: Caught 8-bit read from freed memory in b43 driver at association
Submitter	: Christian Casteyde <casteyde.christian-GANU6spQydw@public.gmane.org>
Date		: 2009-09-30 18:06 (27 days old)


Regressions with patches
------------------------

Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14480
Subject		: 2 locks held by cat -- running "find /sys | head -c 4" --> system hang
Submitter	: Miles Lane <miles.lane-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-20 16:11 (7 days old)
References	: http://marc.info/?l=linux-kernel&m=125605511728088&w=4
Handled-By	: Chris Wilson <chris-Y6uKTt2uX1cEflXRtASbqLVCufUGDwFn@public.gmane.org>
Patch		: http://patchwork.kernel.org/patch/54974/


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14380
Subject		: Video tearing/glitching with T400 laptops
Submitter	: Theodore Ts'o <tytso-3s7WtUTddSA@public.gmane.org>
Date		: 2009-10-02 22:40 (25 days old)
References	: http://marc.info/?l=linux-kernel&m=125452324520623&w=4
Handled-By	: Jesse Barnes <jbarnes-Y1mF5jBUw70BENJcbMCuUQ@public.gmane.org>
Patch		: http://marc.info/?l=linux-kernel&m=125591495325000&w=4


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14379
Subject		: ACPI Warning for _SB_.BAT0._BIF: Converted Buffer to expected String
Submitter	: Justin Mattock <justinmattock-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-08 21:46 (19 days old)
First-Bad-Commit: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=d9adc2e031bd22d5d9607a53a8d3b30e0b675f39
References	: http://marc.info/?l=linux-kernel&m=125504031328941&w=4
Handled-By	: Alexey Starikovskiy <astarikovskiy-l3A5Bk7waGM@public.gmane.org>
Patch		: http://bugzilla.kernel.org/attachment.cgi?id=23347


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14375
Subject		: Intel(R) I/OAT DMA Engine init failed
Submitter	: Alexander Beregalov <a.beregalov-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-02 9:46 (25 days old)
References	: http://marc.info/?l=linux-kernel&m=125447680016160&w=4
Handled-By	: Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Patch		: http://patchwork.kernel.org/patch/51808/


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=14302
Subject		: Kernel panic on i386 machine when booting with profile=2
Submitter	: Shi, Alex <alex.shi-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Date		: 2009-10-01 3:23 (26 days old)
References	: http://marc.info/?l=linux-kernel&m=125436749607199&w=4
Handled-By	: Alex Shi <alex.shi-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Patch		: http://patchwork.kernel.org/patch/50813/


For details, please visit the bug entries and follow the links given in
references.

As you can see, there is a Bugzilla entry for each of the listed regressions.
There also is a Bugzilla entry used for tracking the regressions from 2.6.31,
unresolved as well as resolved, at:

http://bugzilla.kernel.org/show_bug.cgi?id=14230

Please let me know if there are any Bugzilla entries that should be added to
the list in there.

Thanks,
Rafael

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] virtio-net: fix data corruption with OOM
From: Michael S. Tsirkin @ 2009-10-26 18:42 UTC (permalink / raw)
  To: Rusty Russell; +Cc: virtualization, kvm, netdev
In-Reply-To: <200910261211.52148.rusty@rustcorp.com.au>

On Mon, Oct 26, 2009 at 12:11:51PM +1030, Rusty Russell wrote:
> On Mon, 26 Oct 2009 03:33:40 am Michael S. Tsirkin wrote:
> > virtio net used to unlink skbs from send queues on error,
> > but ever since 48925e372f04f5e35fec6269127c62b2c71ab794
> > we do not do this. This causes guest data corruption and crashes
> > with vhost since net core can requeue the skb or free it without
> > it being taken off the list.
> > 
> > This patch fixes this by queueing the skb after successful
> > transmit.
> 
> I originally thought that this was racy: as soon as we do add_buf, we need to
> make sure we're ready for the callback (for virtio_pci, it's ->kick, but we
> shouldn't rely on that).

Modified the guest slightly, and I am getting crashes again.
I didn't have time to debug this, but based on previous experience,
I reverted 48925e372f04f5e35fec6269127c62b2c71ab794,
and the crash went away.
Rusty, what do you say we just revert 48925e372f04f5e35fec6269127c62b2c71ab794
for now?

How to reproduce: I used my vhost trees, and modified drivers/vhost/vhost.c :
-       vhost_workqueue = create_workqueue("vhost");
+       vhost_workqueue = create_singlethread_workqueue("vhost");

My guess is this modifies timing and uncovers more races,
but of course there is a possibility that the bug is in vhost.
Still, the fact that 2.6.31 and 48925e372f04f5e35fec6269127c62b2c71ab794
as a guest are both fine, this is a strong hint that
48925e372f04f5e35fec6269127c62b2c71ab794 is to blame.

[   24.555691] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008                      
[   24.556658] IP: [<ffffffffa003f1b1>] free_old_xmit_skbs+0x66/0xcd [virtio_net]                             
[   24.556658] PGD 3e9ee067 PUD 3f38d067 PMD 0                                                                
[   24.556658] Thread overran stack, or stack corrupted                                                       
[   24.556658] Oops: 0002 [#1] SMP                                                                            
[   24.556658] last sysfs file: /sys/devices/virtual/input/input1/capabilities/sw                             
[   24.556658] CPU 0                                                                                          
[   24.556658] Modules linked in: virtio_net virtio_blk virtio_pci virtio_ring virtio af_packet aacraid [last unloaded: scsi_wait_scan]                                                                                     
[   24.556658] Pid: 0, comm: swapper Tainted: G        W  2.6.32-rc4-net #6                                   
[   24.556658] RIP: 0010:[<ffffffffa003f1b1>]  [<ffffffffa003f1b1>] free_old_xmit_skbs+0x66/0xcd [virtio_net] 
[   24.556658] RSP: 0018:ffff880001c03d70  EFLAGS: 00010202                                                   
[   24.556658] RAX: ffff88003e951418 RBX: ffff88003e953398 RCX: 0000000000000000                              
[   24.556658] RDX: 0000000000000000 RSI: ffff880001c03d84 RDI: ffff88003e953398                              
[   24.556658] RBP: ffff880001c03db0 R08: ffff88003e2c949c R09: 00000000ffffffff                              
[   24.556658] R10: ffff880001c03f78 R11: 00000000fffbcc57 R12: ffff88003e65cdc0                              
[   24.556658] R13: 0000000000000000 R14: 2000000000000000 R15: ffff880001c03d84                              
[   24.556658] FS:  0000000000000000(0000) GS:ffff880001c00000(0000) knlGS:0000000000000000                   
[   24.556658] CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b                                              
[   24.556658] CR2: 0000000000000008 CR3: 000000003eee4000 CR4: 00000000000006b0                              
[   24.556658] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000                              
[   24.556658] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400                              
[   24.556658] Process swapper (pid: 0, threadinfo ffffffff8174e000, task ffffffff817c09f0)                   
[   24.556658] Stack:                                                                                         
[   24.556658]  0000000000000002 0000000000000000 0000000000000000 ffff88003e953398                           
[   24.556658] <0> ffff88003e953398 ffff88003e65cdc0 ffff88003e65c800 ffff88003e65ce70                        
[   24.556658] <0> ffff880001c03df0 ffffffffa003fb35 ffff88003e65cc28 ffff88003e953398                        
[   24.556658] Call Trace:                                                                                    
[   24.556658]  <IRQ>                                                                                         
[   24.556658]  [<ffffffffa003fb35>] start_xmit+0x38/0x15f [virtio_net]                                       
[   24.556658]  [<ffffffff813ff768>] dev_hard_start_xmit+0x26c/0x2d3                                          
[   24.556658]  [<ffffffff81412016>] sch_direct_xmit+0x5a/0x157                                               
[   24.556658]  [<ffffffff814121cf>] __qdisc_run+0xbc/0xdd                                                    
[   24.556658]  [<ffffffff813fce1c>] net_tx_action+0xc2/0x120                                                 
[   24.556658]  [<ffffffff81047efe>] __do_softirq+0xd8/0x192                                                  
[   24.556658]  [<ffffffff8100cb3c>] call_softirq+0x1c/0x28                                                   
[   24.556658]  [<ffffffff8100ddb7>] do_softirq+0x33/0x6b                                                     
[   24.556658]  [<ffffffff81047d5c>] irq_exit+0x36/0x75                                                       
[   24.556658]  [<ffffffff8100d692>] do_IRQ+0xa8/0xbf                                                         
[   24.556658]  [<ffffffff8100c3d3>] ret_from_intr+0x0/0xa                                                    
[   24.556658]  <EOI>                                                                                         
[   24.556658]  [<ffffffff81011de3>] ? default_idle+0x31/0x46                                                 
[   24.556658]  [<ffffffff81011dc5>] ? default_idle+0x13/0x46                                                 
[   24.556658]  [<ffffffff8100ae53>] ? cpu_idle+0x55/0x8d                                                     
[   24.556658]  [<ffffffff814d1982>] ? rest_init+0x66/0x68                                                    
[   24.556658]  [<ffffffff818adc5d>] ? start_kernel+0x360/0x36b                                               
[   24.556658]  [<ffffffff818ad29a>] ? x86_64_start_reservations+0xaa/0xae                                    
[   24.556658]  [<ffffffff818ad37f>] ? x86_64_start_kernel+0xe1/0xe8                                          
[   24.556658] Code: fc 26 00 00 00 75 75 41 ff 8c 24 c0 00 00 00 48 89 df 48 8b 13 48 8b 43 08 48 c7 03 00 00 00 00 48 c7 43 08 00 00 00 00 48 89 10 <48> 89 42 08 49 8b 54 24 20 8b 43 68 48 01 82 98 00 00 00 49 8b      
[   24.556658] RIP  [<ffffffffa003f1b1>] free_old_xmit_skbs+0x66/0xcd [virtio_net]                            
[   24.556658]  RSP <ffff880001c03d70>                                                                        
[   24.556658] CR2: 0000000000000008                                                                          
[   24.722629] ---[ end trace 6ac04221a0ae018b ]---                                                           
[   24.725010] Kernel panic - not syncing: Fatal exception in interrupt                                       
[   24.727696] Pid: 0, comm: swapper Tainted: G      D W  2.6.32-rc4-net #6                                   
[   24.730447] Call Trace:                                                                                    
[   24.732443]  <IRQ>  [<ffffffff814eb553>] panic+0x75/0x127                                                  
[   24.735097]  [<ffffffff814ee350>] oops_end+0xaa/0xba                                                       
[   24.737520]  [<ffffffff81029002>] no_context+0x1ea/0x1f9                                                   
[   24.740024]  [<ffffffff810291c4>] __bad_area_nosemaphore+0x1b3/0x1d9                                       
[   24.742779]  [<ffffffff810291f8>] bad_area_nosemaphore+0xe/0x10                                            
[   24.745399]  [<ffffffff814ef73c>] do_page_fault+0x186/0x2c3                                                
[   24.748009]  [<ffffffff814ed8bf>] page_fault+0x1f/0x30                                                     
[   24.750463]  [<ffffffffa003f1b1>] ? free_old_xmit_skbs+0x66/0xcd [virtio_net]                              
[   24.753299]  [<ffffffffa003fb35>] start_xmit+0x38/0x15f [virtio_net]                                       
[   24.755990]  [<ffffffff813ff768>] dev_hard_start_xmit+0x26c/0x2d3                                          
[   24.758635]  [<ffffffff81412016>] sch_direct_xmit+0x5a/0x157                                               
[   24.761204]  [<ffffffff814121cf>] __qdisc_run+0xbc/0xdd                                                    
[   24.763693]  [<ffffffff813fce1c>] net_tx_action+0xc2/0x120                                                 
[   24.766236]  [<ffffffff81047efe>] __do_softirq+0xd8/0x192                                                  
[   24.768754]  [<ffffffff8100cb3c>] call_softirq+0x1c/0x28                                                   
[   24.771326]  [<ffffffff8100ddb7>] do_softirq+0x33/0x6b                                                     
[   24.773793]  [<ffffffff81047d5c>] irq_exit+0x36/0x75                                                       
[   24.776241]  [<ffffffff8100d692>] do_IRQ+0xa8/0xbf                                                         
[   24.778705]  [<ffffffff8100c3d3>] ret_from_intr+0x0/0xa                                                    
[   24.781191]  <EOI>  [<ffffffff81011de3>] ? default_idle+0x31/0x46                                          
[   24.783961]  [<ffffffff81011dc5>] ? default_idle+0x13/0x46                                                 
[   24.786487]  [<ffffffff8100ae53>] ? cpu_idle+0x55/0x8d                                                     
[   24.788967]  [<ffffffff814d1982>] ? rest_init+0x66/0x68                                                    
[   24.791448]  [<ffffffff818adc5d>] ? start_kernel+0x360/0x36b                                               
[   24.794014]  [<ffffffff818ad29a>] ? x86_64_start_reservations+0xaa/0xae                                    
[   24.796747]  [<ffffffff818ad37f>] ? x86_64_start_kernel+0xe1/0xe8       



^ permalink raw reply

* Re: [PATCH next-next-2.6] netdev: better dev_name_hash
From: Stephen Hemminger @ 2009-10-26 17:45 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Octavian Purdila, Eric Dumazet, Krishna Kumar2,
	Hagen Paul Pfeifer, netdev
In-Reply-To: <20091026095516.02f1cb49@nehalam>

Another algorithm that scores well in my tests.

http://isthe.com/chongo/tech/comp/fnv/

Algorithm             Time       Ratio       Max   StdDev
string10             1.433267       1.00     39064   0.01
string_hash17        1.461422       1.00     39497   1.50
fnv1a                1.472216       1.00     39895   2.25
jhash_string         1.482494       1.00     39669   1.04


/*
 * fnv32 - hash @len bytes starting at @key.
 *
 * Each input byte is XORed into the running value, which is then
 * multiplied by 0x01000193 (arithmetic wraps at 32 bits).  With
 * @len == 0 the initial value 2166136261 is returned unchanged.
 */
static unsigned int fnv32(const unsigned char *key, unsigned int len)
{
	uint32_t h = 2166136261;

	while (len--) {
		uint32_t x = h ^ *key++;

		/* x * 0x01000193, spelled out as shifts and adds */
		h = x + (x << 1) + (x << 4) + (x << 7)
		      + (x << 8) + (x << 24);
	}

	return h;
}

^ permalink raw reply

* Re: [PATCH 0/5] Candidate fix for increased number of GFP_ATOMIC failures V2
From: Tobias Oetiker @ 2009-10-26 17:37 UTC (permalink / raw)
  To: Mel Gorman
  Cc: Frans Pop, Jiri Kosina, Sven Geggus, Karol Lewandowski,
	Rafael J. Wysocki, David Miller, Reinette Chatre, Kalle Valo,
	David Rientjes, KOSAKI Motohiro, Mohamed Abbas, Jens Axboe,
	John W. Linville, Pekka Enberg, Bartlomiej Zolnierkiewicz,
	Greg Kroah-Hartman, Stephan von Krawczynski, Kernel Testers List,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org\"
In-Reply-To: <1256221356-26049-1-git-send-email-mel-wPRd99KPJ+uzQB+pC5nmwQ@public.gmane.org>

Hi Mel,

I have now done additional tests ... and can report the following

Thursday Mel Gorman wrote:

>   1/5 page allocator: Always wake kswapd when restarting an allocation attempt after direct reclaim failed
>   2/5 page allocator: Do not allow interrupts to use ALLOC_HARDER
>
>
> 	These patches correct problems introduced by me during the 2.6.31-rc1
> 	merge window. The patches were not meant to introduce any functional
> 	changes but two were missed.
>
> 	If your problem goes away with just these two patches applied,
> 	please tell me.

1+2 do not help

> Test 3: If you are getting allocation failures, try with the following patch
>
>   3/5 vmscan: Force kswapd to take notice faster when high-order watermarks are being hit
>
> 	This is a functional change that causes kswapd to notice sooner
> 	when high-order watermarks have been hit. There have been a number
> 	of changes in page reclaim since 2.6.30 that might have delayed
> 	when kswapd kicks in for higher orders
>
> 	If your problem goes away with these three patches applied, please
> 	tell me

1+2+3 do not help either

> Test 4: If you are still getting failures, apply the following
>   4/5 page allocator: Pre-emptively wake kswapd when high-order watermarks are hit
>
> 	This patch is very heavy handed and pre-emptively kicks kswapd when
> 	watermarks are hit. It should only be necessary if there has been
> 	significant changes in the timing and density of page allocations
> 	from an unknown source. Tobias, this patch is largely aimed at you.
> 	You reported that with patches 3+4 applied that your problems went
> 	away. I need to know if patch 3 on its own is enough or if both
> 	are required
>
> 	If your problem goes away with these four patches applied, please
> 	tell me

3 alone does not help
3+4 does ...

cheers
tobi
-- 
Tobi Oetiker, OETIKER+PARTNER AG, Aarweg 15 CH-4600 Olten, Switzerland
http://it.oetiker.ch tobi-7K0TWYW2a3pyDzI6CaY1VQ@public.gmane.org ++41 62 775 9902 / sb: -9900

^ permalink raw reply

* Re: iwl3945, after a while stops working with "No space for Tx"
From: Fredi @ 2009-10-26 17:04 UTC (permalink / raw)
  To: reinette chatre
  Cc: YiZhu, Intel Linux Wireless, John W. Linville, TomasWinkler,
	AbhijeetKolekar, linux-wireless@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <1256574352.21134.7591.camel@rc-desk>

Hi
first thanks for your reply,
--- On Mon, 26/10/09, reinette chatre <reinette.chatre@intel.com> wrote:

> From: reinette chatre <reinette.chatre@intel.com>
> Subject: Re: iwl3945, after a while stops working with "No space for Tx"
> To: "Frederik Nosi" <fredin77@yahoo.com>
> Cc: "Zhu, Yi" <yi.zhu@intel.com>, "Intel Linux Wireless" <ilw@linux.intel.com>, "John W. Linville" <linville@tuxdriver.com>, "Winkler, Tomas" <tomas.winkler@intel.com>, "Kolekar, Abhijeet" <abhijeet.kolekar@intel.com>, "linux-wireless@vger.kernel.org" <linux-wireless@vger.kernel.org>, "netdev@vger.kernel.org" <netdev@vger.kernel.org>, "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
> Date: Monday, 26 October, 2009, 5:25 PM
> On Sat, 2009-10-24 at 06:44 -0700,
> Frederik Nosi wrote:
> > Hi,
> > first sorry if somebody is not the right contact, got
> the adressess from ./scripts/get_maintainer.pl -f
> drivers/net/wireless/iwlwifi.
> > From some kernel versions now, after some time that im
> using this card it stops working and on messages i get this
> errors:
> > 
> > Oct 24 14:38:24 kotys NetworkManager:
> <info>  Activation (wlan0) Stage 5 of 5 (IP
> Configure Commit) complete.
> > [snip pulseaudio's stuppid log spam warning]
> > Oct 24 14:40:07 kotys kernel: iwl3945 0000:04:00.0:
> Error sending REPLY_TX_PWR_TABLE_CMD: time out after 500ms.
> > Oct 24 14:40:09 kotys pulseaudio[16469]: sap.c:
> sendmsg() failed: Invalid argument
> > Oct 24 14:40:10 kotys kernel: iwl3945 0000:04:00.0:
> Error sending REPLY_SCAN_CMD: time out after 500ms.
> > Oct 24 14:40:11 kotys kernel: iwl3945 0000:04:00.0:
> Error sending REPLY_TX_PWR_TABLE_CMD: time out after 500ms.
> > Oct 24 14:40:11 kotys kernel: iwl3945 0000:04:00.0:
> Error sending POWER_TABLE_CMD: time out after 500ms.
> > Oct 24 14:40:14 kotys kernel: iwl3945 0000:04:00.0:
> set power fail, ret = -110
> > Oct 24 14:40:14 kotys kernel: No probe response from
> AP 00:1c:df:82:63:c9 after 500ms, disconnecting.
> > Oct 24 14:40:14 kotys NetworkManager:
> <info>  (wlan0): supplicant connection
> state:  completed -> disconnected
> > Oct 24 14:40:14 kotys NetworkManager:
> <info>  (wlan0): supplicant connection
> state:  disconnected -> scanning
> > Oct 24 14:40:14 kotys pulseaudio[16469]: sap.c:
> sendmsg() failed: Invalid argument
> > Oct 24 14:40:14 kotys kernel: iwl3945 0000:04:00.0:
> Error sending REPLY_RXON: time out after 500ms.
> > Oct 24 14:40:14 kotys kernel: iwl3945 0000:04:00.0:
> Error setting new configuration (-110).
> > Oct 24 14:40:15 kotys kernel: iwl3945 0000:04:00.0:
> Error sending REPLY_SCAN_CMD: time out after 500ms.
> > Oct 24 14:40:15 kotys kernel: wlan0: direct probe to
> AP 00:1c:df:82:63:c9 (try 1)
> > Oct 24 14:40:15 kotys NetworkManager:
> <info>  (wlan0): supplicant connection
> state:  scanning -> associating
> > Oct 24 14:40:15 kotys kernel: wlan0: direct probe to
> AP 00:1c:df:82:63:c9 (try 2)
> > Oct 24 14:40:15 kotys kernel: wlan0: direct probe to
> AP 00:1c:df:82:63:c9 (try 3)
> > Oct 24 14:40:15 kotys kernel: iwl3945 0000:04:00.0:
> Error sending REPLY_RXON: time out after 500ms.
> > Oct 24 14:40:15 kotys kernel: iwl3945 0000:04:00.0:
> Error setting new configuration (-110).
> > Oct 24 14:40:15 kotys kernel: wlan0: direct probe to
> AP 00:1c:df:82:63:c9 timed out
> > Oct 24 14:40:16 kotys kernel: iwl3945 0000:04:00.0:
> Error sending REPLY_TX_PWR_TABLE_CMD: time out after 500ms.
> > Oct 24 14:40:16 kotys kernel: iwl3945 0000:04:00.0:
> Error sending REPLY_RXON: time out after 500ms.
> > Oct 24 14:40:16 kotys kernel: iwl3945 0000:04:00.0:
> Error setting new configuration (-110).
> > 
> > 
> > When this happens i am able to use the card only after
> reloading the related modules, iwl3945 ecc. But the problem
> happens again after some minutes that im connected.
> > 
> > Kernel is 2.6.32-rc5 but his started happening around
> 2.6.31, not sure exactly what version as i update kernel
> often. Firmware is iwl3945-ucode-15.32.2.9.
> > 
> > Every other info that you need just ask. I hope i dont
> have to bisect as this is the laptop i use for work too, but
> if it's needed i'll do.
> > 
> > I'm not subscribed on any ML, so in case please cc
> me.
> > 
> 
> We currently have a bug open for this issue. Could you
> please add this
>
> information to
> http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=1944
> ?

Done, noticed that there's a patch for this issue. Will try and tell the results. And noticed the link on how to report possible firmware problems, will follow that procedure and post the results of that too.
 
> Thank you

Thanks for pointing me in the right direction!

> Reinette
> 
> 

Frederik


      

^ permalink raw reply

* Re: [PATCH v3 1/7] Only parse time stamp TCP option in time wait sock
From: William Allen Simpson @ 2009-10-26 16:56 UTC (permalink / raw)
  To: Gilad Ben-Yossef; +Cc: netdev, ori, Yony Amit
In-Reply-To: <1256544393-12450-2-git-send-email-gilad@codefidence.com>

Gilad Ben-Yossef wrote:
> Since we only use tcp_parse_options here to check for the exietence
> of TCP timestamp option in the header, it is better to call with
> the "established" flag on.
> 
Please explain how this patch is required for the other patches?

And more importantly, why it is better to call with established on?

And most importantly, what end cases you considered, and how this
interacts with the proposed rfc1323bis changes, especially on reset?

^ permalink raw reply

* Re: [PATCH next-next-2.6] netdev: better dev_name_hash
From: Stephen Hemminger @ 2009-10-26 16:55 UTC (permalink / raw)
  To: Octavian Purdila; +Cc: Eric Dumazet, Krishna Kumar2, Hagen Paul Pfeifer, netdev
In-Reply-To: <200910261752.51784.opurdila@ixiacom.com>

Added more algorithms to test...

Time is in seconds for 10000000 entries with hashbits = 8
Ratio is number of probes / ideal hash probes

Result sorted by distribution:

Algorithm             Time       Ratio       Max   StdDev
string10             1.434087       1.00     39064   0.01
SuperFastHash        1.469511       1.00     40497   2.17
string_hash17        1.472544       1.00     39497   1.50
jhash_string         1.501508       1.00     39669   1.04
crc                  2.826795       1.00     39088   0.07
md5_string           3.608253       1.00     39605   0.98
djb2                 1.462722       1.15     60681  76.16
string_hash31        1.457253       1.21     64950  91.12
sdbm                 1.566174       2.38    129900 232.22
pjw                  1.527306       2.45     99990 237.86
elf                  1.576096       2.45     99990 237.86
kr_hash              1.400072       7.80    468451 515.52
fletcher             1.449671       7.80    468451 515.52
full_name_hash       1.487707      13.09    562501 687.24
xor                  1.400403      13.36    583189 694.98
lastchar             1.348798      25.60   1000000 980.27

Another run sorted by speed:
Algorithm             Time       Ratio       Max   StdDev
lastchar             1.338545      25.60   1000000 980.27
kr_hash              1.398453       7.80    468451 515.52
xor                  1.398843      13.36    583189 694.98
string10             1.432756       1.00     39064   0.01
fletcher             1.448499       7.80    468451 515.52
string_hash31        1.457524       1.21     64950  91.12
string_hash17        1.462548       1.00     39497   1.50
djb2                 1.462956       1.15     60681  76.16
SuperFastHash        1.469907       1.00     40497   2.17
full_name_hash       1.486465      13.09    562501 687.24
jhash_string         1.500959       1.00     39669   1.04
pjw                  1.526097       2.45     99990 237.86
sdbm                 1.566533       2.38    129900 232.22
elf                  1.576470       2.45     99990 237.86
crc                  2.811210       1.00     39088   0.07
md5_string           3.604675       1.00     39605   0.98


^ permalink raw reply

* Re: [PATCH]NET/KS8695: add support NAPI for Rx
From: Ben Hutchings @ 2009-10-26 16:49 UTC (permalink / raw)
  To: Figo.zhang
  Cc: Daniel Silverstone, David S. Miller, netdev, Vincent Sanders,
	Ben Dooks
In-Reply-To: <1256572828.2148.5.camel@myhost>

On Tue, 2009-10-27 at 00:00 +0800, Figo.zhang wrote:
> Add support NAPI Rx API for KS8695NET driver.
> 
> Signed-off-by: Figo.zhang <figo1802@gmail.com>
> --- 
> drivers/net/arm/ks8695net.c |  165 +++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 165 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/net/arm/ks8695net.c b/drivers/net/arm/ks8695net.c
> index 2a7b774..7f9d4bd 100644
> --- a/drivers/net/arm/ks8695net.c
> +++ b/drivers/net/arm/ks8695net.c
> @@ -35,12 +35,16 @@
>  
>  #include <mach/regs-switch.h>
>  #include <mach/regs-misc.h>
> +#include <asm/mach/irq.h>
> +#include <mach/regs-irq.h>
> 
>  #include "ks8695net.h"
>  
>  #define MODULENAME	"ks8695_ether"
>  #define MODULEVERSION	"1.01"

I think this merits a version bump.
 
> +#define KS8695NET_NAPI  1
> +
>  /*
>   * Transmit and device reset timeout, default 5 seconds.
>   */
> @@ -152,6 +156,10 @@ struct ks8695_priv {
>  	enum ks8695_dtype dtype;
>  	void __iomem *io_regs;
>  
> +	#ifdef KS8695NET_NAPI
> +	struct napi_struct	napi;
> +	#endif
> +

NAPI is well-established and there should be no need to make it
optional.  So far as I'm aware, all other drivers that had it as an
option now use it unconditionally.

>  	const char *rx_irq_name, *tx_irq_name, *link_irq_name;
>  	int rx_irq, tx_irq, link_irq;
>  
> @@ -172,6 +180,7 @@ struct ks8695_priv {
>  	dma_addr_t rx_ring_dma;
>  	struct ks8695_skbuff rx_buffers[MAX_RX_DESC];
>  	int next_rx_desc_read;
> +	spinlock_t rx_lock;
>  
>  	int msg_enable;
>  };
> @@ -391,6 +400,155 @@ ks8695_tx_irq(int irq, void *dev_id)
>  	return IRQ_HANDLED;
>  }
>  
> +#ifdef KS8695NET_NAPI
> +static irqreturn_t
> +ks8695_rx_irq(int irq, void *dev_id)
> +{
> +	struct net_device *ndev = (struct net_device *)dev_id;
> +	struct ks8695_priv *ksp = netdev_priv(ndev);
> +	unsigned long status;
> +
> +	unsigned long mask_bit = 1 << ksp->rx_irq;
> +
> +	spin_lock(&ksp->rx_lock);
> +
> +	status = __raw_readl(KS8695_IRQ_VA + KS8695_INTST);
> +
> +	/*clean rx status bit*/
> +	__raw_writel(status | mask_bit , KS8695_IRQ_VA + KS8695_INTST);
> +
> +	if (status & mask_bit) {
> +		if (napi_schedule_prep(&ksp->napi)) {
> +			/*disable rx interrupt*/
> +			status &= ~mask_bit;
> +			__raw_writel(status , KS8695_IRQ_VA + KS8695_INTEN);
> +			__napi_schedule(&ksp->napi);
> +		}
> +	}
> +
> +	spin_unlock(&ksp->rx_lock);
> +	return IRQ_HANDLED;
> +}

The interrupt register manipulation here looks wrong, but I don't have
specific knowledge of this platform.

Since the interrupt control registers appear to be shared with other
devices, this needs to be serialised with manipulation by other drivers.

> +static int ks8695_rx(struct net_device *ndev, int budget)
> +{
> +	struct ks8695_priv *ksp = netdev_priv(ndev);
> +	struct sk_buff *skb;
> +	int buff_n;
> +	u32 flags;
> +	int pktlen;
> +	int last_rx_processed = -1;
> +	int received = 0;
> +
> +	buff_n = ksp->next_rx_desc_read;
> +	while (netif_running(ndev) && received < budget

netif_running() is quite redundant here.

> +			&& ksp->rx_buffers[buff_n].skb
> +			&& (!(ksp->rx_ring[buff_n].status &
> +					cpu_to_le32(RDES_OWN)))) {
> +			rmb();
> +			flags = le32_to_cpu(ksp->rx_ring[buff_n].status);
> +			/* Found an SKB which we own, this means we
> +			 * received a packet
> +			 */
> +			if ((flags & (RDES_FS | RDES_LS)) !=
> +			    (RDES_FS | RDES_LS)) {
> +				/* This packet is not the first and
> +				 * the last segment.  Therefore it is
> +				 * a "spanning" packet and we can't
> +				 * handle it
> +				 */
> +				goto rx_failure;
> +			}
> +
> +			if (flags & (RDES_ES | RDES_RE)) {
> +				/* It's an error packet */
> +				ndev->stats.rx_errors++;
> +				if (flags & RDES_TL)
> +					ndev->stats.rx_length_errors++;
> +				if (flags & RDES_RF)
> +					ndev->stats.rx_length_errors++;
> +				if (flags & RDES_CE)
> +					ndev->stats.rx_crc_errors++;
> +				if (flags & RDES_RE)
> +					ndev->stats.rx_missed_errors++;
> +
> +				goto rx_failure;
> +			}
> +
> +			pktlen = flags & RDES_FLEN;
> +			pktlen -= 4; /* Drop the CRC */
> +
> +			/* Retrieve the sk_buff */
> +			skb = ksp->rx_buffers[buff_n].skb;
> +
> +			/* Clear it from the ring */
> +			ksp->rx_buffers[buff_n].skb = NULL;
> +			ksp->rx_ring[buff_n].data_ptr = 0;
> +
> +			/* Unmap the SKB */
> +			dma_unmap_single(ksp->dev,
> +					 ksp->rx_buffers[buff_n].dma_ptr,
> +					 ksp->rx_buffers[buff_n].length,
> +					 DMA_FROM_DEVICE);
> +
> +			/* Relinquish the SKB to the network layer */
> +			skb_put(skb, pktlen);
> +			skb->protocol = eth_type_trans(skb, ndev);
> +			netif_receive_skb(skb);
> +
> +			/* Record stats */
> +			ndev->stats.rx_packets++;
> +			ndev->stats.rx_bytes += pktlen;
> +			goto rx_finished;
> +
> +rx_failure:
> +			/* This ring entry is an error, but we can
> +			 * re-use the skb
> +			 */
> +			/* Give the ring entry back to the hardware */
> +			ksp->rx_ring[buff_n].status = cpu_to_le32(RDES_OWN);
> +rx_finished:
> +			received++;
> +			/* And note this as processed so we can start
> +			 * from here next time
> +			 */
> +			last_rx_processed = buff_n;
> +			buff_n = (buff_n + 1) & MAX_RX_DESC_MASK;
> +			/*And note which RX descriptor we last did */
> +			if (likely(last_rx_processed != -1))
> +				ksp->next_rx_desc_read =
> +					(last_rx_processed + 1) &
> +					MAX_RX_DESC_MASK;
> +
> +			/* And refill the buffers */
> +			ks8695_refill_rxbuffers(ksp);
> +	}
> +	return received;
> +}
> +
> +static int ks8695_poll(struct napi_struct *napi, int budget)
> +{
> +	struct ks8695_priv *ksp = container_of(napi, struct ks8695_priv, napi);
> +	struct net_device *dev = ksp->ndev;
> +	unsigned long mask_bit = 1 << ksp->rx_irq;
> +	unsigned long isr = __raw_readl(KS8695_IRQ_VA + KS8695_INTEN);

This is surely the wrong place to be reading this register.

> +	unsigned long  work_done = 0;

Pointless initialisation.

> +
> +	work_done = ks8695_rx(dev, budget);
> +
> +	if (work_done < budget) {
> +		unsigned long flags;
> +		spin_lock_irqsave(&ksp->rx_lock, flags);
> +		/*enable rx interrupt*/
> +		__raw_writel(isr | mask_bit, KS8695_IRQ_VA + KS8695_INTEN);
> +		__napi_complete(napi);
> +		spin_unlock_irqrestore(&ksp->rx_lock, flags);
> +	}
> +	return work_done;
> +}
> +
> +#else
>  /**
>   *	ks8695_rx_irq - Receive IRQ handler
>   *	@irq: The IRQ which went off (ignored)
> @@ -503,6 +661,8 @@ rx_finished:
>  	return IRQ_HANDLED;
>  }
>  
> +#endif
> +
>  /**
>   *	ks8695_link_irq - Link change IRQ handler
>   *	@irq: The IRQ which went off (ignored)
> @@ -1472,6 +1632,10 @@ ks8695_probe(struct platform_device *pdev)
>  	SET_ETHTOOL_OPS(ndev, &ks8695_ethtool_ops);
>  	ndev->watchdog_timeo	 = msecs_to_jiffies(watchdog);
>  
> +#ifdef KS8695NET_NAPI
> +	netif_napi_add(ndev, &ksp->napi, ks8695_poll, 64);
> +#endif
> +
>  	/* Retrieve the default MAC addr from the chip. */
>  	/* The bootloader should have left it in there for us. */
>  
> @@ -1505,6 +1669,7 @@ ks8695_probe(struct platform_device *pdev)
>  
>  	/* And initialise the queue's lock */
>  	spin_lock_init(&ksp->txq_lock);
> +	spin_lock_init(&ksp->rx_lock);
>  
>  	/* Specify the RX DMA ring buffer */
>  	ksp->rx_ring = ksp->ring_base + TX_RING_DMA_SIZE;

You're missing a netif_napi_del() on removal.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

* Re: [PATCH]NET/KS8695: add support NAPI for Rx
From: Daniel Silverstone @ 2009-10-26 16:27 UTC (permalink / raw)
  To: Figo.zhang; +Cc: David S. Miller, netdev, Vincent Sanders, Ben Dooks
In-Reply-To: <1256572828.2148.5.camel@myhost>

On Tue, Oct 27, 2009 at 12:00:28AM +0800, Figo.zhang wrote:
> +#ifdef KS8695NET_NAPI
> +static irqreturn_t
> +ks8695_rx_irq(int irq, void *dev_id)

This routine lacks its documentation comment.  This driver is fully documented
in order to serve as a good example for others.  Indeed this lack of
documentation comments continues through your patch, I won't bring up each
instance, instead trusting you to go back over your patch and sort them out.

> +	status = __raw_readl(KS8695_IRQ_VA + KS8695_INTST);
[snip]
> +	__raw_writel(status | mask_bit , KS8695_IRQ_VA + KS8695_INTST);
[snip]
> +			__raw_writel(status , KS8695_IRQ_VA + KS8695_INTEN);
[snip]
> +	unsigned long isr = __raw_readl(KS8695_IRQ_VA + KS8695_INTEN);
[snip]
> +		__raw_writel(isr | mask_bit, KS8695_IRQ_VA + KS8695_INTEN);

Please don't use __raw_readl or __raw_writel.  This driver was nice and clean,
don't ruin it.

Also, as an aside, you seem to add a spinlock (rx_lock) which afaict is only
used by NAPI related routines, and yet you include it regardless of NAPI being
enabled or not.  Did I misread your patch, or is this an oversight?

Regards,

Daniel.

-- 
Daniel Silverstone                              http://www.simtec.co.uk/

^ permalink raw reply

* Re: [RFC] [PATCH] udp: Don't save dst in udpv6_sendmsg()
From: Rick Jones @ 2009-10-26 16:45 UTC (permalink / raw)
  To: David Miller; +Cc: krkumar2, netdev
In-Reply-To: <20091024.064010.145864194.davem@davemloft.net>

David Miller wrote:
> From: Krishna Kumar <krkumar2@in.ibm.com>
> Date: Fri, 23 Oct 2009 16:43:36 +0530
> 
> 
>>Performance: I ran netperf UDPv6 RR to use connected sockets.
>>Tested with a 70 min run, aggregate of 5 netperf runs for
>>each result.
> 
> 
> Who actually uses connected UDP sockets? :-)

Somebody must, they sent me patches to optionally connect() the endpoints in a 
UDP_RR test :)

rick jones

trying to decide if he should by default set SO_DONTROUTE on UDP sockets to 
cover the backsides of testers who cause link-down events on devices under test 
with systems connected to their employer's site lans with the default route 
pointing at same...

^ permalink raw reply

* Re: [PATCH v3 4/7] Add the no SACK route option feature
From: William Allen Simpson @ 2009-10-26 16:38 UTC (permalink / raw)
  To: Gilad Ben-Yossef; +Cc: netdev, ori
In-Reply-To: <1256544393-12450-5-git-send-email-gilad@codefidence.com>

Gilad Ben-Yossef wrote:
> Implement querying and acting upon the no sack bit in the features
> field.
> 
> Signed-off-by: Gilad Ben-Yossef <gilad@codefidence.com>
> Sigend-off-by: Ori Finkelman <ori@comsleep.com>
> Sigend-off-by: Yony Amit <yony@comsleep.com>
> 
Please explain how this code turns SACK on when it is off globally?

As both Eric and I asked?

^ permalink raw reply

* Re: iwl3945, after a while stops working with "No space for Tx"
From: reinette chatre @ 2009-10-26 16:25 UTC (permalink / raw)
  To: Frederik Nosi
  Cc: Zhu, Yi, Intel Linux Wireless, John W. Linville, Winkler, Tomas,
	Kolekar, Abhijeet, linux-wireless@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <177253.23325.qm@web53903.mail.re2.yahoo.com>

On Sat, 2009-10-24 at 06:44 -0700, Frederik Nosi wrote:
> Hi,
> first sorry if somebody is not the right contact, got the adressess from ./scripts/get_maintainer.pl -f drivers/net/wireless/iwlwifi.
> From some kernel versions now, after some time that im using this card it stops working and on messages i get this errors:
> 
> Oct 24 14:38:24 kotys NetworkManager: <info>  Activation (wlan0) Stage 5 of 5 (IP Configure Commit) complete.
> [snip pulseaudio's stuppid log spam warning]
> Oct 24 14:40:07 kotys kernel: iwl3945 0000:04:00.0: Error sending REPLY_TX_PWR_TABLE_CMD: time out after 500ms.
> Oct 24 14:40:09 kotys pulseaudio[16469]: sap.c: sendmsg() failed: Invalid argument
> Oct 24 14:40:10 kotys kernel: iwl3945 0000:04:00.0: Error sending REPLY_SCAN_CMD: time out after 500ms.
> Oct 24 14:40:11 kotys kernel: iwl3945 0000:04:00.0: Error sending REPLY_TX_PWR_TABLE_CMD: time out after 500ms.
> Oct 24 14:40:11 kotys kernel: iwl3945 0000:04:00.0: Error sending POWER_TABLE_CMD: time out after 500ms.
> Oct 24 14:40:14 kotys kernel: iwl3945 0000:04:00.0: set power fail, ret = -110
> Oct 24 14:40:14 kotys kernel: No probe response from AP 00:1c:df:82:63:c9 after 500ms, disconnecting.
> Oct 24 14:40:14 kotys NetworkManager: <info>  (wlan0): supplicant connection state:  completed -> disconnected
> Oct 24 14:40:14 kotys NetworkManager: <info>  (wlan0): supplicant connection state:  disconnected -> scanning
> Oct 24 14:40:14 kotys pulseaudio[16469]: sap.c: sendmsg() failed: Invalid argument
> Oct 24 14:40:14 kotys kernel: iwl3945 0000:04:00.0: Error sending REPLY_RXON: time out after 500ms.
> Oct 24 14:40:14 kotys kernel: iwl3945 0000:04:00.0: Error setting new configuration (-110).
> Oct 24 14:40:15 kotys kernel: iwl3945 0000:04:00.0: Error sending REPLY_SCAN_CMD: time out after 500ms.
> Oct 24 14:40:15 kotys kernel: wlan0: direct probe to AP 00:1c:df:82:63:c9 (try 1)
> Oct 24 14:40:15 kotys NetworkManager: <info>  (wlan0): supplicant connection state:  scanning -> associating
> Oct 24 14:40:15 kotys kernel: wlan0: direct probe to AP 00:1c:df:82:63:c9 (try 2)
> Oct 24 14:40:15 kotys kernel: wlan0: direct probe to AP 00:1c:df:82:63:c9 (try 3)
> Oct 24 14:40:15 kotys kernel: iwl3945 0000:04:00.0: Error sending REPLY_RXON: time out after 500ms.
> Oct 24 14:40:15 kotys kernel: iwl3945 0000:04:00.0: Error setting new configuration (-110).
> Oct 24 14:40:15 kotys kernel: wlan0: direct probe to AP 00:1c:df:82:63:c9 timed out
> Oct 24 14:40:16 kotys kernel: iwl3945 0000:04:00.0: Error sending REPLY_TX_PWR_TABLE_CMD: time out after 500ms.
> Oct 24 14:40:16 kotys kernel: iwl3945 0000:04:00.0: Error sending REPLY_RXON: time out after 500ms.
> Oct 24 14:40:16 kotys kernel: iwl3945 0000:04:00.0: Error setting new configuration (-110).
> 
> 
> When this happens i am able to use the card only after reloading the related modules, iwl3945 ecc. But the problem happens again after some minutes that im connected.
> 
> Kernel is 2.6.32-rc5 but his started happening around 2.6.31, not sure exactly what version as i update kernel often. Firmware is iwl3945-ucode-15.32.2.9.
> 
> Every other info that you need just ask. I hope i dont have to bisect as this is the laptop i use for work too, but if it's needed i'll do.
> 
> I'm not subscribed on any ML, so in case please cc me.
> 

We currently have a bug open for this issue. Could you please add this
information to
http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=1944 ?

Thank you

Reinette




^ permalink raw reply

* [PATCH] vlan: allow VLAN ID 0 to be used
From: Eric Dumazet @ 2009-10-26 16:13 UTC (permalink / raw)
  Cc: Benny Amorsen, Gertjan Hofman, Matt Carlson,
	netdev@vger.kernel.org, Patrick McHardy, David S. Miller
In-Reply-To: <4AE563C7.5070702@gmail.com>

Eric Dumazet a écrit :
> VLAN id 0 is not usable on current kernel because we use 16 bits in skb to
>  store vlan_tci, and vlan_tci = 0 means there is no VLAN tagging.
> 
> 
> We could use high order bit (0x8000) to tell if vlan tagging is set or not.
> 

Here is the patch I cooked that permitted VLAN 0 to be used with tg3
(and other HW accelerated vlan nics I suppose)

[PATCH] vlan: allow VLAN ID 0 to be used

We currently use a 16 bit field (vlan_tci) to store VLAN ID on a skb.

0 value is used as a special value, meaning VLAN ID not set.
This forbids use of VLAN ID 0

As VLAN ID is 12 bits, we can use high order bit as a flag, and
allow VLAN ID 0

Reported-by: Gertjan Hofman <gertjan_hofman@yahoo.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 7ff9af1..7dfcdb5 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -105,8 +105,9 @@ static inline void vlan_group_set_device(struct vlan_group *vg,
 	array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev;
 }
 
-#define vlan_tx_tag_present(__skb)	((__skb)->vlan_tci)
-#define vlan_tx_tag_get(__skb)		((__skb)->vlan_tci)
+#define VLAN_TAG_PRESENT		0x8000
+#define vlan_tx_tag_present(__skb)	((__skb)->vlan_tci & VLAN_TAG_PRESENT)
+#define vlan_tx_tag_get(__skb)		((__skb)->vlan_tci & 0x7fff)
 
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
@@ -231,7 +232,7 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci)
 static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb,
 						     u16 vlan_tci)
 {
-	skb->vlan_tci = vlan_tci;
+	skb->vlan_tci = VLAN_TAG_PRESENT | vlan_tci;
 	return skb;
 }
 
@@ -284,7 +285,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb,
 					 u16 *vlan_tci)
 {
 	if (vlan_tx_tag_present(skb)) {
-		*vlan_tci = skb->vlan_tci;
+		*vlan_tci = vlan_tx_tag_get(skb);
 		return 0;
 	} else {
 		*vlan_tci = 0;

^ permalink raw reply related

* [PATCH]NET/KS8695: add support NAPI for Rx
From: Figo.zhang @ 2009-10-26 16:00 UTC (permalink / raw)
  To: Daniel Silverstone, David S. Miller; +Cc: netdev, Vincent Sanders, Ben Dooks


Add support NAPI Rx API for KS8695NET driver.

Signed-off-by: Figo.zhang <figo1802@gmail.com>
--- 
drivers/net/arm/ks8695net.c |  165 +++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 165 insertions(+), 0 deletions(-)

diff --git a/drivers/net/arm/ks8695net.c b/drivers/net/arm/ks8695net.c
index 2a7b774..7f9d4bd 100644
--- a/drivers/net/arm/ks8695net.c
+++ b/drivers/net/arm/ks8695net.c
@@ -35,12 +35,16 @@
 
 #include <mach/regs-switch.h>
 #include <mach/regs-misc.h>
+#include <asm/mach/irq.h>
+#include <mach/regs-irq.h>
 
 #include "ks8695net.h"
 
 #define MODULENAME	"ks8695_ether"
 #define MODULEVERSION	"1.01"
 
+#define KS8695NET_NAPI  1
+
 /*
  * Transmit and device reset timeout, default 5 seconds.
  */
@@ -152,6 +156,10 @@ struct ks8695_priv {
 	enum ks8695_dtype dtype;
 	void __iomem *io_regs;
 
+	#ifdef KS8695NET_NAPI
+	struct napi_struct	napi;
+	#endif
+
 	const char *rx_irq_name, *tx_irq_name, *link_irq_name;
 	int rx_irq, tx_irq, link_irq;
 
@@ -172,6 +180,7 @@ struct ks8695_priv {
 	dma_addr_t rx_ring_dma;
 	struct ks8695_skbuff rx_buffers[MAX_RX_DESC];
 	int next_rx_desc_read;
+	spinlock_t rx_lock;
 
 	int msg_enable;
 };
@@ -391,6 +400,155 @@ ks8695_tx_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+#ifdef KS8695NET_NAPI
+static irqreturn_t
+ks8695_rx_irq(int irq, void *dev_id)
+{
+	struct net_device *ndev = (struct net_device *)dev_id;
+	struct ks8695_priv *ksp = netdev_priv(ndev);
+	unsigned long status;
+
+	unsigned long mask_bit = 1 << ksp->rx_irq;
+
+	spin_lock(&ksp->rx_lock);
+
+	status = __raw_readl(KS8695_IRQ_VA + KS8695_INTST);
+
+	/*clean rx status bit*/
+	__raw_writel(status | mask_bit , KS8695_IRQ_VA + KS8695_INTST);
+
+	if (status & mask_bit) {
+		if (napi_schedule_prep(&ksp->napi)) {
+			/*disable rx interrupt*/
+			status &= ~mask_bit;
+			__raw_writel(status , KS8695_IRQ_VA + KS8695_INTEN);
+			__napi_schedule(&ksp->napi);
+		}
+	}
+
+	spin_unlock(&ksp->rx_lock);
+	return IRQ_HANDLED;
+}
+
+static int ks8695_rx(struct net_device *ndev, int budget)
+{
+	struct ks8695_priv *ksp = netdev_priv(ndev);
+	struct sk_buff *skb;
+	int buff_n;
+	u32 flags;
+	int pktlen;
+	int last_rx_processed = -1;
+	int received = 0;
+
+	buff_n = ksp->next_rx_desc_read;
+	while (netif_running(ndev) && received < budget
+			&& ksp->rx_buffers[buff_n].skb
+			&& (!(ksp->rx_ring[buff_n].status &
+					cpu_to_le32(RDES_OWN)))) {
+			rmb();
+			flags = le32_to_cpu(ksp->rx_ring[buff_n].status);
+			/* Found an SKB which we own, this means we
+			 * received a packet
+			 */
+			if ((flags & (RDES_FS | RDES_LS)) !=
+			    (RDES_FS | RDES_LS)) {
+				/* This packet is not the first and
+				 * the last segment.  Therefore it is
+				 * a "spanning" packet and we can't
+				 * handle it
+				 */
+				goto rx_failure;
+			}
+
+			if (flags & (RDES_ES | RDES_RE)) {
+				/* It's an error packet */
+				ndev->stats.rx_errors++;
+				if (flags & RDES_TL)
+					ndev->stats.rx_length_errors++;
+				if (flags & RDES_RF)
+					ndev->stats.rx_length_errors++;
+				if (flags & RDES_CE)
+					ndev->stats.rx_crc_errors++;
+				if (flags & RDES_RE)
+					ndev->stats.rx_missed_errors++;
+
+				goto rx_failure;
+			}
+
+			pktlen = flags & RDES_FLEN;
+			pktlen -= 4; /* Drop the CRC */
+
+			/* Retrieve the sk_buff */
+			skb = ksp->rx_buffers[buff_n].skb;
+
+			/* Clear it from the ring */
+			ksp->rx_buffers[buff_n].skb = NULL;
+			ksp->rx_ring[buff_n].data_ptr = 0;
+
+			/* Unmap the SKB */
+			dma_unmap_single(ksp->dev,
+					 ksp->rx_buffers[buff_n].dma_ptr,
+					 ksp->rx_buffers[buff_n].length,
+					 DMA_FROM_DEVICE);
+
+			/* Relinquish the SKB to the network layer */
+			skb_put(skb, pktlen);
+			skb->protocol = eth_type_trans(skb, ndev);
+			netif_receive_skb(skb);
+
+			/* Record stats */
+			ndev->stats.rx_packets++;
+			ndev->stats.rx_bytes += pktlen;
+			goto rx_finished;
+
+rx_failure:
+			/* This ring entry is an error, but we can
+			 * re-use the skb
+			 */
+			/* Give the ring entry back to the hardware */
+			ksp->rx_ring[buff_n].status = cpu_to_le32(RDES_OWN);
+rx_finished:
+			received++;
+			/* And note this as processed so we can start
+			 * from here next time
+			 */
+			last_rx_processed = buff_n;
+			buff_n = (buff_n + 1) & MAX_RX_DESC_MASK;
+			/*And note which RX descriptor we last did */
+			if (likely(last_rx_processed != -1))
+				ksp->next_rx_desc_read =
+					(last_rx_processed + 1) &
+					MAX_RX_DESC_MASK;
+
+			/* And refill the buffers */
+			ks8695_refill_rxbuffers(ksp);
+	}
+	return received;
+}
+
+static int ks8695_poll(struct napi_struct *napi, int budget)
+{
+	struct ks8695_priv *ksp = container_of(napi, struct ks8695_priv, napi);
+	struct net_device *dev = ksp->ndev;
+	unsigned long mask_bit = 1 << ksp->rx_irq;
+	unsigned long isr = __raw_readl(KS8695_IRQ_VA + KS8695_INTEN);
+
+	unsigned long  work_done = 0;
+
+	work_done = ks8695_rx(dev, budget);
+
+	if (work_done < budget) {
+		unsigned long flags;
+		spin_lock_irqsave(&ksp->rx_lock, flags);
+		/*enable rx interrupt*/
+		__raw_writel(isr | mask_bit, KS8695_IRQ_VA + KS8695_INTEN);
+		__napi_complete(napi);
+		spin_unlock_irqrestore(&ksp->rx_lock, flags);
+	}
+	return work_done;
+}
+
+#else
 /**
  *	ks8695_rx_irq - Receive IRQ handler
  *	@irq: The IRQ which went off (ignored)
@@ -503,6 +661,8 @@ rx_finished:
 	return IRQ_HANDLED;
 }
 
+#endif
+
 /**
  *	ks8695_link_irq - Link change IRQ handler
  *	@irq: The IRQ which went off (ignored)
@@ -1472,6 +1632,10 @@ ks8695_probe(struct platform_device *pdev)
 	SET_ETHTOOL_OPS(ndev, &ks8695_ethtool_ops);
 	ndev->watchdog_timeo	 = msecs_to_jiffies(watchdog);
 
+#ifdef KS8695NET_NAPI
+	netif_napi_add(ndev, &ksp->napi, ks8695_poll, 64);
+#endif
+
 	/* Retrieve the default MAC addr from the chip. */
 	/* The bootloader should have left it in there for us. */
 
@@ -1505,6 +1669,7 @@ ks8695_probe(struct platform_device *pdev)
 
 	/* And initialise the queue's lock */
 	spin_lock_init(&ksp->txq_lock);
+	spin_lock_init(&ksp->rx_lock);
 
 	/* Specify the RX DMA ring buffer */
 	ksp->rx_ring = ksp->ring_base + TX_RING_DMA_SIZE;



^ permalink raw reply related

* Re: [PATCH next-next-2.6] netdev: better dev_name_hash
From: Octavian Purdila @ 2009-10-26 15:52 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Krishna Kumar2, Hagen Paul Pfeifer, netdev
In-Reply-To: <4AE5B84E.8040505@gmail.com>

On Monday 26 October 2009 16:55:10 you wrote:
> 
> This is because you chose a 65536 slots hash table, to store 16000 elements
> 
> The ideal function should be :
> 
> $ ./dev_name_hash ixunc 16000 5 16
> score 16000
> 
> unsigned int dev_name_hash_new10bis(const char *name)
> {
> 	unsigned hash = 0;
> 	int len = strnlen(name, IFNAMSIZ);
> 	int i;
> 
> 	for (i = 0; i < len; ++i)
> 		hash = 10 * hash + (name[i] - '0');
> 	return hash;
> }
> 

Eric, thanks a lot for your help. This is turning into a very instructive 
thread for me :)

10bis performs better for ixunc but interestingly performs worse for ixint 
now. I also get mixed results for the two when using other names like ppp or 
gtp.

2 - new10, 3 - new10bis

score 49852
$ ./dev_name_hash ixint 32000 3 14
score 53194
$ ./dev_name_hash ixunc 32000 2 14
score 55232
$ ./dev_name_hash ixunc 32000 3 14
score 48168

> But should we really care ?

I'm just testing various common cases we use here ({ixint,ixunc,gtp,ppp,gre} 
{1000,16000,32000,128000} {14,16}). 

Ideally we want a hash function that performs better in all cases  - but I 
understand that it may not be possible.

I will play more with it and see if I can come up with something better, but 
in any case the new{10,10bis,17,31} performs much better than full_name_hash 
and most of the time better than jhash.

^ permalink raw reply

* Re: [PATCH v2 8/8] Document future removal of sysctl_tcp_* options
From: Gilad Ben-Yossef @ 2009-10-26 15:51 UTC (permalink / raw)
  To: Bill Fink; +Cc: Eric Dumazet, William Allen Simpson, netdev, Ilpo Järvinen
In-Reply-To: <20091026110855.98a19f7a.billfink@mindspring.com>

Bill Fink wrote:

>
>> OK. It really sounds like we should go with my first suggestion: global 
>> sysctl based kill switches, just as we have now and in addition, the 
>> ability to kill TCP options per route. The TCP option will be used if 
>> and only if both kill switches (global and per route) are not set.
>>     
>
> This wording is confusing.  The global kill switch not being set
> really means that the sysctl is set.  And this assumes the per-route
> default is not set.  Correct?
>   
Now it is my turn to get confused, because I didn't understand your 
question :-)

What I suggest is to leave the sysctl exactly as they are now:

- You leave them be (value of 1), the respective TCP option is 
supported. This is the default.
- You turn them off (write 0), the respective TCP option is not supported.

What I suggest to *add* is the following ability:

- If you have the TCP option support turned on (default, value of one), 
you can turn off support for the option for a specific route using an ip 
route option.

Hope that made it clearer.
>   
>> What we achieve is:
>>
>> 1. Global kill switches work exactly as they do now, whether you use the 
>> new per route options or not, so backwards compatible.
>>
>> 2. In addition, if the global kill switch is not in effect, you can also 
>> kill the options on a per route basis.
>>
>> I'm going to send third version of the patch to this effect, minus the 
>> new remote DoS possibility that Ilpo pointed out and leaving the global 
>> sysctl kill switches be.
>>
>> If you like it, please ACK ;-)
>>     
>
> IIUC this doesn't seem right to me.  I believe the global setting
> should be a default and the route specific an override.  Your scheme
> would mean that if I set a global option to disable timestamps, then
> I couldn't enable timestamps on specific routes using the per route
> setting.
>   
Yes. You understand my intention perfectly.

Let me try to explain why I believe this is the correct behavior to 
implement:

1. This is the closest thing to what we have now. Today you write 0 to 
the sysctl and that TCP option is turned off globally. Period.  My 
suggestion leaves this behavior as is now regardless if you've used per 
route settings. The other way makes a very subtle change in the meaning 
of writing 0 to the sysctl.

I believe very subtle changes to the meaning of long established interfaces 
are a bad way to go. It's bad to change interfaces on users, but it is 
even worse to make something that they have long used do something just 
slightly different.

2. If the per route options need to be "default, on or off" instead of 
"on or off", we'd need to move from 1 bit to store the option to, well, 
2 bits in theory, but probably 32 bits in practice, since we can't use 
RTAX_FEATURES any longer.

Yes, we can invent RTAX_FEATURES_TWO_BITS or some such, but I'd say that 
is ugly :-)

3. I believe that the scenario of needing to set the support of a TCP 
option globally off and just turn it on for a specific route is not very 
likely to be needed and losing it is a small price to pay for 1 + 2.

> And it also doesn't seem to address Eric's scenario.  If I understand
> his concern correctly, what seems to be needed is a third global
> reset value (not calling it a setting since the actual global setting
> wouldn't be changed), which would reset any per-route override settings
> to the global default setting.
>
>   
Well, I do not believe this is what Eric meant (Eric?), but if it is, then 
I fail to see why
we should require of the per route TCP option switches what is not required 
of any other
existing route specific option, since AFAIK we don't have a 
"reset to default values" for the other options already supported.

Having said all that, I have no issue with re-spinning the patch with 
your suggestion.
I don't feel all that strongly about which is the correct way - I just want to get 
as much feedback as possible
since I'm suggesting adding new user interface options and we all know 
it is very hard to backpedal
on those, so I'm trying to make sure to get enough feedback to do it 
right the first time.

So, any feedback from anyone regarding a favorite interface? It seems each 
person fancies a different one :-)

Thanks!
Gilad

-- 
Gilad Ben-Yossef
Chief Coffee Drinker & CTO
Codefidence Ltd.

Web:   http://codefidence.com
Cell:  +972-52-8260388
Skype: gilad_codefidence
Tel:   +972-8-9316883 ext. 201
Fax:   +972-8-9316884
Email: gilad@codefidence.com

Check out our Open Source technology and training blog - http://tuxology.net

	"The biggest risk you can take it is to take no risk."
		-- Mark Zuckerberg and probably others

^ permalink raw reply

* [PATCH] can: sja1000: fix bug using library functions for skb allocation
From: Wolfgang Grandegger @ 2009-10-26 15:46 UTC (permalink / raw)
  To: Linux Netdev List; +Cc: Socketcan-core@lists.berlios.de, Kurt Van Dijck

Commit 7b6856a0 "can: provide library functions for skb allocation"
did not properly remove two lines of the SJA1000 driver resulting in
a 'skb_over_panic' when calling skb_put, as reported by Kurt.

Signed-off-by: Kurt Van Dijck <kurt.van.dijck@eia.be>
Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
---
 drivers/net/can/sja1000/sja1000.c |    2 --
 1 file changed, 2 deletions(-)

Index: net-next-2.6/drivers/net/can/sja1000/sja1000.c
===================================================================
--- net-next-2.6.orig/drivers/net/can/sja1000/sja1000.c
+++ net-next-2.6/drivers/net/can/sja1000/sja1000.c
@@ -321,8 +321,6 @@ static void sja1000_rx(struct net_device
 	if (fi & FI_RTR)
 		id |= CAN_RTR_FLAG;

-	cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame));
-	memset(cf, 0, sizeof(struct can_frame));
 	cf->can_id = id;
 	cf->can_dlc = dlc;
 	for (i = 0; i < dlc; i++)

^ permalink raw reply

* Fw: [Bug 14470] New: freez in TCP stack
From: Stephen Hemminger @ 2009-10-26 15:41 UTC (permalink / raw)
  To: netdev



Begin forwarded message:

Date: Mon, 26 Oct 2009 12:47:22 GMT
From: bugzilla-daemon@bugzilla.kernel.org
To: shemminger@linux-foundation.org
Subject: [Bug 14470] New: freez in TCP stack


http://bugzilla.kernel.org/show_bug.cgi?id=14470

           Summary: freez in TCP stack
           Product: Networking
           Version: 2.5
    Kernel Version: 2.6.31
          Platform: All
        OS/Version: Linux
              Tree: Mainline
            Status: NEW
          Severity: high
          Priority: P1
         Component: IPV4
        AssignedTo: shemminger@linux-foundation.org
        ReportedBy: kolo@albatani.cz
        Regression: No


We are hitting kernel panics on Dell R610 servers with e1000e NICs; it appears
usually under high network traffic (around 100Mbit/s), but that is not a rule: it
has happened even on low traffic.

Servers are used as reverse http proxy (varnish).

On 6 identical servers this panic happens approx. 2 times a day depending on network
load. The machine completely freezes until the management watchdog reboots it.


We had to put a serial console on these servers to catch the oops. Is there
anything else we can do to debug this?
The RIP is always the same:

RIP: 0010:[<ffffffff814203cc>]  [<ffffffff814203cc>]
tcp_xmit_retransmit_queue+0x8c/0x290

The rest of the oops always differs a little ... here is an example:

RIP: 0010:[<ffffffff814203cc>]  [<ffffffff814203cc>]
tcp_xmit_retransmit_queue+0x8c/0x290
RSP: 0018:ffffc90000003a40  EFLAGS: 00010246
RAX: ffff8807e7420678 RBX: ffff8807e74205c0 RCX: 0000000000000000
RDX: 000000004598a105 RSI: 0000000000000000 RDI: ffff8807e74205c0
RBP: ffffc90000003a80 R08: 0000000000000003 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: ffff8807e74205c0 R14: ffff8807e7420678 R15: 0000000000000000
FS:  0000000000000000(0000) GS:ffffc90000000000(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 0000000000000000 CR3: 0000000001001000 CR4: 00000000000006f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process swapper (pid: 0, threadinfo ffffffff81608000, task ffffffff81631440)
Stack:
 ffffc90000003a60 0000000000000000 4598a105e74205c0 000000004598a101
<0> 000000000000050e ffff8807e74205c0 0000000000000003 0000000000000000
<0> ffffc90000003b40 ffffffff8141ae4a ffff8807e7420678 0000000000000000
Call Trace:
 <IRQ>
 [<ffffffff8141ae4a>] tcp_ack+0x170a/0x1dd0
 [<ffffffff8141c362>] tcp_rcv_state_process+0x122/0xab0
 [<ffffffff81422c6c>] tcp_v4_do_rcv+0xac/0x220
 [<ffffffff813fd02f>] ? nf_iterate+0x5f/0x90
 [<ffffffff81424b26>] tcp_v4_rcv+0x586/0x6b0
 [<ffffffff813fd0c5>] ? nf_hook_slow+0x65/0xf0
 [<ffffffff81406b70>] ? ip_local_deliver_finish+0x0/0x120
 [<ffffffff81406bcf>] ip_local_deliver_finish+0x5f/0x120
 [<ffffffff8140715b>] ip_local_deliver+0x3b/0x90
 [<ffffffff81406971>] ip_rcv_finish+0x141/0x340
 [<ffffffff8140701f>] ip_rcv+0x24f/0x350
 [<ffffffff813e7ced>] netif_receive_skb+0x20d/0x2f0
 [<ffffffff813e7e90>] napi_skb_finish+0x40/0x50
 [<ffffffff813e82f4>] napi_gro_receive+0x34/0x40
 [<ffffffff8133e0c8>] e1000_receive_skb+0x48/0x60
 [<ffffffff81342342>] e1000_clean_rx_irq+0xf2/0x330
 [<ffffffff813410a1>] e1000_clean+0x81/0x2a0
 [<ffffffff81054ce1>] ? ktime_get+0x11/0x50
 [<ffffffff813eaf1c>] net_rx_action+0x9c/0x130
 [<ffffffff81046940>] ? get_next_timer_interrupt+0x1d0/0x210
 [<ffffffff81041bd7>] __do_softirq+0xb7/0x160
 [<ffffffff8100c27c>] call_softirq+0x1c/0x30
 [<ffffffff8100e04d>] do_softirq+0x3d/0x80
 [<ffffffff81041b0b>] irq_exit+0x7b/0x90
 [<ffffffff8100d613>] do_IRQ+0x73/0xe0
 [<ffffffff8100bb13>] ret_from_intr+0x0/0xa
 <EOI>
 [<ffffffff81296e6c>] ? acpi_idle_enter_bm+0x245/0x271
 [<ffffffff81296e62>] ? acpi_idle_enter_bm+0x23b/0x271
 [<ffffffff813c7a08>] ? cpuidle_idle_call+0x98/0xf0
 [<ffffffff8100a104>] ? cpu_idle+0x94/0xd0
 [<ffffffff81468db6>] ? rest_init+0x66/0x70
 [<ffffffff816a082f>] ? start_kernel+0x2ef/0x340
 [<ffffffff8169fd54>] ? x86_64_start_reservations+0x84/0x90
 [<ffffffff8169fe32>] ? x86_64_start_kernel+0xd2/0x100
Code: 00 eb 28 8b 83 d0 03 00 00 41 39 44 24 40 0f 89 00 01 00 00 41 0f b6 cd
41 bd 2f 00 00 00 83 e1 03 0f 84 fc 00 00 00 4d 8b 24 24 <49> 8b 04 24 4d 39 f4
0f 18 08 0f 84 d9 00 00 00 4c 3b a3 b8 01
RIP  [<ffffffff814203cc>] tcp_xmit_retransmit_queue+0x8c/0x290
 RSP <ffffc90000003a40>
CR2: 0000000000000000
---[ end trace d97d99c9ae1d52cc ]---
Kernel panic - not syncing: Fatal exception in interrupt
Pid: 0, comm: swapper Tainted: G      D    2.6.31 #2
Call Trace:
 <IRQ>  [<ffffffff8103cab0>] panic+0xa0/0x170
 [<ffffffff8100bb13>] ? ret_from_intr+0x0/0xa
 [<ffffffff8103c74e>] ? print_oops_end_marker+0x1e/0x20
 [<ffffffff8100f38e>] oops_end+0x9e/0xb0
 [<ffffffff81025b9a>] no_context+0x15a/0x250
 [<ffffffff81025e2b>] __bad_area_nosemaphore+0xdb/0x1c0
 [<ffffffff813e89e9>] ? dev_hard_start_xmit+0x269/0x2f0
 [<ffffffff81025fae>] bad_area_nosemaphore+0xe/0x10
 [<ffffffff8102639f>] do_page_fault+0x17f/0x260
 [<ffffffff8147eadf>] page_fault+0x1f/0x30
 [<ffffffff814203cc>] ? tcp_xmit_retransmit_queue+0x8c/0x290
 [<ffffffff8141ae4a>] tcp_ack+0x170a/0x1dd0
 [<ffffffff8141c362>] tcp_rcv_state_process+0x122/0xab0
 [<ffffffff81422c6c>] tcp_v4_do_rcv+0xac/0x220
 [<ffffffff813fd02f>] ? nf_iterate+0x5f/0x90
 [<ffffffff81424b26>] tcp_v4_rcv+0x586/0x6b0
 [<ffffffff813fd0c5>] ? nf_hook_slow+0x65/0xf0
 [<ffffffff81406b70>] ? ip_local_deliver_finish+0x0/0x120
 [<ffffffff81406bcf>] ip_local_deliver_finish+0x5f/0x120
 [<ffffffff8140715b>] ip_local_deliver+0x3b/0x90
 [<ffffffff81406971>] ip_rcv_finish+0x141/0x340
 [<ffffffff8140701f>] ip_rcv+0x24f/0x350
 [<ffffffff813e7ced>] netif_receive_skb+0x20d/0x2f0
 [<ffffffff813e7e90>] napi_skb_finish+0x40/0x50
 [<ffffffff813e82f4>] napi_gro_receive+0x34/0x40
 [<ffffffff8133e0c8>] e1000_receive_skb+0x48/0x60
 [<ffffffff81342342>] e1000_clean_rx_irq+0xf2/0x330
 [<ffffffff813410a1>] e1000_clean+0x81/0x2a0
 [<ffffffff81054ce1>] ? ktime_get+0x11/0x50
 [<ffffffff813eaf1c>] net_rx_action+0x9c/0x130
 [<ffffffff81046940>] ? get_next_timer_interrupt+0x1d0/0x210
 [<ffffffff81041bd7>] __do_softirq+0xb7/0x160
 [<ffffffff8100c27c>] call_softirq+0x1c/0x30
 [<ffffffff8100e04d>] do_softirq+0x3d/0x80
 [<ffffffff81041b0b>] irq_exit+0x7b/0x90
 [<ffffffff8100d613>] do_IRQ+0x73/0xe0
 [<ffffffff8100bb13>] ret_from_intr+0x0/0xa
 <EOI>  [<ffffffff81296e6c>] ? acpi_idle_enter_bm+0x245/0x271
 [<ffffffff81296e62>] ? acpi_idle_enter_bm+0x23b/0x271
 [<ffffffff813c7a08>] ? cpuidle_idle_call+0x98/0xf0
 [<ffffffff8100a104>] ? cpu_idle+0x94/0xd0
 [<ffffffff81468db6>] ? rest_init+0x66/0x70
 [<ffffffff816a082f>] ? start_kernel+0x2ef/0x340
 [<ffffffff8169fd54>] ? x86_64_start_reservations+0x84/0x90
 [<ffffffff8169fe32>] ? x86_64_start_kernel+0xd2/0x100

-- 
Configure bugmail: http://bugzilla.kernel.org/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are the assignee for the bug.


-- 

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox