Netdev List
 help / color / mirror / Atom feed
* [PATCH 04/19] net: Do delayed neigh confirmation.
From: David Miller @ 2012-07-03  9:46 UTC (permalink / raw)
  To: netdev


When a dst_confirm() happens, mark the confirmation as pending in the
dst.  Then on the next packet out, when we have the neigh in-hand, do
the update.

This removes dst_confirm()'s dependency on the dst having an
attached neigh.

While we're here, remove the explicit 'dst' NULL check; all except 2
or 3 call sites ensure it's not NULL.  So just fix those cases up.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h       |   29 +++++++++++++++++++++--------
 include/net/neighbour.h |   15 ---------------
 net/core/dst.c          |    3 ++-
 net/ipv4/ip_output.c    |    2 +-
 net/ipv4/tcp_input.c    |   19 +++++++++++++------
 net/ipv6/ip6_output.c   |    2 +-
 6 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index f0bf3b8..84e7a3f 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -51,7 +51,7 @@ struct dst_entry {
 	int			(*input)(struct sk_buff *);
 	int			(*output)(struct sk_buff *);
 
-	int			flags;
+	unsigned short		flags;
 #define DST_HOST		0x0001
 #define DST_NOXFRM		0x0002
 #define DST_NOPOLICY		0x0004
@@ -62,6 +62,8 @@ struct dst_entry {
 #define DST_FAKE_RTABLE		0x0080
 #define DST_XFRM_TUNNEL		0x0100
 
+	unsigned short		pending_confirm;
+
 	short			error;
 	short			obsolete;
 	unsigned short		header_len;	/* more space at head required */
@@ -371,7 +373,8 @@ static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb)
 
 extern int dst_discard(struct sk_buff *skb);
 extern void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
-		       int initial_ref, int initial_obsolete, int flags);
+		       int initial_ref, int initial_obsolete,
+		       unsigned short flags);
 extern void __dst_free(struct dst_entry *dst);
 extern struct dst_entry *dst_destroy(struct dst_entry *dst);
 
@@ -395,14 +398,24 @@ static inline void dst_rcu_free(struct rcu_head *head)
 
 static inline void dst_confirm(struct dst_entry *dst)
 {
-	if (dst) {
-		struct neighbour *n;
+	dst->pending_confirm = 1;
+}
 
-		rcu_read_lock();
-		n = dst_get_neighbour_noref(dst);
-		neigh_confirm(n);
-		rcu_read_unlock();
+static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
+				   struct sk_buff *skb)
+{
+	struct hh_cache *hh;
+
+	if (unlikely(dst->pending_confirm)) {
+		n->confirmed = jiffies;
+		dst->pending_confirm = 0;
 	}
+
+	hh = &n->hh;
+	if ((n->nud_state & NUD_CONNECTED) && hh->hh_len)
+		return neigh_hh_output(hh, skb);
+	else
+		return n->output(n, skb);
 }
 
 static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr)
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index e1d18bd..344d898 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -309,12 +309,6 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh)
 
 #define neigh_hold(n)	atomic_inc(&(n)->refcnt)
 
-static inline void neigh_confirm(struct neighbour *neigh)
-{
-	if (neigh)
-		neigh->confirmed = jiffies;
-}
-
 static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 {
 	unsigned long now = jiffies;
@@ -358,15 +352,6 @@ static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
 	return dev_queue_xmit(skb);
 }
 
-static inline int neigh_output(struct neighbour *n, struct sk_buff *skb)
-{
-	struct hh_cache *hh = &n->hh;
-	if ((n->nud_state & NUD_CONNECTED) && hh->hh_len)
-		return neigh_hh_output(hh, skb);
-	else
-		return n->output(n, skb);
-}
-
 static inline struct neighbour *
 __neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat)
 {
diff --git a/net/core/dst.c b/net/core/dst.c
index 43d94ce..a6e19a2 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -152,7 +152,7 @@ EXPORT_SYMBOL(dst_discard);
 const u32 dst_default_metrics[RTAX_MAX];
 
 void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
-		int initial_ref, int initial_obsolete, int flags)
+		int initial_ref, int initial_obsolete, unsigned short flags)
 {
 	struct dst_entry *dst;
 
@@ -188,6 +188,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 	dst->__use = 0;
 	dst->lastuse = jiffies;
 	dst->flags = flags;
+	dst->pending_confirm = 0;
 	dst->next = NULL;
 	if (!(flags & DST_NOCOUNT))
 		dst_entries_add(ops, 1);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6e9a266..cc52679 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -198,7 +198,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 	if (unlikely(!neigh))
 		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
 	if (neigh) {
-		int res = neigh_output(neigh, skb);
+		int res = dst_neigh_output(dst, neigh, skb);
 
 		rcu_read_unlock_bh();
 		return res;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8416f8a..ca0d0e7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -740,13 +740,13 @@ void tcp_update_metrics(struct sock *sk)
 	if (sysctl_tcp_nometrics_save)
 		return;
 
-	dst_confirm(dst);
-
 	if (dst && (dst->flags & DST_HOST)) {
 		const struct inet_connection_sock *icsk = inet_csk(sk);
 		int m;
 		unsigned long rtt;
 
+		dst_confirm(dst);
+
 		if (icsk->icsk_backoff || !tp->srtt) {
 			/* This session failed to estimate rtt. Why?
 			 * Probably, no packets returned in time.
@@ -3869,9 +3869,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 	}
 
-	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
-		dst_confirm(__sk_dst_get(sk));
-
+	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
+		struct dst_entry *dst = __sk_dst_get(sk);
+		if (dst)
+			dst_confirm(dst);
+	}
 	return 1;
 
 no_queue:
@@ -6140,9 +6142,14 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 
 		case TCP_FIN_WAIT1:
 			if (tp->snd_una == tp->write_seq) {
+				struct dst_entry *dst;
+
 				tcp_set_state(sk, TCP_FIN_WAIT2);
 				sk->sk_shutdown |= SEND_SHUTDOWN;
-				dst_confirm(__sk_dst_get(sk));
+
+				dst = __sk_dst_get(sk);
+				if (dst)
+					dst_confirm(dst);
 
 				if (!sock_flag(sk, SOCK_DEAD))
 					/* Wake up lingering close() */
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a233a7c..c94e4aa 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -125,7 +125,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
 	rcu_read_lock();
 	neigh = dst_get_neighbour_noref(dst);
 	if (neigh) {
-		int res = neigh_output(neigh, skb);
+		int res = dst_neigh_output(dst, neigh, skb);
 
 		rcu_read_unlock();
 		return res;
-- 
1.7.10

^ permalink raw reply related

* [PATCH 03/19] sunrpc: Don't do a dst_confirm() on an input routes.
From: David Miller @ 2012-07-03  9:46 UTC (permalink / raw)
  To: netdev


xs_udp_data_ready() is operating on received packets, and tries to
do a dst_confirm() on the dst attached to the SKB.

This isn't right, dst confirmation is for output routes, not input
routes.  It's for resetting the timers on the nexthop neighbour entry
for the route, indicating that we've got good evidence that we've
successfully reached it.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sunrpc/xprtsock.c |    3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 890b03f..62d0dac 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1014,9 +1014,6 @@ static void xs_udp_data_ready(struct sock *sk, int len)
 
 	UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS);
 
-	/* Something worked... */
-	dst_confirm(skb_dst(skb));
-
 	xprt_adjust_cwnd(task, copied);
 	xprt_complete_rqst(task, copied);
 
-- 
1.7.10

^ permalink raw reply related

* [PATCH 02/19] ipv4: Don't report neigh uptodate state in rtcache procfs.
From: David Miller @ 2012-07-03  9:45 UTC (permalink / raw)
  To: netdev


Soon routes will not have a cached neigh attached, nor will we
be able to necessarily go directly to a neigh from an arbitrary
route.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c |   12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 2f40363..bae3638 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -418,13 +418,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 			   "HHUptod\tSpecDst");
 	else {
 		struct rtable *r = v;
-		struct neighbour *n;
-		int len, HHUptod;
-
-		rcu_read_lock();
-		n = dst_get_neighbour_noref(&r->dst);
-		HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0;
-		rcu_read_unlock();
+		int len;
 
 		seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
 			      "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
@@ -438,9 +432,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 			(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
 			      dst_metric(&r->dst, RTAX_RTTVAR)),
 			r->rt_key_tos,
-			-1,
-			HHUptod,
-			0, &len);
+			-1, 0, 0, &len);
 
 		seq_printf(seq, "%*s\n", 127 - len, "");
 	}
-- 
1.7.10

^ permalink raw reply related

* [PATCH 01/19] ipv4: Make neigh lookups directly in output packet path.
From: David Miller @ 2012-07-03  9:45 UTC (permalink / raw)
  To: netdev


Do not use the dst cached neigh, we'll be getting rid of that.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/arp.h       |   28 +++++++++++++++++++---------
 include/net/neighbour.h |   11 +++++++++--
 net/core/neighbour.c    |   12 +++++++-----
 net/ipv4/ip_output.c    |   12 ++++++++----
 net/ipv4/route.c        |    6 +-----
 5 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/include/net/arp.h b/include/net/arp.h
index 4a1f3fb..4617d98 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -15,24 +15,34 @@ static inline u32 arp_hashfn(u32 key, const struct net_device *dev, u32 hash_rnd
 	return val * hash_rnd;
 }
 
-static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key)
+static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key)
 {
-	struct neigh_hash_table *nht;
+	struct neigh_hash_table *nht = rcu_dereference_bh(arp_tbl.nht);
 	struct neighbour *n;
 	u32 hash_val;
 
-	rcu_read_lock_bh();
-	nht = rcu_dereference_bh(arp_tbl.nht);
+	if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+		key = 0;
+
 	hash_val = arp_hashfn(key, dev, nht->hash_rnd[0]) >> (32 - nht->hash_shift);
 	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
 	     n != NULL;
 	     n = rcu_dereference_bh(n->next)) {
-		if (n->dev == dev && *(u32 *)n->primary_key == key) {
-			if (!atomic_inc_not_zero(&n->refcnt))
-				n = NULL;
-			break;
-		}
+		if (n->dev == dev && *(u32 *)n->primary_key == key)
+			return n;
 	}
+
+	return NULL;
+}
+
+static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key)
+{
+	struct neighbour *n;
+
+	rcu_read_lock_bh();
+	n = __ipv4_neigh_lookup_noref(dev, key);
+	if (n && !atomic_inc_not_zero(&n->refcnt))
+		n = NULL;
 	rcu_read_unlock_bh();
 
 	return n;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 6cdfeed..e1d18bd 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -202,9 +202,16 @@ extern struct neighbour *	neigh_lookup(struct neigh_table *tbl,
 extern struct neighbour *	neigh_lookup_nodev(struct neigh_table *tbl,
 						   struct net *net,
 						   const void *pkey);
-extern struct neighbour *	neigh_create(struct neigh_table *tbl,
+extern struct neighbour *	__neigh_create(struct neigh_table *tbl,
+					       const void *pkey,
+					       struct net_device *dev,
+					       bool want_ref);
+static inline struct neighbour *neigh_create(struct neigh_table *tbl,
 					     const void *pkey,
-					     struct net_device *dev);
+					     struct net_device *dev)
+{
+	return __neigh_create(tbl, pkey, dev, true);
+}
 extern void			neigh_destroy(struct neighbour *neigh);
 extern int			__neigh_event_send(struct neighbour *neigh, struct sk_buff *skb);
 extern int			neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d81d026..a793af9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -474,8 +474,8 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
 }
 EXPORT_SYMBOL(neigh_lookup_nodev);
 
-struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
-			       struct net_device *dev)
+struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
+				 struct net_device *dev, bool want_ref)
 {
 	u32 hash_val;
 	int key_len = tbl->key_len;
@@ -535,14 +535,16 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 	     n1 = rcu_dereference_protected(n1->next,
 			lockdep_is_held(&tbl->lock))) {
 		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
-			neigh_hold(n1);
+			if (want_ref)
+				neigh_hold(n1);
 			rc = n1;
 			goto out_tbl_unlock;
 		}
 	}
 
 	n->dead = 0;
-	neigh_hold(n);
+	if (want_ref)
+		neigh_hold(n);
 	rcu_assign_pointer(n->next,
 			   rcu_dereference_protected(nht->hash_buckets[hash_val],
 						     lockdep_is_held(&tbl->lock)));
@@ -558,7 +560,7 @@ out_neigh_release:
 	neigh_release(n);
 	goto out;
 }
-EXPORT_SYMBOL(neigh_create);
+EXPORT_SYMBOL(__neigh_create);
 
 static u32 pneigh_hash(const void *pkey, int key_len)
 {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 2630900..6e9a266 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -170,6 +170,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 	struct net_device *dev = dst->dev;
 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
 	struct neighbour *neigh;
+	u32 nexthop;
 
 	if (rt->rt_type == RTN_MULTICAST) {
 		IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
@@ -191,15 +192,18 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 		skb = skb2;
 	}
 
-	rcu_read_lock();
-	neigh = dst_get_neighbour_noref(dst);
+	rcu_read_lock_bh();
+	nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr;
+	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
+	if (unlikely(!neigh))
+		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
 	if (neigh) {
 		int res = neigh_output(neigh, skb);
 
-		rcu_read_unlock();
+		rcu_read_unlock_bh();
 		return res;
 	}
-	rcu_read_unlock();
+	rcu_read_unlock_bh();
 
 	net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
 			    __func__);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6a5afc7..2f40363 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1098,17 +1098,13 @@ static int slow_chain_length(const struct rtable *head)
 
 static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr)
 {
-	static const __be32 inaddr_any = 0;
 	struct net_device *dev = dst->dev;
 	const __be32 *pkey = daddr;
 	const struct rtable *rt;
 	struct neighbour *n;
 
 	rt = (const struct rtable *) dst;
-
-	if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
-		pkey = &inaddr_any;
-	else if (rt->rt_gateway)
+	if (rt->rt_gateway)
 		pkey = (const __be32 *) &rt->rt_gateway;
 
 	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
-- 
1.7.10

^ permalink raw reply related

* [PATCH 0/19] Disconnect neigh from dst_entry
From: David Miller @ 2012-07-03  9:45 UTC (permalink / raw)
  To: netdev


This finally severs neighbour table entries from dst_entry enough that
we no longer depend upon them outside of the individual protocols.

Besides being a major step towards making routing cache removal
practical, it also means an end to the infamous "neighbour table
overflow" condition.

Routes in ipv4 no longer refer to neighbour table entries, they are
used on an as-needed basis during packet output in a refcount-less
manner.

Therefore garbage collection is trivial since almost nothing actually
holds onto neighbour table references.

On the routing cache removal side, this set of changes removes another
dependency upon rt->rt_dst.  The only one left is for inetpeer, and
once that is severed (I have a rough plan for it) rt->rt_dst is finally
without any use and we can construct ipv4 routes directly inside of
FIB table entries.

Signed-off-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply

* Re: [PATCH] qlge: fix endian issue
From: RongQing Li @ 2012-07-03  9:44 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, ron.mercer
In-Reply-To: <20120703.022250.295045256493986282.davem@davemloft.net>

2012/7/3 David Miller <davem@davemloft.net>:
> From: roy.qing.li@gmail.com
> Date: Tue,  3 Jul 2012 16:47:56 +0800
>
>> Signed-off-by: RongQing.Li <roy.qing.li@gmail.com>
>
> Is "RongQing.Li" really how you would write your name?

Yes

Chinese name, Li RongQing

Maybe I should not have added the dot.

-RongQing

^ permalink raw reply

* Re: BNX2 MIPS06 firmware in tree is 6.2.1, driver wants 6.2.3
From: Eric Dumazet @ 2012-07-03  9:30 UTC (permalink / raw)
  To: Tony Vroon; +Cc: Michael Chan, LKML, netdev, David S. Miller, Anthony G. Basile
In-Reply-To: <4FF2B64A.5020002@linx.net>

On Tue, 2012-07-03 at 10:07 +0100, Tony Vroon wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
> 
> On 03/07/12 10:05, Eric Dumazet wrote:
> > You're supposed to upgrade firmware blobs on your own.
> 
> But the firmware is in the kernel tree still, should the outdated
> files be removed?

I believe firmware/README.AddingFirmware contains the rationale.

^ permalink raw reply

* [PATCH] netem: fix rate extension and drop accounting
From: Eric Dumazet @ 2012-07-03  9:25 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, Hagen Paul Pfeifer, Yuchung Cheng, Andreas Terzis,
	Mark Gordon

From: Eric Dumazet <edumazet@google.com>

commit 7bc0f28c7a0c (netem: rate extension) did the wrong math when a
packet is enqueued while the queue is not empty.

The result is unexpected cumulative delays:

# tc qd add dev eth0 root est 1sec 4sec netem delay 200ms rate 100kbit
# ping -i 0.1 172.30.42.18
PING 172.30.42.18 (172.30.42.18) 56(84) bytes of data.
64 bytes from 172.30.42.18: icmp_req=1 ttl=64 time=208 ms
64 bytes from 172.30.42.18: icmp_req=2 ttl=64 time=424 ms
64 bytes from 172.30.42.18: icmp_req=3 ttl=64 time=838 ms
64 bytes from 172.30.42.18: icmp_req=4 ttl=64 time=1142 ms
64 bytes from 172.30.42.18: icmp_req=5 ttl=64 time=1335 ms
64 bytes from 172.30.42.18: icmp_req=6 ttl=64 time=1949 ms
64 bytes from 172.30.42.18: icmp_req=7 ttl=64 time=2450 ms
64 bytes from 172.30.42.18: icmp_req=8 ttl=64 time=2840 ms
64 bytes from 172.30.42.18: icmp_req=9 ttl=64 time=3121 ms
64 bytes from 172.30.42.18: icmp_req=10 ttl=64 time=3291 ms
64 bytes from 172.30.42.18: icmp_req=11 ttl=64 time=3784 ms

This patch also fixes double drop accounting when a packet is dropped
in tfifo_enqueue().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Andreas Terzis <aterzis@google.com>
Cc: Mark Gordon <msg@google.com>
Cc: Hagen Paul Pfeifer <hagen@jauu.net>
---
 net/sched/sch_netem.c |   14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index a2a95aa..e8b5ac3 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -368,7 +368,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	/* We don't fill cb now as skb_unshare() may invalidate it */
 	struct netem_skb_cb *cb;
 	struct sk_buff *skb2;
-	int ret;
 	int count = 1;
 
 	/* Random duplication */
@@ -443,14 +442,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 				 * calculate this time bonus and substract
 				 * from delay.
 				 */
-				delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
+				delay -= netem_skb_cb(skb_peek(list))->time_to_send - now;
 				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
 			}
 		}
 
 		cb->time_to_send = now + delay;
 		++q->counter;
-		ret = tfifo_enqueue(skb, sch);
+		return tfifo_enqueue(skb, sch);
 	} else {
 		/*
 		 * Do re-ordering by putting one out of N packets at the front
@@ -462,16 +461,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		__skb_queue_head(&sch->q, skb);
 		sch->qstats.backlog += qdisc_pkt_len(skb);
 		sch->qstats.requeues++;
-		ret = NET_XMIT_SUCCESS;
-	}
-
-	if (ret != NET_XMIT_SUCCESS) {
-		if (net_xmit_drop_count(ret)) {
-			sch->qstats.drops++;
-			return ret;
-		}
 	}
-
 	return NET_XMIT_SUCCESS;
 }
 

^ permalink raw reply related

* Re: [PATCH] qlge: fix endian issue
From: David Miller @ 2012-07-03  9:22 UTC (permalink / raw)
  To: roy.qing.li; +Cc: netdev, ron.mercer
In-Reply-To: <1341305276-17053-1-git-send-email-roy.qing.li@gmail.com>

From: roy.qing.li@gmail.com
Date: Tue,  3 Jul 2012 16:47:56 +0800

> Signed-off-by: RongQing.Li <roy.qing.li@gmail.com>

Is "RongQing.Li" really how you would write your name?

^ permalink raw reply

* Re: [PATCH net-next 09/10] net/mlx4_en: Manage flow steering rules with ethtool
From: Or Gerlitz @ 2012-07-03  9:00 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: davem, roland, yevgenyp, oren, netdev, Hadar Hen Zion, Amir Vadai
In-Reply-To: <1341158452.4852.107.camel@deadeye.wl.decadent.org.uk>

On 7/1/2012 7:00 PM, Ben Hutchings wrote:
>> +#define not_all_zeros_or_all_ones(field, type) \
>> >+	(field && (type)~field)
>> >+
>> >+static int mlx4_en_validate_flow(struct net_device *dev,
>> >+				 struct ethtool_rxnfc *cmd)
>> >+{
>> >+	struct ethtool_usrip4_spec *l3_mask;
>> >+	struct ethtool_tcpip4_spec *l4_mask;
>> >+	struct ethhdr *eth_mask;
>> >+	u64 full_mac = ~0ull;
>> >+	u64 zero_mac = 0;
>> >+
>> >+	if (cmd->fs.location >= MAX_NUM_OF_FS_RULES)
>> >+		return -EINVAL;
>> >+
>> >+	switch (cmd->fs.flow_type & ~FLOW_EXT) {
>> >+	case TCP_V4_FLOW:
>> >+	case UDP_V4_FLOW:
>> >+		if (cmd->fs.h_u.tcp_ip4_spec.tos)
>> >+			return -EOPNOTSUPP;
> I suspect that your filter ignores TOS, rather than only matching TOS ==
> 0, so you should actually be checking the corresponding field in the
> mask (fs.m_u). [...]

OK, thanks for pointing this out, will fix.

> >+		break;
> >+	case IP_USER_FLOW:
> >+		l3_mask = &cmd->fs.m_u.usr_ip4_spec;
> >+		if (cmd->fs.h_u.usr_ip4_spec.l4_4_bytes ||
> >+		    cmd->fs.h_u.usr_ip4_spec.tos ||
> I think this should be checking l4_4_bytes and tos in the mask.

OK

>
>> >+		    cmd->fs.h_u.usr_ip4_spec.proto ||
>> >+		    cmd->fs.h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4 ||
>> >+		    (!cmd->fs.h_u.usr_ip4_spec.ip4src &&
>> >+		     !cmd->fs.h_u.usr_ip4_spec.ip4dst) ||
>> >+		    not_all_zeros_or_all_ones(l3_mask->ip4src, __be32) ||
>> >+		    not_all_zeros_or_all_ones(l3_mask->ip4dst, __be32))
>> >+			return -EOPNOTSUPP;
>> >+		break;
>> >+	case ETHER_FLOW:
>> >+		eth_mask = &cmd->fs.m_u.ether_spec;
>> >+		if (memcmp(eth_mask->h_source, &zero_mac, ETH_ALEN))
>> >+			return -EOPNOTSUPP;
>> >+		if (!memcmp(eth_mask->h_dest, &zero_mac, ETH_ALEN))
>> >+			return -EOPNOTSUPP;
> But in the next statement you test whether eth_mask->h_dest is either
> all-zeroes or all-ones.  Is all-zeroes valid or not?  I suspect you
> actually intend to reject the case where both h_dest and h_proto are masked out.

indeed, this code section can be better written, will fix for V1


>
>> >+		if (not_all_zeros_or_all_ones(eth_mask->h_proto, __be16) ||
>> >+		    (memcmp(eth_mask->h_dest, &zero_mac, ETH_ALEN) &&
>> >+		     memcmp(eth_mask->h_dest, &full_mac, ETH_ALEN)))
>> >+			return -EOPNOTSUPP;
>> >+		break;
>> >+	default:
>> >+		return -EOPNOTSUPP;
>> >+	}
>> >+
>> >+	if ((cmd->fs.flow_type & FLOW_EXT)) {
>> >+		if (cmd->fs.m_ext.vlan_etype ||
>> >+		    not_all_zeros_or_all_ones(cmd->fs.m_ext.vlan_tci,
>> >+					       __be16)) {
>> >+			return -EOPNOTSUPP;
>> >+		}
>> >+	}
>> >+
>> >+	return 0;
>> >+}

^ permalink raw reply

* Re: [PATCH net-next 09/10] net/mlx4_en: Manage flow steering rules with ethtool
From: Or Gerlitz @ 2012-07-03  8:58 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: davem, roland, Yevgeny Petrilin, Oren Duer, netdev,
	Hadar Hen Zion, Amir Vadai
In-Reply-To: <1341280041.2590.39.camel@bwh-desktop.uk.solarflarecom.com>

On 7/3/2012 4:47 AM, Ben Hutchings wrote:
>> Under this logic, we can use the values and not the masks, isn't that?
> No, it's perfectly valid to specify a filter that matches, for example,
> a destination IP address of 0.0.0.0 with mask of 255.255.255.255.  So
> you really need to check the mask.  If your filter hardware doesn't
> support zero values for some fields then you'll need to reject them in
> mlx4_en_validate_flow.

Got it, will change to use masks all over the place, as you pointed out 
we need to do.

^ permalink raw reply

* Re: [PATCH 00/12] Swap-over-NFS without deadlocking V8
From: Mel Gorman @ 2012-07-03  8:58 UTC (permalink / raw)
  To: Eric B Munson
  Cc: Andrew Morton, Linux-MM, Linux-Netdev, Linux-NFS, LKML,
	David Miller, Trond Myklebust, Neil Brown, Christoph Hellwig,
	Peter Zijlstra, Mike Christie, Sebastian Andrzej Siewior
In-Reply-To: <20120703001051.GA5508@mgebm.net>

On Mon, Jul 02, 2012 at 08:10:51PM -0400, Eric B Munson wrote:
> On Mon, 02 Jul 2012, Mel Gorman wrote:
> 
> > On Sun, Jul 01, 2012 at 01:22:54PM -0400, Eric B Munson wrote:
> > > On Fri, 29 Jun 2012, Mel Gorman wrote:
> > > 
> > > > Changelog since V7
> > > >   o Rebase to linux-next 20120629
> > > >   o bi->page_dma instead of bi->page in intel driver
> > > >   o Build fix for !CONFIG_NET					(sebastian)
> > > >   o Restore PF_MEMALLOC flags correctly in all cases		(jlayton)
> > > > 
> > > > Changelog since V6
> > > >   o Rebase to linux-next 20120622
> > > > 
> > > > Changelog since V5
> > > >   o Rebase to v3.5-rc3
> > > > 
> > > > Changelog since V4
> > > >   o Catch if SOCK_MEMALLOC flag is cleared with rmem tokens	(davem)
> > > > 
> > > > Changelog since V3
> > > >   o Rebase to 3.4-rc5
> > > >   o kmap pages for writing to swap				(akpm)
> > > >   o Move forward declaration to reduce chance of duplication	(akpm)
> > > > 
> > > > Changelog since V2
> > > >   o Nothing significant, just rebases. A radix tree lookup is replaced with
> > > >     a linear search would be the biggest rebase artifact
> > > > 
> > > > This patch series is based on top of "Swap-over-NBD without deadlocking v14"
> > > > as it depends on the same reservation of PF_MEMALLOC reserves logic.
> > > > 
> > > > When a user or administrator requires swap for their application, they
> > > > create a swap partition and file, format it with mkswap and activate it with
> > > > swapon. In diskless systems this is not an option so if swap is required
> > > > then swapping over the network is considered.  The two likely scenarios
> > > > are when blade servers are used as part of a cluster where the form factor
> > > > or maintenance costs do not allow the use of disks and thin clients.
> > > > 
> > > > The Linux Terminal Server Project recommends the use of the Network
> > > > Block Device (NBD) for swap but this is not always an option.  There is
> > > > no guarantee that the network attached storage (NAS) device is running
> > > > Linux or supports NBD. However, it is likely that it supports NFS so there
> > > > are users that want support for swapping over NFS despite any performance
> > > > concern. Some distributions currently carry patches that support swapping
> > > > over NFS but it would be preferable to support it in the mainline kernel.
> > > > 
> > > > Patch 1 avoids a stream-specific deadlock that potentially affects TCP.
> > > > 
> > > > Patch 2 is a small modification to SELinux to avoid using PFMEMALLOC
> > > > 	reserves.
> > > > 
> > > > Patch 3 adds three helpers for filesystems to handle swap cache pages.
> > > > 	For example, page_file_mapping() returns page->mapping for
> > > > 	file-backed pages and the address_space of the underlying
> > > > 	swap file for swap cache pages.
> > > > 
> > > > Patch 4 adds two address_space_operations to allow a filesystem
> > > > 	to pin all metadata relevant to a swapfile in memory. Upon
> > > > 	successful activation, the swapfile is marked SWP_FILE and
> > > > 	the address space operation ->direct_IO is used for writing
> > > > 	and ->readpage for reading in swap pages.
> > > > 
> > > > Patch 5 notes that patch 3 is bolting
> > > > 	filesystem-specific-swapfile-support onto the side and that
> > > > 	the default handlers have different information to what
> > > > 	is available to the filesystem. This patch refactors the
> > > > 	code so that there are generic handlers for each of the new
> > > > 	address_space operations.
> > > > 
> > > > Patch 6 adds an API to allow a vector of kernel addresses to be
> > > > 	translated to struct pages and pinned for IO.
> > > > 
> > > > Patch 7 adds support for using highmem pages for swap by kmapping
> > > > 	the pages before calling the direct_IO handler.
> > > > 
> > > > Patch 8 updates NFS to use the helpers from patch 3 where necessary.
> > > > 
> > > > Patch 9 avoids setting PF_private on PG_swapcache pages within NFS.
> > > > 
> > > > Patch 10 implements the new swapfile-related address_space operations
> > > > 	for NFS and teaches the direct IO handler how to manage
> > > > 	kernel addresses.
> > > > 
> > > > Patch 11 prevents page allocator recursions in NFS by using GFP_NOIO
> > > > 	where appropriate.
> > > > 
> > > > Patch 12 fixes a NULL pointer dereference that occurs when using
> > > > 	swap-over-NFS.
> > > > 
> > > > With the patches applied, it is possible to mount a swapfile that is on an
> > > > NFS filesystem. Swap performance is not great with a swap stress test taking
> > > > roughly twice as long to complete as if the swap device was backed by NBD.
> > > 
> > > To test this set I am using memory cgroups to force swap usage.  I am seeing
> > > the cgroup controller killing my processes instead of using the nfs swapfile.
> > > 
> > 
> > How sure are you that this is not a cgroup bug? For dirty file data on some
> > kernels, cgroups can prematurely kill processes if pages are not being
> > cleaned fast enough. I would not expect the same problem for anonymous
> > pages but it's worth considering. Please also test with a normal swapfile.
> > 
> > If OOM is disabled and the process hangs, try capturing a sysrq+t and
> > see where the process is stuck.
> > 
> 
> It looks like the problem is with cgroups, when I run without cgroups and limit
> memory on the boot command line everything works fine.  To test I limited the
> machine to 1G of ram then ran several memory benchmarks with work set sizes of
> 1.5G, all completed successfully with my swap file located on an NFS share.
> 
> Tested-by: Eric B Munson <emunson@mgebm.net>

Thanks a lot for testing.

-- 
Mel Gorman
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply

* Re: [PATCH net-next 09/10] net/mlx4_en: Manage flow steering rules with ethtool
From: Or Gerlitz @ 2012-07-03  8:56 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: davem, roland, Yevgeny Petrilin, Oren Duer, netdev,
	Hadar Hen Zion, Amir Vadai
In-Reply-To: <1341280041.2590.39.camel@bwh-desktop.uk.solarflarecom.com>

On 7/3/2012 4:47 AM, Ben Hutchings wrote:
> If the hardware can only match the VID then you need to validate that
> the mask is either 0 or 0xfff instead of 0 or 0xffff.

sure, will fix

^ permalink raw reply

* [PATCH] qlge: fix endian issue
From: roy.qing.li @ 2012-07-03  8:47 UTC (permalink / raw)
  To: netdev; +Cc: ron.mercer

From: RongQing.Li <roy.qing.li@gmail.com>

Commit 6d29b1ef introduced a bug: ntohs is __be16_to_cpu,
not cpu_to_be16.

We always use htons on IP_OFFSET and IP_MF, then compare
with the network packet.

Signed-off-by: RongQing.Li <roy.qing.li@gmail.com>
---
 drivers/net/ethernet/qlogic/qlge/qlge_main.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 09d8d33..7c520fa 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -1546,7 +1546,7 @@ static void ql_process_mac_rx_page(struct ql_adapter *qdev,
 			struct iphdr *iph =
 				(struct iphdr *) ((u8 *)addr + ETH_HLEN);
 			if (!(iph->frag_off &
-				cpu_to_be16(IP_MF|IP_OFFSET))) {
+				htons(IP_MF|IP_OFFSET))) {
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
 				netif_printk(qdev, rx_status, KERN_DEBUG,
 					     qdev->ndev,
@@ -1654,7 +1654,7 @@ static void ql_process_mac_rx_skb(struct ql_adapter *qdev,
 			/* Unfragmented ipv4 UDP frame. */
 			struct iphdr *iph = (struct iphdr *) skb->data;
 			if (!(iph->frag_off &
-				ntohs(IP_MF|IP_OFFSET))) {
+				htons(IP_MF|IP_OFFSET))) {
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
 				netif_printk(qdev, rx_status, KERN_DEBUG,
 					     qdev->ndev,
@@ -1968,7 +1968,7 @@ static void ql_process_mac_split_rx_intr(struct ql_adapter *qdev,
 		/* Unfragmented ipv4 UDP frame. */
 			struct iphdr *iph = (struct iphdr *) skb->data;
 			if (!(iph->frag_off &
-				ntohs(IP_MF|IP_OFFSET))) {
+				htons(IP_MF|IP_OFFSET))) {
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
 				netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
 					     "TCP checksum done!\n");
-- 
1.7.1

^ permalink raw reply related

* Re: [PATCH net-next 09/10] net/mlx4_en: Manage flow steering rules with ethtool
From: Or Gerlitz @ 2012-07-03  8:14 UTC (permalink / raw)
  To: Andreas Schwab
  Cc: David Laight, Joe Perches, Ben Hutchings, davem, roland, yevgenyp,
	oren, netdev, Hadar Hen Zion, Amir Vadai
In-Reply-To: <m28vf2o0oa.fsf@igel.home>

On 7/2/2012 2:35 PM, Andreas Schwab wrote:
>
> 	field == 0 || field == (typeof field)~(typeof field)0
> You can avoid that by using (typeof field)-1.
>

OK, thanks everybody, we will take that path.

Or.

^ permalink raw reply

* AW: AW: AW: RFC: replace packets already in queue
From: Erdt, Ralph @ 2012-07-03  7:29 UTC (permalink / raw)
  To: Eric Dumazet, Nicolas de Pesloüan; +Cc: Rick Jones, netdev@vger.kernel.org
In-Reply-To: <1341266168.22621.466.camel@edumazet-glaptop>

> > If I were you, I would use a tun/tap interface and manage a private
> > packet queue in userspace. This way, you wouldn't have to manage the
> overhead of porting your kernel code to every new kernel versions.
> >
> 
> This seems a good idea.
> 
> Then you can do other coalescing stuff, like TCP ACK that could be
> aggregated to single ACK as well.

Thanks for the idea. But this is only an option when just doing the replace thing.
But the charm of the qdisc solution is the complete integration into TC. It's completely compatible with the other options, so that you can create a bigger TC rule set. And creating the rules is a standard operation for the administrators - they know TC.

In terms of the other coalescing stuff (we didn't use TCP, because it's not possible) - it's already done in the device "driver" I mentioned. Yes, we can extend the "driver", but the qdisc solution has the benefit that there is a clear separation.

Nevertheless we will discuss the idea internally. Maybe the group got another idea based on this.

^ permalink raw reply

* Urgent Assistance Needed
From: Anita George @ 2012-07-03  7:15 UTC (permalink / raw)
  To: Recipients

[-- Attachment #1: Mail message body --]
[-- Type: text/plain, Size: 36 bytes --]

OPEN AND READ THE ATTACHMENT  FILE..

[-- Attachment #2: George.rtf --]
[-- Type: application/octet-stream, Size: 3447 bytes --]

^ permalink raw reply

* Re: [RFC PATCH net-next] ipvs: add missing lock in ip_vs_ftp_init_conn()
From: Julian Anastasov @ 2012-07-03  7:12 UTC (permalink / raw)
  To: Xiaotian Feng
  Cc: netdev, lvs-devel, netfilter-devel, netfilter, coreteam,
	linux-kernel, Xiaotian Feng, Wensong Zhang, Simon Horman,
	Pablo Neira Ayuso, Patrick McHardy, David S. Miller
In-Reply-To: <1340890587-8169-1-git-send-email-xtfeng@gmail.com>


	Hello,

On Thu, 28 Jun 2012, Xiaotian Feng wrote:

> We met a kernel panic in 2.6.32.43 kernel:
> 
> [2680191.848044] IPVS: ip_vs_conn_hash(): request for already hashed, called from run_timer_softirq+0x175/0x1d0
> <snip>
> [2680311.849009] general protection fault: 0000 [#1] SMP
> [2680311.853001] RIP: 0010:[<ffffffff815f155c>]  [<ffffffff815f155c>] ip_vs_conn_expire+0xdc/0x2f0
> [2680311.853001] RSP: 0018:ffff880028303e70  EFLAGS: 00010202
> [2680311.853001] RAX: dead000000200200 RBX: ffff8801aad00b80 RCX: 0000000000001d90
> [2680311.853001] RDX: dead000000100100 RSI: 000000004fd59800 RDI: ffff8801aad00c08
> <snip>
> [2680311.853001] Call Trace:
> [2680311.853001]  <IRQ>
> [2680311.853001]  [<ffffffff815f1480>] ? ip_vs_conn_expire+0x0/0x2f0
> [2680311.853001]  [<ffffffff8104e2a5>] run_timer_softirq+0x175/0x1d0
> [2680311.853001]  [<ffffffff81021a48>] ? lapic_next_event+0x18/0x20
> [2680311.853001]  [<ffffffff81049a13>] __do_softirq+0xb3/0x150
> [2680311.853001]  [<ffffffff8100cc5c>] call_softirq+0x1c/0x30
> [2680311.853001]  [<ffffffff8100ea9a>] do_softirq+0x4a/0x80
> [2680311.853001]  [<ffffffff81049957>] irq_exit+0x77/0x80
> [2680311.853001]  [<ffffffff81021f2c>] smp_apic_timer_interrupt+0x6c/0xa0
> [2680311.853001]  [<ffffffff8100c633>] apic_timer_interrupt+0x13/0x20
> [2680311.853001]  <EOI>
> [2680311.853001]  [<ffffffff81013b52>] ? mwait_idle+0x52/0x70
> [2680311.853001]  [<ffffffff8100a7b0>] ? enter_idle+0x20/0x30
> [2680311.853001]  [<ffffffff8100ac62>] ? cpu_idle+0x52/0x80
> [2680311.853001]  [<ffffffff816d504d>] ? start_secondary+0x19d/0x280
> 
> rax and rdx are LIST_POISON1 and LIST_POISON2, so the kernel is calling list_del() on an already deleted
> connection, resulting in the general protection fault.
> 
> The "request for already hashed" warning tells us someone might have changed the connection flags
> incorrectly: as described in commit aea9d711, it changes the connection flags but doesn't
> put the connection back on the list. So ip_vs_conn_hash() throws a warning and returns.
> Later, when ip_vs_conn_expire fires again, ip_vs_conn_unhash() will find the HASHED connection
> and list_del() it, and then the kernel panic happens.
> 
> After code review, the only place where the kernel changes the connection flags without protection is
> in ip_vs_ftp_init_conn().
> 
> Signed-off-by: Xiaotian Feng <dannyfeng@tencent.com>
> Cc: Wensong Zhang <wensong@linux-vs.org>
> Cc: Simon Horman <horms@verge.net.au>
> Cc: Julian Anastasov <ja@ssi.bg>
> Cc: Pablo Neira Ayuso <pablo@netfilter.org>
> Cc: Patrick McHardy <kaber@trash.net>
> Cc: "David S. Miller" <davem@davemloft.net> 

	For the fix below:

Acked-by: Julian Anastasov <ja@ssi.bg>

	Simon, the change looks ok. ip_vs_ftp_init_conn is called
from context where cp->lock is not locked (no double lock), so it
should be safe for the backup.

	Only that the comment is not specifying that we
fix a problem in the backup server.

> ---
>  net/netfilter/ipvs/ip_vs_ftp.c |    2 ++
>  1 files changed, 2 insertions(+), 0 deletions(-)
> 
> diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
> index b20b29c..c2bc264 100644
> --- a/net/netfilter/ipvs/ip_vs_ftp.c
> +++ b/net/netfilter/ipvs/ip_vs_ftp.c
> @@ -65,8 +65,10 @@ static int ip_vs_ftp_pasv;
>  static int
>  ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
>  {
> +	spin_lock(&cp->lock);
>  	/* We use connection tracking for the command connection */
>  	cp->flags |= IP_VS_CONN_F_NFCT;
> +	spin_unlock(&cp->lock);
>  	return 0;
>  }
>  
> -- 
> 1.7.1

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply

* Re: Deleting an alias causes rest to get deleted
From: Julian Anastasov @ 2012-07-03  6:34 UTC (permalink / raw)
  To: Volkan Yazıcı; +Cc: netdev
In-Reply-To: <4FF1FC74.8080401@gmail.com>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 4139 bytes --]


	Hello,

On Mon, 2 Jul 2012, Volkan Yazıcı wrote:

> Hi!
> 
> I observe an IP aliasing anomaly that occurs when I try to delete an IP alias
> from an interface. That is, when I delete the first address in a set of IP
> aliased addresses assigned according to a particular subnet, rest of the
> aliases get deleted as well. Check out the below snippet.

	This has been in Linux maybe since the 2.3/2.4 times

> 
>    $ *for I in `seq 1 6`; do sudo ip addr add 192.168.2.$I/29 dev eth0;
>    done*
>    $ ip addr list
>    1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN
>         link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>         inet 127.0.0.1/8 scope host lo
>         inet6 ::1/128 scope host
>            valid_lft forever preferred_lft forever
>    2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast
>    state UP qlen 1000
>         link/ether 00:24:54:b9:1c:f8 brd ff:ff:ff:ff:ff:ff
>    *inet 192.168.1.200/24 brd 192.168.1.255 scope global eth0**
>         inet 192.168.2.1/29 scope global eth0
>         inet 192.168.2.2/29 scope global secondary eth0
>         inet 192.168.2.3/29 scope global secondary eth0
>         inet 192.168.2.4/29 scope global secondary eth0
>         inet 192.168.2.5/29 scope global secondary eth0
>         inet 192.168.2.6/29 scope global secondary eth0*
>         inet6 fe80::224:54ff:feb9:1cf8/64 scope link
>            valid_lft forever preferred_lft forever
>    3: wlan0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN qlen 1000
>         link/ether e8:39:df:6a:21:2a brd ff:ff:ff:ff:ff:ff
>    $ *sudo ip addr del 192.168.2.1/29 dev eth0*
>    $ ip addr list
>    1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN
>         link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>         inet 127.0.0.1/8 scope host lo
>         inet6 ::1/128 scope host
>            valid_lft forever preferred_lft forever
>    2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast
>    state UP qlen 1000
>         link/ether 00:24:54:b9:1c:f8 brd ff:ff:ff:ff:ff:ff
>    *inet 192.168.1.200/24 brd 192.168.1.255 scope global eth0*
>         inet6 fe80::224:54ff:feb9:1cf8/64 scope link
>            valid_lft forever preferred_lft forever
>    3: wlan0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN qlen 1000
>         link/ether e8:39:df:6a:21:2a brd ff:ff:ff:ff:ff:ff
> 
> As you can see, deleting 192.168.2.1/29 causes the rest of the aliased interfaces to get
> deleted as well. This is something that is slightly documented in the ifconfig
> manual: /for every scope (i.e. same net with address/netmask combination) all
> aliases are deleted, if you delete the first (primary)/. So what is the right
> way of just deleting the first (primary) alias without affecting the rest? If
> this is a scoping issue, is it possible to assign each alias as primary within
> its own dedicated scope?

	There is a (yet) undocumented feature for the
interfaces:

/proc/sys/net/ipv4/conf/*/promote_secondaries

	You set it for specific interface _or_ for "all".
It defaults to 0. When you enable it, deleting a primary
address will not delete all secondary addresses but will
change the next secondary to primary. The term alias may
refer to addresses on the same interface while here the
problem is for a subset of addresses - from the same subnet.
Primary is the first address added for the configured
subnet, all next addresses in subnet are added as
secondaries as shown by ip addr list.

> As a side note, when I first asked this question to Stephen Hemminger (he
> forwarded me to this mailing list) he also told me that "/In Linux the
> interface aliases are really a legacy from the BSD style addressing, and don't
> act the same. It is not common practice to use them./" Is that really the
> case? Because, as you know, IP aliasing is the heart of a majority of the
> high-availability and clustering solutions in Linux. Is IP aliasing a really
> deprecated technology in Linux? Should we avoid using it? If so, what do you
> recommend as an alternative?

	Please do not stop using aliases!

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply

* RE: [PATCH net 4/7] qlge: Fixed double pci free upon tx_ring->q allocation failure.
From: Jitendra Kalsaria @ 2012-07-03  5:56 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: David Miller, netdev, Ron Mercer, Dept-NX Linux NIC Driver
In-Reply-To: <1341278521.2590.30.camel@bwh-desktop.uk.solarflarecom.com>



-----Original Message-----
From: Ben Hutchings [mailto:bhutchings@solarflare.com] 
Sent: Monday, July 02, 2012 6:22 PM
To: Jitendra Kalsaria
Cc: David Miller; netdev; Ron Mercer; Dept-NX Linux NIC Driver
Subject: Re: [PATCH net 4/7] qlge: Fixed double pci free upon tx_ring->q allocation failure.

On Mon, 2012-07-02 at 19:41 -0400, Jitendra Kalsaria wrote:
> From: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
> 
> Signed-off-by: Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
> ---
>  drivers/net/ethernet/qlogic/qlge/qlge_main.c |   14 +++++++-------
>  1 files changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
> index cdbc860..9ecd15f 100644
> --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
> +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
> @@ -2701,20 +2701,20 @@ static int ql_alloc_tx_resources(struct ql_adapter *qdev,
>  	    pci_alloc_consistent(qdev->pdev, tx_ring->wq_size,
>  				 &tx_ring->wq_base_dma);
>  
> -	if ((tx_ring->wq_base == NULL) ||
> -	    tx_ring->wq_base_dma & WQ_ADDR_ALIGN) {
> -		netif_err(qdev, ifup, qdev->ndev, "tx_ring alloc failed.\n");
> -		return -ENOMEM;
> -	}
> +	if (!tx_ring->wq_base || tx_ring->wq_base_dma & WQ_ADDR_ALIGN)
> +		goto err;
> +

So in case pci_alloc_consistent() fails, you now try to free anyway.
Not sure whether that's safe; do you feel lucky?

[JK] Ahh, my apology. 

>  	tx_ring->q =
>  	    kmalloc(tx_ring->wq_len * sizeof(struct tx_ring_desc), GFP_KERNEL);
> -	if (tx_ring->q == NULL)
> +	if (!tx_ring->q)
>  		goto err;

Unrelated change.

>  	return 0;
>  err:
>  	pci_free_consistent(qdev->pdev, tx_ring->wq_size,
> -			    tx_ring->wq_base, tx_ring->wq_base_dma);
> +			tx_ring->wq_base, tx_ring->wq_base_dma);

This was nicely indented before...

> +	tx_ring->wq_base = NULL;
> +	netif_err(qdev, ifup, qdev->ndev, "tx_ring alloc failed.\n");
>  	return -ENOMEM;
>  }
>  

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.



^ permalink raw reply

* Re: [PATCH v6] bonding support for IPv6 transmit hashing
From: David Miller @ 2012-07-03  5:43 UTC (permalink / raw)
  To: linux; +Cc: fubar, netdev
In-Reply-To: <4FF2853D.9070707@8192.net>

From: John Eaglesham <linux@8192.net>
Date: Mon, 02 Jul 2012 22:38:05 -0700

> On 7/2/2012 10:14 PM, David Miller wrote:
>> From: John Eaglesham <linux@8192.net>
>> Date: Mon, 02 Jul 2012 22:01:20 -0700
>>
>>> I replaced the mixed tabs and spaces with all tabs when I updated that
>>> function, but in retrospect the tabs and spaces were likely
>>> intentional. I will revert.
>>
>> They were.  Don't change existing coding style unless you are certain
>> it is wrong.
>>
>> Besides any such changes are completely outside of the scope of
>> the changes you are making, so you should have left them out in
>> any event.
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> 
> Should the indents in the code I am adding follow the style guide and
> use only tabs, or follow the style already present and mix spaces and
> tabs for indentation?

You should use a mix of tabs and spaces, as necessary, to get things to line up
how I told you they need to line up.

^ permalink raw reply

* Re: [PATCH v6] bonding support for IPv6 transmit hashing
From: John Eaglesham @ 2012-07-03  5:38 UTC (permalink / raw)
  To: David Miller; +Cc: fubar, netdev
In-Reply-To: <20120702.221425.418792617776598841.davem@davemloft.net>

On 7/2/2012 10:14 PM, David Miller wrote:
> From: John Eaglesham <linux@8192.net>
> Date: Mon, 02 Jul 2012 22:01:20 -0700
>
>> I replaced the mixed tabs and spaces with all tabs when I updated that
>> function, but in retrospect the tabs and spaces were likely
>> intentional. I will revert.
>
> They were.  Don't change existing coding style unless you are certain
> it is wrong.
>
> Besides any such changes are completely outside of the scope of
> the changes you are making, so you should have left them out in
> any event.
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

Should the indents in the code I am adding follow the style guide and 
use only tabs, or follow the style already present and mix spaces and 
tabs for indentation?

Thanks,
John

^ permalink raw reply

* Re: Awaiting upstream? - Re: [PATCH net-next v3] em_canid: Ematch rule to match CAN frames according to their identifiers
From: David Miller @ 2012-07-03  5:22 UTC (permalink / raw)
  To: socketcan; +Cc: mkl, netdev, linux-can, lartc
In-Reply-To: <4FF28005.4050108@hartkopp.net>

From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Tue, 03 Jul 2012 07:15:49 +0200

> Hello Dave,
> 
> i've seen that you tagged this patch as "Awaiting upstream" in Patchwork.
> 
> Does this mean that *you* are waiting for another re-spin of the patch OR do
> you expect this patch to go through a sub-tree like Marc's can-next tree?

It should go through the CAN tree.

^ permalink raw reply

* Awaiting upstream? - Re: [PATCH net-next v3] em_canid: Ematch rule to match CAN frames according to their identifiers
From: Oliver Hartkopp @ 2012-07-03  5:15 UTC (permalink / raw)
  To: David Miller; +Cc: Marc Kleine-Budde, netdev, linux-can, lartc
In-Reply-To: <4FF1E26F.5000106@hartkopp.net>

Hello Dave,

i've seen that you tagged this patch as "Awaiting upstream" in Patchwork.

Does this mean that *you* are waiting for another re-spin of the patch OR do
you expect this patch to go through a sub-tree like Marc's can-next tree?

Who is committing these "awaiting upstream" patches?

Thanks for clarification,
Oliver

^ permalink raw reply

* Re: [PATCH v6] bonding support for IPv6 transmit hashing
From: David Miller @ 2012-07-03  5:14 UTC (permalink / raw)
  To: linux; +Cc: fubar, netdev
In-Reply-To: <4FF27CA0.7030708@8192.net>

From: John Eaglesham <linux@8192.net>
Date: Mon, 02 Jul 2012 22:01:20 -0700

> I replaced the mixed tabs and spaces with all tabs when I updated that
> function, but in retrospect the tabs and spaces were likely
> intentional. I will revert.

They were.  Don't change existing coding style unless you are certain
it is wrong.

Besides any such changes are completely outside of the scope of
the changes you are making, so you should have left them out in
any event.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox