Netdev List
 help / color / mirror / Atom feed
* [PATCH] ipv4: Elide fib_validate_source() completely when possible.
From: David Miller @ 2012-06-29  9:05 UTC (permalink / raw)
  To: netdev


If rpfilter is off (or the SKB has an IPSEC path) and there are no
tclassid users, we don't have to do anything at all when
fib_validate_source() is invoked besides setting the itag to zero.

We monitor tclassid uses with a counter (modified only under RTNL and
marked __read_mostly) and we protect the fib_validate_source() real
work with a test against this counter and whether rpfilter is to be
done.

Having a way to know whether tclassid processing is needed or not
also opens the door for future optimized rpfilter algorithms that do
not perform full FIB lookups.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h  |    1 +
 include/net/ip_fib.h     |    5 +++++
 net/core/fib_rules.c     |    4 ++++
 net/ipv4/fib_frontend.c  |   32 ++++++++++++++++++++++++--------
 net/ipv4/fib_rules.c     |   16 +++++++++++++++-
 net/ipv4/fib_semantics.c |   10 ++++++++++
 6 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 075f1e3..e361f48 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -52,6 +52,7 @@ struct fib_rules_ops {
 					     struct sk_buff *,
 					     struct fib_rule_hdr *,
 					     struct nlattr **);
+	void			(*delete)(struct fib_rule *);
 	int			(*compare)(struct fib_rule *,
 					   struct fib_rule_hdr *,
 					   struct nlattr **);
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 619f68a..3dc7c96 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -235,6 +235,11 @@ extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 			       u8 tos, int oif, struct net_device *dev,
 			       struct in_device *idev, u32 *itag);
 extern void fib_select_default(struct fib_result *res);
+#ifdef CONFIG_IP_ROUTE_CLASSID
+extern int fib_num_tclassid_users;
+#else
+#define fib_num_tclassid_users 0
+#endif
 
 /* Exported by fib_semantics.c */
 extern int ip_fib_check_default(__be32 gw, struct net_device *dev);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 72cceb7..ab7db83 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -151,6 +151,8 @@ static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 
 	list_for_each_entry_safe(rule, tmp, &ops->rules_list, list) {
 		list_del_rcu(&rule->list);
+		if (ops->delete)
+			ops->delete(rule);
 		fib_rule_put(rule);
 	}
 }
@@ -499,6 +501,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
 		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 				   NETLINK_CB(skb).pid);
+		if (ops->delete)
+			ops->delete(rule);
 		fib_rule_put(rule);
 		flush_route_cache(ops);
 		rules_ops_put(ops);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c84cff5..ae528d1 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -31,6 +31,7 @@
 #include <linux/if_addr.h>
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
+#include <linux/cache.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -217,6 +218,10 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
 	return inet_select_addr(dev, ip_hdr(skb)->saddr, scope);
 }
 
+#ifdef CONFIG_IP_ROUTE_CLASSID
+int fib_num_tclassid_users __read_mostly;
+#endif
+
 /* Given (packet source, input interface) and optional (dst, oif, tos):
  * - (main) check, that source is valid i.e. not broadcast or our local
  *   address.
@@ -225,11 +230,11 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
  * - check, that packet arrived from expected physical interface.
  * called with rcu_read_lock()
  */
-int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
-			int oif, struct net_device *dev, struct in_device *idev,
-			u32 *itag)
+static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
+				 u8 tos, int oif, struct net_device *dev,
+				 int rpf, struct in_device *idev, u32 *itag)
 {
-	int ret, no_addr, rpf, accept_local;
+	int ret, no_addr, accept_local;
 	struct fib_result res;
 	struct flowi4 fl4;
 	struct net *net;
@@ -242,12 +247,9 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos,
 	fl4.flowi4_tos = tos;
 	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
 
-	no_addr = rpf = accept_local = 0;
+	no_addr = accept_local = 0;
 	no_addr = idev->ifa_list == NULL;
 
-	/* Ignore rp_filter for packets protected by IPsec. */
-	rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
-
 	accept_local = IN_DEV_ACCEPT_LOCAL(idev);
 	fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
 
@@ -303,6 +305,20 @@ e_rpf:
 	return -EXDEV;
 }
 
+/* Ignore rp_filter for packets protected by IPsec. */
+int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
+			u8 tos, int oif, struct net_device *dev,
+			struct in_device *idev, u32 *itag)
+{
+	int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
+
+	if (!r && !fib_num_tclassid_users) {
+		*itag = 0;
+		return 0;
+	}
+	return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag);
+}
+
 static inline __be32 sk_extract_addr(struct sockaddr *addr)
 {
 	return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 2d043f7..b23fd95 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -169,8 +169,11 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 		rule4->dst = nla_get_be32(tb[FRA_DST]);
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
-	if (tb[FRA_FLOW])
+	if (tb[FRA_FLOW]) {
 		rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
+		if (rule4->tclassid)
+			fib_num_tclassid_users++;
+	}
 #endif
 
 	rule4->src_len = frh->src_len;
@@ -184,6 +187,16 @@ errout:
 	return err;
 }
 
+static void fib4_rule_delete(struct fib_rule *rule)
+{
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
+
+	if (rule4->tclassid)
+		fib_num_tclassid_users--;
+#endif
+}
+
 static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
 			     struct nlattr **tb)
 {
@@ -256,6 +269,7 @@ static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = {
 	.action		= fib4_rule_action,
 	.match		= fib4_rule_match,
 	.configure	= fib4_rule_configure,
+	.delete		= fib4_rule_delete,
 	.compare	= fib4_rule_compare,
 	.fill		= fib4_rule_fill,
 	.default_pref	= fib_default_rule_pref,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 415f823..c46c20b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -163,6 +163,12 @@ void free_fib_info(struct fib_info *fi)
 		return;
 	}
 	fib_info_cnt--;
+#ifdef CONFIG_IP_ROUTE_CLASSID
+	change_nexthops(fi) {
+		if (nexthop_nh->nh_tclassid)
+			fib_num_tclassid_users--;
+	} endfor_nexthops(fi);
+#endif
 	call_rcu(&fi->rcu, free_fib_info_rcu);
 }
 
@@ -421,6 +427,8 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 #ifdef CONFIG_IP_ROUTE_CLASSID
 			nla = nla_find(attrs, attrlen, RTA_FLOW);
 			nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
+			if (nexthop_nh->nh_tclassid)
+				fib_num_tclassid_users++;
 #endif
 		}
 
@@ -815,6 +823,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		nh->nh_flags = cfg->fc_flags;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		nh->nh_tclassid = cfg->fc_flow;
+		if (nh->nh_tclassid)
+			fib_num_tclassid_users++;
 #endif
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 		nh->nh_weight = 1;
-- 
1.7.10

^ permalink raw reply related

* Re: [RFC PATCH net-next] ipvs: add missing lock in ip_vs_ftp_init_conn()
From: Julian Anastasov @ 2012-06-29  9:04 UTC (permalink / raw)
  To: Xiaotian Feng
  Cc: netdev, lvs-devel, netfilter-devel, netfilter, coreteam,
	linux-kernel, Xiaotian Feng, Wensong Zhang, Simon Horman,
	Pablo Neira Ayuso, Patrick McHardy, David S. Miller
In-Reply-To: <CAJn8CcFy=K+Aizpi0pvnpXCOYXhgyq12oBgVaPvMthW_fwn4Pg@mail.gmail.com>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1925 bytes --]


	Hello,

On Fri, 29 Jun 2012, Xiaotian Feng wrote:

> > On Thu, 28 Jun 2012, Xiaotian Feng wrote:
> >
> >> We met a kernel panic in 2.6.32.43 kernel:
> >>
> >> [2680191.848044] IPVS: ip_vs_conn_hash(): request for already hashed, called from run_timer_softirq+0x175/0x1d0
> >> <snip>
> >> [2680311.849009] general protection fault: 0000 [#1] SMP

	What we see here is 120 seconds between 2680191 and
2680311. It can mean 2 things:

- some state timeout, it depends on your forwarding method.
What is it? NAT? DR?

- 60 seconds for ip_vs_conn_expire retries

> >> After code review, the only chance that kernel change connection flag without protection is
> >> in ip_vs_ftp_init_conn().
> >
> >        Hm, ip_vs_ftp_init_conn is called before 1st hashing,
> > from ip_vs_bind_app() in ip_vs_conn_new() before
> > ip_vs_conn_hash(). It should be another problem with
> > the flags. How different is IPVS in 2.6.32.43 compared to
> > recent kernels? If commit aea9d711 is present, I'm not
> > aware of other similar problems.
> 
> ip_vs_bind_app() is also called by ip_vs_try_bind_dest(), which can be
> traced to ip_vs_proc_conn().
> I've checked the changes in upstream, but nothing helps since aea9d711
> has been taken into 2.6.32.28 kernel.

	OK, this fix should make it safe for master-backup
sync and it should be applied but I suspect you are not
using sync, right? And then this fix will not solve the oops.

	There are not many places that rehash conn:

ip_vs_conn_fill_cport
	- used for FTP

ip_vs_check_template:
	- do you have persistence configured?

	After you provide details for the used forwarding
method, persistence and sync we should think how such races
with rehashing can lead to double hlist_del. Maybe
you can modify the debug message in ip_vs_conn_hash, so
that we can see cp->flags and ntohs of cp->cport, cp->dport
and cp->vport when oops happens again.

Regards

--
Julian Anastasov <ja@ssi.bg>

^ permalink raw reply

* AW: RFC: replace packets already in queue
From: Erdt, Ralph @ 2012-06-29  8:46 UTC (permalink / raw)
  To: Rick Jones; +Cc: netdev@vger.kernel.org
In-Reply-To: <4FEC854E.8080603@hp.com>

Hello Rick Jones,

> You might want to try the recent "codel" additions to the stack.  They
> seek to keep the size of queues more manageable while still allowing
> the occasional burst.
Thank you for your hint. This is surely a useful solution in normal networks, but it didn't help us:
We are working with very heterogeneous networks:
Internal: 100MBit and more.
Extern: 9,6*K*Bit and LESS(*), and shared, and...
A few other information: wireless (higher packet loss rate), medium access time > 100ms, RTT (standard ping) with IDLE network: 1,5 *seconds*, RTT with network load: minutes(!), and so on. Just very shocking..

TCP isn't usable over such a link. So we are only sending UDP. The codel didn't help us, as codel addresses the flow speed. It's dropping "randomly" (I know it's not random in the lower level, but it's random from the application's perspective) packets. 

I'm addressing the amount of information: Trying to reduce it intelligently by REPLACING old packets with new ones. Surely - the application must handle this. But in such a network an administrator has to configure the queues and he knows the applications.
In one private mail someone guessed that we are doing VoIP. No - we just want to send status information (e.g. sensor information) which becomes obsolete when new information is available.

I know, this is a very special problem, which doesn't occur in normal or even abnormal situations. But I'm sure there are some other people having this problem, too. So I'm glad to share my solution.

(*you remember the good ol' times with modems over telephone lines? When the internet was called BBS? And how it suddenly feels, when the BBS starts using ANSI? This was comfortable compared to our problem..)

Greetings
Ralph Erdt

^ permalink raw reply

* "Winner
From: Motorola Award @ 2012-06-29  8:35 UTC (permalink / raw)
  To: Recipients

You Won £400,000.00GBP from Motorola Promotion 2012. Bee Line Courier Service UK (beeline@diploma.com)for your Check delivery with your Name,Address,Country,Phone Number. call this number +448719152576 for more info 

^ permalink raw reply

* Re: [PATCH net-next] fq_codel: report congestion notification at enqueue time
From: David Miller @ 2012-06-29  8:04 UTC (permalink / raw)
  To: eric.dumazet; +Cc: nanditad, netdev, codel, ycheng, ncardwell, mattmathis
In-Reply-To: <1340949008.29822.73.camel@edumazet-glaptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 29 Jun 2012 07:50:08 +0200

> Hmm, problem is the sender thinks the packet was queued for
> transmission.
> 
>         ret = macvlan_queue_xmit(skb, dev);
>         if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
>                 struct macvlan_pcpu_stats *pcpu_stats;
> 
>                 pcpu_stats = this_cpu_ptr(vlan->pcpu_stats);
>                 u64_stats_update_begin(&pcpu_stats->syncp);
>                 pcpu_stats->tx_packets++;
>                 pcpu_stats->tx_bytes += len;
>                 u64_stats_update_end(&pcpu_stats->syncp);
>         } else {
>                 this_cpu_inc(vlan->pcpu_stats->tx_dropped);
>         }

Ok, that is the meaning this has taken on.  Same test exists in
vlan_dev.c and this test used to be present also in the ipip.h macros
some time ago.

Nobody really does anything special with this value, except to
translate it to zero when propagating back to sockets.

The only thing it guards is the selection of which statistic to
increment.

For all practical purposes it is treated as NET_XMIT_SUCCESS except in
one location, pktgen, where it causes the errors counter to increment.

Looking this over, I'd say we should just get rid of it.

^ permalink raw reply

* Re: [PATCH net-next 1/1] netxen_nic: restrict force firmware dump when dump is disabled.
From: David Miller @ 2012-06-29  7:54 UTC (permalink / raw)
  To: rajesh.borundia; +Cc: netdev, ameen.rahman, manish.chopra
In-Reply-To: <1340950341-27252-2-git-send-email-rajesh.borundia@qlogic.com>

From: Rajesh Borundia <rajesh.borundia@qlogic.com>
Date: Fri, 29 Jun 2012 02:12:21 -0400

> From: Manish chopra <manish.chopra@qlogic.com>
> 
> o Set the ethtool_dump flag (=ETH_FW_DUMP_DISABLE) when dump is disabled.
> o update driver version to 4.0.80
> 
> Signed-off-by: Manish chopra <manish.chopra@qlogic.com>
> Signed-off-by: Rajesh Borundia <rajesh.borundia@qlogic.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next] net: l2tp_eth: provide tx_dropped counter
From: David Miller @ 2012-06-29  7:54 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev
In-Reply-To: <1340950513.29822.103.camel@edumazet-glaptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 29 Jun 2012 08:15:13 +0200

> From: Eric Dumazet <edumazet@google.com>
> 
> Change l2tp_xmit_skb() to return NET_XMIT_DROP in case skb is dropped.
> 
> Use kfree_skb() instead of dev_kfree_skb() for drop_monitor pleasure.
> 
> Support tx_dropped counter for l2tp_eth
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next] fq_codel: report congestion notification at enqueue time
From: David Miller @ 2012-06-29  7:53 UTC (permalink / raw)
  To: eric.dumazet; +Cc: nanditad, netdev, codel, ycheng, ncardwell, mattmathis
In-Reply-To: <1340949008.29822.73.camel@edumazet-glaptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 29 Jun 2012 07:50:08 +0200

> Hmm, problem is the sender thinks the packet was queued for
> transmission.
> 
>         ret = macvlan_queue_xmit(skb, dev);
>         if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
>                 struct macvlan_pcpu_stats *pcpu_stats;
> 
>                 pcpu_stats = this_cpu_ptr(vlan->pcpu_stats);
>                 u64_stats_update_begin(&pcpu_stats->syncp);
>                 pcpu_stats->tx_packets++;
>                 pcpu_stats->tx_bytes += len;
>                 u64_stats_update_end(&pcpu_stats->syncp);
>         } else {
>                 this_cpu_inc(vlan->pcpu_stats->tx_dropped);
>         }
> 
> NET_XMIT_CN has a lazy semantic it seems.
> 
> I will just dont rely on it.

I think we cannot just ignore this issue.  I will take a deeper look,
because we should have NET_XMIT_CN be very well defined and adjust any
mis-use.

^ permalink raw reply

* Re: [PATCH] ipv6_tunnel: Allow receiving packets on the fallback tunnel if they pass sanity checks
From: David Miller @ 2012-06-29  7:52 UTC (permalink / raw)
  To: phil; +Cc: netdev, phild, ville.nuorvala
In-Reply-To: <20120629041552.GA27362@ipom.com>

From: Phil Dibowitz <phil@ipom.com>
Date: Thu, 28 Jun 2012 21:15:52 -0700

> From: Ville Nuorvala <ville.nuorvala@gmail.com>
> 
> At Facebook, we do Layer-3 DSR via IP-in-IP tunneling. Our load balancers wrap
> an extra IP header on incoming packets so they can be routed to the backend.
> In the v4 tunnel driver, when these packets fall on the default tunl0 device,
> the behavior is to decapsulate them and drop them back on the stack. So our
> setup is that tunl0 has the VIP and eth0 has (obviously) the backend's real
> address.
> 
> In IPv6 we do the same thing, but the v6 tunnel driver didn't have this same
> behavior - if you didn't have an explicit tunnel setup, it would drop the
> packet.
> 
> This patch brings that v4 feature to the v6 driver.
> 
> The same IPv6 address checks are performed as with any normal tunnel,
> but as the fallback tunnel endpoint addresses are unspecified, the checks
> must be performed on a per-packet basis, rather than at tunnel
> configuration time.
> 
> [Patch description modified by phil@ipom.com]
> 
> Signed-off-by: Ville Nuorvala <ville.nuorvala@gmail.com>
> Tested-by: Phil Dibowitz <phil@ipom.com>

Applied to net-next

^ permalink raw reply

* Re: [Xen-devel] [PATCH 1/1] xen/netback: only non-freed SKB is queued into tx_queue
From: David Miller @ 2012-06-29  7:50 UTC (permalink / raw)
  To: Ian.Campbell; +Cc: annie.li, xen-devel, netdev, konrad.wilk, kurt.hackel
In-Reply-To: <1340954589.5953.12.camel@dagon.hellion.org.uk>

From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Fri, 29 Jun 2012 08:23:09 +0100

> On Fri, 2012-06-29 at 00:55 +0100, David Miller wrote:
>> From: annie.li@oracle.com
>> Date: Wed, 27 Jun 2012 18:46:58 +0800
>> 
>> > From: Annie Li <Annie.li@oracle.com>
>> > 
>> > After SKB is queued into tx_queue, it will be freed if request_gop is NULL.
>> > However, no dequeue action is called in this situation, it is likely that
>> > tx_queue contains freed SKB. This patch should fix this issue, and it is
>> > based on 3.5.0-rc4+.
>> > 
>> > This issue is found through code inspection, no bug is seen with it currently.
>> > I run netperf test for several hours, and no network regression was found.
>> > 
>> > Signed-off-by: Annie Li <annie.li@oracle.com>
>> 
>> I lack the expertise necessary to properly review this, so I really
>> need a Xen expert to look this over.
> 
> Sorry, I put it to one side waiting for the repost to netdev and then
> forgot about it...
> 
> Yes, this change looks good to me:
> 
> Acked-by: Ian Campbell <ian.campbell@citrix.com>

Thanks, applied to net-next.

^ permalink raw reply

* Re: [PATCH net-next] caif-hsi: Fix merge issues.
From: David Miller @ 2012-06-29  7:48 UTC (permalink / raw)
  To: sjur.brandeland; +Cc: netdev, sjurbren
In-Reply-To: <1340951780-27406-1-git-send-email-sjur.brandeland@stericsson.com>

From: sjur.brandeland@stericsson.com
Date: Fri, 29 Jun 2012 08:36:20 +0200

> From: Sjur Brændeland <sjur.brandeland@stericsson.com>
> 
> Fix the failing merge in net-next by reverting the last
> net-next merge for caif_hsi.c and then merge in the commit:
> "caif-hsi: Bugfix - Piggyback'ed embedded CAIF frame lost"
> from the net repository. 
> 
> The commit:"caif-hsi: Add missing return in error path" from
> net repository was dropped, as it changed code previously removed in the 
> net-next repository.
> 
> Signed-off-by: Sjur Brændeland <sjur.brandeland@stericsson.com>

Applied, thanks a lot.

^ permalink raw reply

* Re: [Xen-devel] [PATCH 1/1] xen/netback: only non-freed SKB is queued into tx_queue
From: Ian Campbell @ 2012-06-29  7:23 UTC (permalink / raw)
  To: David Miller
  Cc: annie.li@oracle.com, xen-devel@lists.xensource.com,
	netdev@vger.kernel.org, konrad.wilk@oracle.com,
	kurt.hackel@oracle.com
In-Reply-To: <20120628.165550.1816352825092253548.davem@davemloft.net>

On Fri, 2012-06-29 at 00:55 +0100, David Miller wrote:
> From: annie.li@oracle.com
> Date: Wed, 27 Jun 2012 18:46:58 +0800
> 
> > From: Annie Li <Annie.li@oracle.com>
> > 
> > After SKB is queued into tx_queue, it will be freed if request_gop is NULL.
> > However, no dequeue action is called in this situation, it is likely that
> > tx_queue contains freed SKB. This patch should fix this issue, and it is
> > based on 3.5.0-rc4+.
> > 
> > This issue is found through code inspection, no bug is seen with it currently.
> > I run netperf test for several hours, and no network regression was found.
> > 
> > Signed-off-by: Annie Li <annie.li@oracle.com>
> 
> I lack the expertise necessary to properly review this, so I really
> need a Xen expert to look this over.

Sorry, I put it to one side waiting for the repost to netdev and then
forgot about it...

Yes, this change looks good to me:

Acked-by: Ian Campbell <ian.campbell@citrix.com>

^ permalink raw reply

* RE: linux-next: manual merge of the net-next tree with the net tree
From: Sjur BRENDELAND @ 2012-06-29  6:46 UTC (permalink / raw)
  To: Stephen Rothwell, David Miller, netdev@vger.kernel.org
  Cc: linux-next@vger.kernel.org, linux-kernel@vger.kernel.org,
	Per ELLEFSEN, Kim LILLIESTIERNA
In-Reply-To: <20120626131543.21e4338894b19f5e02f8bdde@canb.auug.org.au>

Hi Stephen,

> Today's linux-next merge of the net-next tree got a conflict in
> drivers/net/caif/caif_hsi.c between commits 3935600a7f34 ("caif-hsi:
> Bugfix - Piggyback'ed embedded CAIF frame lost") and 1fdc7630b2cb
> ("caif-hsi: Add missing return in error path") from the net tree and
> commits 4e7bb59d49fb ("caif-hsi: Removed dead code") and c41254006377
> ("caif-hsi: Add rtnl support") from the net-next tree.
> 
> I fixed them up (see below) and can carry the fix as necessary.

Sorry for late response. Your merge looks perfect.

Thanks,
Sjur

^ permalink raw reply

* Re: [patch net-next 1/4] net: introduce new priv_flag indicating iface capable of change mac when running
From: Jiri Pirko @ 2012-06-29  6:41 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: mst, netdev, shimoda.hiroaki, virtualization, danny.kukawka,
	edumazet, davem
In-Reply-To: <1340921854.2577.16.camel@bwh-desktop.uk.solarflarecom.com>

Fri, Jun 29, 2012 at 12:17:34AM CEST, bhutchings@solarflare.com wrote:
>On Thu, 2012-06-28 at 16:10 +0200, Jiri Pirko wrote:
>> Introduce IFF_LIFE_ADDR_CHANGE priv_flag and use it to disable
>> netif_running() check in eth_mac_addr()
>>
>> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>> ---
>>  include/linux/if.h |    2 ++
>>  net/ethernet/eth.c |    2 +-
>>  2 files changed, 3 insertions(+), 1 deletion(-)
>> 
>> diff --git a/include/linux/if.h b/include/linux/if.h
>> index f995c66..fd9ee7c 100644
>> --- a/include/linux/if.h
>> +++ b/include/linux/if.h
>> @@ -81,6 +81,8 @@
>>  #define IFF_UNICAST_FLT	0x20000		/* Supports unicast filtering	*/
>>  #define IFF_TEAM_PORT	0x40000		/* device used as team port */
>>  #define IFF_SUPP_NOFCS	0x80000		/* device supports sending custom FCS */
>> +#define IFF_LIFE_ADDR_CHANGE 0x100000	/* device supports hardware address
>> +					 * change when it's running */
>[...]
>
>Any device that has IFF_UNICAST_FLT can update the unicast MAC filter
>while it's running; doesn't that go hand-in-hand with being able to
>handle changes to the primary MAC address?  Is the new flag really
>necessary at all?

Hmm, this makes sense. But, can you guarantee that all devices behave like this?

Also, there are many devices that do not support unicast filtering
and yet they support updating the MAC address while running.

Jirka

>
>Ben.
>
>-- 
>Ben Hutchings, Staff Engineer, Solarflare
>Not speaking for my employer; that's the marketing department's job.
>They asked us to note that Solarflare product names are trademarked.
>

^ permalink raw reply

* [PATCH net-next] caif-hsi: Fix merge issues.
From: sjur.brandeland @ 2012-06-29  6:36 UTC (permalink / raw)
  To: davem; +Cc: netdev, sjurbren, Sjur Brændeland

From: Sjur Brændeland <sjur.brandeland@stericsson.com>

Fix the failing merge in net-next by reverting the last
net-next merge for caif_hsi.c and then merge in the commit:
"caif-hsi: Bugfix - Piggyback'ed embedded CAIF frame lost"
from the net repository. 

The commit:"caif-hsi: Add missing return in error path" from
net repository was dropped, as it changed code previously removed in the 
net-next repository.

Signed-off-by: Sjur Brændeland <sjur.brandeland@stericsson.com>

---
Hi Dave,

>Sjur please send me any necessary fixups and please be more mindful in
>the future of the incredible merge pain you put me through when you
>have such fundamentally overlapping changes like that and don't
>provide me with a sample merge resolution like other people do.

Yes this merge went really bad, sorry for the pain I have caused you!
I'll try to provide you with merge instruction or sample merge
resolutions in the future if there are conflicts.

Regards,
Sjur


---
 drivers/net/caif/caif_hsi.c |   72 ++++++++++++++++---------------------------
 1 files changed, 27 insertions(+), 45 deletions(-)

diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c
index 087eb83..0def8b3 100644
--- a/drivers/net/caif/caif_hsi.c
+++ b/drivers/net/caif/caif_hsi.c
@@ -1131,51 +1131,7 @@ static void cfhsi_setup(struct net_device *dev)
 	cfhsi->cfdev.use_stx = false;
 	cfhsi->cfdev.use_fcs = false;
 	cfhsi->ndev = dev;
-}
-
-int cfhsi_probe(struct platform_device *pdev)
-{
-	struct cfhsi_ops *(*get_ops)(void);
-	struct cfhsi *cfhsi = NULL;
-	struct net_device *ndev;
-	int res;
-
-	ndev = alloc_netdev(sizeof(struct cfhsi), "cfhsi%d", cfhsi_setup);
-	if (!ndev)
-		return -ENODEV;
-
-	cfhsi = netdev_priv(ndev);
-	cfhsi->ndev = ndev;
-	cfhsi->pdev = pdev;
-
-	get_ops = symbol_get(cfhsi_get_ops);
-	if (!get_ops) {
-		pr_err("%s: failed to get the cfhsi_ops\n", __func__);
-		return -ENODEV;
-	}
-
-	/* Assign the HSI device. */
-	cfhsi->ops = (*get_ops)();
-	if (!cfhsi->ops) {
-		pr_err("%s: failed to get the cfhsi_ops\n", __func__);
-		goto err;
-	}
-
-	/* Assign the driver to this HSI device. */
-	cfhsi->ops->cb_ops = &cfhsi->cb_ops;
-	res = register_netdevice(ndev);
-	if (res) {
-		dev_err(&ndev->dev, "%s: Registration error: %d.\n",
-			__func__, res);
-		free_netdev(ndev);
-	}
-	/* Add CAIF HSI device to list. */
-	list_add_tail(&cfhsi->list, &cfhsi_list);
-
-	return res;
-err:
-	symbol_put(cfhsi_get_ops);
-	return -ENODEV;
+	cfhsi->cfg = hsi_default_config;
 }
 
 static int cfhsi_open(struct net_device *ndev)
@@ -1454,6 +1410,7 @@ static int caif_hsi_newlink(struct net *src_net, struct net_device *dev,
 			  struct nlattr *tb[], struct nlattr *data[])
 {
 	struct cfhsi *cfhsi = NULL;
+	struct cfhsi_ops *(*get_ops)(void);
 
 	ASSERT_RTNL();
 
@@ -1461,7 +1418,32 @@ static int caif_hsi_newlink(struct net *src_net, struct net_device *dev,
 	cfhsi_netlink_parms(data, cfhsi);
 	dev_net_set(cfhsi->ndev, src_net);
 
+	get_ops = symbol_get(cfhsi_get_ops);
+	if (!get_ops) {
+		pr_err("%s: failed to get the cfhsi_ops\n", __func__);
+		return -ENODEV;
+	}
+
+	/* Assign the HSI device. */
+	cfhsi->ops = (*get_ops)();
+	if (!cfhsi->ops) {
+		pr_err("%s: failed to get the cfhsi_ops\n", __func__);
+		goto err;
+	}
+
+	/* Assign the driver to this HSI device. */
+	cfhsi->ops->cb_ops = &cfhsi->cb_ops;
+	if (register_netdevice(dev)) {
+		pr_warn("%s: caif_hsi device registration failed\n", __func__);
+		goto err;
+	}
+	/* Add CAIF HSI device to list. */
+	list_add_tail(&cfhsi->list, &cfhsi_list);
+
 	return 0;
+err:
+	symbol_put(cfhsi_get_ops);
+	return -ENODEV;
 }
 
 static struct rtnl_link_ops caif_hsi_link_ops __read_mostly = {
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH net-next 0/1] netxen: bug fix
From: Rajesh Borundia @ 2012-06-29  6:12 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman

Please apply it to net-next.

Thanks,
Rajesh

^ permalink raw reply

* [PATCH net-next 1/1] netxen_nic: restrict force firmware dump when dump is disabled.
From: Rajesh Borundia @ 2012-06-29  6:12 UTC (permalink / raw)
  To: davem; +Cc: netdev, ameen.rahman, Manish chopra
In-Reply-To: <1340950341-27252-1-git-send-email-rajesh.borundia@qlogic.com>

From: Manish chopra <manish.chopra@qlogic.com>

o Set the ethtool_dump flag (=ETH_FW_DUMP_DISABLE) when dump is disabled.
o update driver version to 4.0.80

Signed-off-by: Manish chopra <manish.chopra@qlogic.com>
Signed-off-by: Rajesh Borundia <rajesh.borundia@qlogic.com>
---
 drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    4 ++--
 .../ethernet/qlogic/netxen/netxen_nic_ethtool.c    |   13 ++++++++++---
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
index 37ccbe5..eb3dfdb 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
@@ -53,8 +53,8 @@
 
 #define _NETXEN_NIC_LINUX_MAJOR 4
 #define _NETXEN_NIC_LINUX_MINOR 0
-#define _NETXEN_NIC_LINUX_SUBVERSION 79
-#define NETXEN_NIC_LINUX_VERSIONID  "4.0.79"
+#define _NETXEN_NIC_LINUX_SUBVERSION 80
+#define NETXEN_NIC_LINUX_VERSIONID  "4.0.80"
 
 #define NETXEN_VERSION_CODE(a, b, c)	(((a) << 24) + ((b) << 16) + (c))
 #define _major(v)	(((v) >> 24) & 0xff)
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c
index 9103e3e..10468e7 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ethtool.c
@@ -826,7 +826,12 @@ netxen_get_dump_flag(struct net_device *netdev, struct ethtool_dump *dump)
 		dump->len = mdump->md_dump_size;
 	else
 		dump->len = 0;
-	dump->flag = mdump->md_capture_mask;
+
+	if (!mdump->md_enabled)
+		dump->flag = ETH_FW_DUMP_DISABLE;
+	else
+		dump->flag = mdump->md_capture_mask;
+
 	dump->version = adapter->fw_version;
 	return 0;
 }
@@ -840,8 +845,10 @@ netxen_set_dump(struct net_device *netdev, struct ethtool_dump *val)
 
 	switch (val->flag) {
 	case NX_FORCE_FW_DUMP_KEY:
-		if (!mdump->md_enabled)
-			mdump->md_enabled = 1;
+		if (!mdump->md_enabled) {
+			netdev_info(netdev, "FW dump not enabled\n");
+			return 0;
+		}
 		if (adapter->fw_mdump_rdy) {
 			netdev_info(netdev, "Previous dump not cleared, not forcing dump\n");
 			return 0;
-- 
1.7.3.3

^ permalink raw reply related

* [PATCH net-next] net: l2tp_eth: provide tx_dropped counter
From: Eric Dumazet @ 2012-06-29  6:15 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

From: Eric Dumazet <edumazet@google.com>

Change l2tp_xmit_skb() to return NET_XMIT_DROP in case skb is dropped.

Use kfree_skb() instead of dev_kfree_skb() for drop_monitor pleasure.

Support tx_dropped counter for l2tp_eth

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: James Chapman <jchapman@katalix.com>
---
 net/l2tp/l2tp_core.c |   11 ++++++-----
 net/l2tp/l2tp_eth.c  |   15 ++++++++++-----
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 32b2155..393355d 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1128,6 +1128,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 	int headroom;
 	int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
 	int udp_len;
+	int ret = NET_XMIT_SUCCESS;
 
 	/* Check that there's enough headroom in the skb to insert IP,
 	 * UDP and L2TP headers. If not enough, expand it to
@@ -1137,8 +1138,8 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 		uhlen + hdr_len;
 	old_headroom = skb_headroom(skb);
 	if (skb_cow_head(skb, headroom)) {
-		dev_kfree_skb(skb);
-		goto abort;
+		kfree_skb(skb);
+		return NET_XMIT_DROP;
 	}
 
 	new_headroom = skb_headroom(skb);
@@ -1156,7 +1157,8 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
-		dev_kfree_skb(skb);
+		kfree_skb(skb);
+		ret = NET_XMIT_DROP;
 		goto out_unlock;
 	}
 
@@ -1215,8 +1217,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 out_unlock:
 	bh_unlock_sock(sk);
 
-abort:
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
 
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 47b259f..f9ee74d 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -44,6 +44,7 @@ struct l2tp_eth {
 	struct list_head	list;
 	atomic_long_t		tx_bytes;
 	atomic_long_t		tx_packets;
+	atomic_long_t		tx_dropped;
 	atomic_long_t		rx_bytes;
 	atomic_long_t		rx_packets;
 	atomic_long_t		rx_errors;
@@ -92,12 +93,15 @@ static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct l2tp_eth *priv = netdev_priv(dev);
 	struct l2tp_session *session = priv->session;
+	unsigned int len = skb->len;
+	int ret = l2tp_xmit_skb(session, skb, session->hdr_len);
 
-	atomic_long_add(skb->len, &priv->tx_bytes);
-	atomic_long_inc(&priv->tx_packets);
-
-	l2tp_xmit_skb(session, skb, session->hdr_len);
-
+	if (likely(ret == NET_XMIT_SUCCESS)) {
+		atomic_long_add(len, &priv->tx_bytes);
+		atomic_long_inc(&priv->tx_packets);
+	} else {
+		atomic_long_inc(&priv->tx_dropped);
+	}
 	return NETDEV_TX_OK;
 }
 
@@ -108,6 +112,7 @@ static struct rtnl_link_stats64 *l2tp_eth_get_stats64(struct net_device *dev,
 
 	stats->tx_bytes   = atomic_long_read(&priv->tx_bytes);
 	stats->tx_packets = atomic_long_read(&priv->tx_packets);
+	stats->tx_dropped = atomic_long_read(&priv->tx_dropped);
 	stats->rx_bytes   = atomic_long_read(&priv->rx_bytes);
 	stats->rx_packets = atomic_long_read(&priv->rx_packets);
 	stats->rx_errors  = atomic_long_read(&priv->rx_errors);

^ permalink raw reply related

* Re: [PATCH net-next] fq_codel: report congestion notification at enqueue time
From: Eric Dumazet @ 2012-06-29  5:50 UTC (permalink / raw)
  To: David Miller; +Cc: nanditad, netdev, codel, ycheng, ncardwell, mattmathis
In-Reply-To: <20120628.222934.767995619021650710.davem@davemloft.net>

On Thu, 2012-06-28 at 22:29 -0700, David Miller wrote:

> I am pretty sure the behavior in RED is intentional.
> 
> It's a soft push back on TCP.
> 

tcp_enter_cwr() is called the same for DROP and CN

> We're taking this path when we are unable to successfully ECN mark a
> packet.  But our intention was to do so.
> 

Hmm, problem is the sender thinks the packet was queued for
transmission.

        ret = macvlan_queue_xmit(skb, dev);
        if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
                struct macvlan_pcpu_stats *pcpu_stats;

                pcpu_stats = this_cpu_ptr(vlan->pcpu_stats);
                u64_stats_update_begin(&pcpu_stats->syncp);
                pcpu_stats->tx_packets++;
                pcpu_stats->tx_bytes += len;
                u64_stats_update_end(&pcpu_stats->syncp);
        } else {
                this_cpu_inc(vlan->pcpu_stats->tx_dropped);
        }

NET_XMIT_CN has a lazy semantic it seems.

I will just not rely on it.

^ permalink raw reply

* Re: [PATCH net-next] fq_codel: report congestion notification at enqueue time
From: David Miller @ 2012-06-29  5:29 UTC (permalink / raw)
  To: eric.dumazet; +Cc: nanditad, netdev, codel, ycheng, ncardwell, mattmathis
In-Reply-To: <1340947448.29822.41.camel@edumazet-glaptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 29 Jun 2012 07:24:08 +0200

> By the way, I am not sure NET_XMIT_CN is correctly used in RED.
> 
> Or maybe my understanding of NET_XMIT_CN is wrong.
> 
> If the packet is dropped in enqueue(), why use NET_XMIT_CN instead of
> NET_XMIT_DROP ?
> 
> This seems to mean: I dropped _this_ packet, but don't worry too much, I
> might accept other packets, so please go on...

I am pretty sure the behavior in RED is intentional.

It's a soft push back on TCP.

We're taking this path when we are unable to successfully ECN mark a
packet.  But our intention was to do so.

^ permalink raw reply

* Re: [PATCH net-next] fq_codel: report congestion notification at enqueue time
From: Dave Taht @ 2012-06-29  5:24 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Nandita Dukkipati, netdev, Yuchung Cheng, codel, Matt Mathis,
	Neal Cardwell, David Miller
In-Reply-To: <1340945457.29822.7.camel@edumazet-glaptop>

On Fri, Jun 29, 2012 at 12:50 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:

> A router will have no use of this feature, not sure you need to spend
> time trying this ;)

It's not yer ordinary router...

A cerowrt router has iperf, netperf/netserver from svn with congestion
control switching and classification setting, rsync (with same),
samba, transmission, a polipo proxy, scamper, and a legion of other
network analysis tools on-board and available as optional packages.

and it's used in the bufferbloat project as a thoroughly understood
platform for originating, receiving, AND routing packets on a real
embedded home gateway platform that end users actually use, through a
decent set of drivers, on ethernet and wifi.

I am always concerned when changes to the stack like
GSO/GRO/BQL/fq_codel go into linux - or things like the infinite
window in ECN bug from a few months back happen - as they hold promise
to mutate (or explain) the statistics and analysis we've accumulated
over the last year and a half.

And as I'm hoping to do a major test run shortly to get some fresh
statistics vs a vs fq_codel vs the old sfqred tests ( I'm looking
forward to redoing this one in particular:
http://www.teklibre.com/~d/bloat/hoqvssfqred.ps - )

... and you are about to change what those stats are going to look
like, under load, with this change... I kind of need to
understand/track it/parse it/capture it. I've got sufficient hardware
now to easily A/B things.

(sorry for the noise on the lists)


-- 
Dave Täht

^ permalink raw reply

* [PATCH] ipv4: Remove extraneous assignment of dst->tclassid.
From: David Miller @ 2012-06-29  5:24 UTC (permalink / raw)
  To: netdev


We already set it several lines above.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c |    3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 919d69e..6a5afc7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2327,9 +2327,6 @@ local_input:
 	rth->rt_key_tos	= tos;
 	rth->rt_dst	= daddr;
 	rth->rt_src	= saddr;
-#ifdef CONFIG_IP_ROUTE_CLASSID
-	rth->dst.tclassid = itag;
-#endif
 	rth->rt_route_iif = dev->ifindex;
 	rth->rt_iif	= dev->ifindex;
 	rth->rt_oif	= 0;
-- 
1.7.10

^ permalink raw reply related

* Re: [PATCH net-next] fq_codel: report congestion notification at enqueue time
From: Eric Dumazet @ 2012-06-29  5:24 UTC (permalink / raw)
  To: David Miller; +Cc: nanditad, netdev, codel, ycheng, ncardwell, mattmathis
In-Reply-To: <20120628.221252.2220466000873887315.davem@davemloft.net>

On Thu, 2012-06-28 at 22:12 -0700, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Fri, 29 Jun 2012 06:53:12 +0200
> 
> > Please don't apply this patch, I'll submit an updated version later.
> 
> Ok.

By the way, I am not sure NET_XMIT_CN is correctly used in RED.

Or maybe my understanding of NET_XMIT_CN is wrong.

If the packet is dropped in enqueue(), why use NET_XMIT_CN instead of
NET_XMIT_DROP ?

This seems to mean: I dropped _this_ packet, but don't worry too much, I
might accept other packets, so please go on...

diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 633e32d..0fc5b6c 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -77,7 +77,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		sch->qstats.overlimits++;
 		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
 			q->stats.prob_drop++;
-			goto congestion_drop;
+			goto drop;
 		}
 
 		q->stats.prob_mark++;
@@ -88,7 +88,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		if (red_use_harddrop(q) || !red_use_ecn(q) ||
 		    !INET_ECN_set_ce(skb)) {
 			q->stats.forced_drop++;
-			goto congestion_drop;
+			goto drop;
 		}
 
 		q->stats.forced_mark++;
@@ -104,9 +104,8 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	}
 	return ret;
 
-congestion_drop:
-	qdisc_drop(skb, sch);
-	return NET_XMIT_CN;
+drop:
+	return qdisc_drop(skb, sch);
 }
 
 static struct sk_buff *red_dequeue(struct Qdisc *sch)

^ permalink raw reply related

* Re: [PATCH net-next] fq_codel: report congestion notification at enqueue time
From: David Miller @ 2012-06-29  5:12 UTC (permalink / raw)
  To: eric.dumazet; +Cc: nanditad, netdev, codel, ycheng, ncardwell, mattmathis
In-Reply-To: <1340945592.29822.8.camel@edumazet-glaptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 29 Jun 2012 06:53:12 +0200

> Please don't apply this patch, I'll submit an updated version later.

Ok.

^ permalink raw reply

* Re: [PATCH net-next] fq_codel: report congestion notification at enqueue time
From: Eric Dumazet @ 2012-06-29  4:53 UTC (permalink / raw)
  To: David Miller
  Cc: Nandita Dukkipati, netdev, codel, Yuchung Cheng, Neal Cardwell,
	Matt Mathis
In-Reply-To: <1340903237.13187.151.camel@edumazet-glaptop>

On Thu, 2012-06-28 at 19:07 +0200, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> At enqueue time, check sojourn time of packet at head of the queue,
> and return NET_XMIT_CN instead of NET_XMIT_SUCCESS if this sojourn
> time is above codel @target.
> 
> This permits local TCP stack to call tcp_enter_cwr() and reduce its cwnd
> without drops (for example if ECN is not enabled for the flow)
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Dave Taht <dave.taht@bufferbloat.net>
> Cc: Tom Herbert <therbert@google.com>
> Cc: Matt Mathis <mattmathis@google.com>
> Cc: Yuchung Cheng <ycheng@google.com>
> Cc: Nandita Dukkipati <nanditad@google.com>
> Cc: Neal Cardwell <ncardwell@google.com>
> ---

Please don't apply this patch, I'll submit an updated version later.

Thanks

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox