Netdev List
 help / color / mirror / Atom feed
* [PATCH 1/1 net-next] ipv4: ip_frag_queue function clean-up
From: Fabian Frederick @ 2014-11-04 19:41 UTC (permalink / raw)
  To: linux-kernel
  Cc: Fabian Frederick, David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev

-remove unnecessary else after break
-declare free_it sk_buff * only once (like prev and next)

Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
 net/ipv4/ip_fragment.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2811cc1..153c402 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -317,7 +317,7 @@ static int ip_frag_reinit(struct ipq *qp)
 /* Add new segment to existing queue. */
 static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
-	struct sk_buff *prev, *next;
+	struct sk_buff *prev, *next, *free_it;
 	struct net_device *dev;
 	int flags, offset;
 	int ihl, end;
@@ -432,23 +432,23 @@ found:
 			if (next->ip_summed != CHECKSUM_UNNECESSARY)
 				next->ip_summed = CHECKSUM_NONE;
 			break;
-		} else {
-			struct sk_buff *free_it = next;
+		}
 
-			/* Old fragment is completely overridden with
-			 * new one drop it.
-			 */
-			next = next->next;
+		free_it = next;
 
-			if (prev)
-				prev->next = next;
-			else
-				qp->q.fragments = next;
+		/* Old fragment is completely overridden with
+		 * new one drop it.
+		 */
+		next = next->next;
 
-			qp->q.meat -= free_it->len;
-			sub_frag_mem_limit(&qp->q, free_it->truesize);
-			kfree_skb(free_it);
-		}
+		if (prev)
+			prev->next = next;
+		else
+			qp->q.fragments = next;
+
+		qp->q.meat -= free_it->len;
+		sub_frag_mem_limit(&qp->q, free_it->truesize);
+		kfree_skb(free_it);
 	}
 
 	FRAG_CB(skb)->offset = offset;
-- 
1.9.3

^ permalink raw reply related

* [PATCH 1/1 net-next] ipv4: remove 0/NULL assignment on static
From: Fabian Frederick @ 2014-11-04 19:37 UTC (permalink / raw)
  To: linux-kernel
  Cc: Fabian Frederick, David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev

static values are automatically initialized to 0

Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
 net/ipv4/ipconfig.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index a896da5..7fa18bc 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -115,7 +115,7 @@
  */
 int ic_set_manually __initdata = 0;		/* IPconfig parameters set manually */
 
-static int ic_enable __initdata = 0;		/* IP config enabled? */
+static int ic_enable __initdata;		/* IP config enabled? */
 
 /* Protocol choice */
 int ic_proto_enabled __initdata = 0
@@ -130,7 +130,7 @@ int ic_proto_enabled __initdata = 0
 #endif
 			;
 
-static int ic_host_name_set __initdata = 0;	/* Host name set by us? */
+static int ic_host_name_set __initdata;	/* Host name set by us? */
 
 __be32 ic_myaddr = NONE;		/* My IP address */
 static __be32 ic_netmask = NONE;	/* Netmask for local subnet */
@@ -160,17 +160,17 @@ static u8 ic_domain[64];		/* DNS (not NIS) domain name */
 static char user_dev_name[IFNAMSIZ] __initdata = { 0, };
 
 /* Protocols supported by available interfaces */
-static int ic_proto_have_if __initdata = 0;
+static int ic_proto_have_if __initdata;
 
 /* MTU for boot device */
-static int ic_dev_mtu __initdata = 0;
+static int ic_dev_mtu __initdata;
 
 #ifdef IPCONFIG_DYNAMIC
 static DEFINE_SPINLOCK(ic_recv_lock);
-static volatile int ic_got_reply __initdata = 0;    /* Proto(s) that replied */
+static volatile int ic_got_reply __initdata;    /* Proto(s) that replied */
 #endif
 #ifdef IPCONFIG_DHCP
-static int ic_dhcp_msgtype __initdata = 0;	/* DHCP msg type received */
+static int ic_dhcp_msgtype __initdata;	/* DHCP msg type received */
 #endif
 
 
@@ -186,8 +186,8 @@ struct ic_device {
 	__be32 xid;
 };
 
-static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */
-static struct net_device *ic_dev __initdata = NULL;	/* Selected device */
+static struct ic_device *ic_first_dev __initdata;	/* List of open device */
+static struct net_device *ic_dev __initdata;		/* Selected device */
 
 static bool __init ic_is_init_dev(struct net_device *dev)
 {
-- 
1.9.3

^ permalink raw reply related

* Re: [PATCH 1/1 net-next] esp4: remove assignment in if condition
From: Joe Perches @ 2014-11-04 19:33 UTC (permalink / raw)
  To: Fabian Frederick
  Cc: linux-kernel, Steffen Klassert, Herbert Xu, David S. Miller,
	Alexey Kuznetsov, James Morris, Hideaki YOSHIFUJI,
	Patrick McHardy, netdev
In-Reply-To: <1415129311-12480-1-git-send-email-fabf@skynet.be>

On Tue, 2014-11-04 at 20:28 +0100, Fabian Frederick wrote:

trivia:

> diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
[]
> @@ -392,8 +392,11 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
>  	if (elen <= 0)
>  		goto out;
>  
> -	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
> +	err = skb_cow_data(skb, 0, &trailer);
> +
> +	if (err < 0)
>  		goto out;

Generally it's better/more common to use

	foo = bar();
	if (!foo)
		[error_handler...]
	
without the blank line between the set
and the test.

^ permalink raw reply

* [PATCH 1/1 net-next] ipv4: use seq_puts instead of seq_printf where possible
From: Fabian Frederick @ 2014-11-04 19:31 UTC (permalink / raw)
  To: linux-kernel
  Cc: Fabian Frederick, David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev

Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
 net/ipv4/proc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 8e3eb39..f0d4eb8 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -296,12 +296,12 @@ static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals,
 	int j;
 
 	if (count) {
-		seq_printf(seq, "\nIcmpMsg:");
+		seq_puts(seq, "\nIcmpMsg:");
 		for (j = 0; j < count; ++j)
 			seq_printf(seq, " %sType%u",
 				type[j] & 0x100 ? "Out" : "In",
 				type[j] & 0xff);
-		seq_printf(seq, "\nIcmpMsg:");
+		seq_puts(seq, "\nIcmpMsg:");
 		for (j = 0; j < count; ++j)
 			seq_printf(seq, " %lu", vals[j]);
 	}
@@ -342,7 +342,7 @@ static void icmp_put(struct seq_file *seq)
 	seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");
 	for (i = 0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " In%s", icmpmibmap[i].name);
-	seq_printf(seq, " OutMsgs OutErrors");
+	seq_puts(seq, " OutMsgs OutErrors");
 	for (i = 0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " Out%s", icmpmibmap[i].name);
 	seq_printf(seq, "\nIcmp: %lu %lu %lu",
-- 
1.9.3

^ permalink raw reply related

* [PATCH 1/1 net-next] esp4: remove assignment in if condition
From: Fabian Frederick @ 2014-11-04 19:28 UTC (permalink / raw)
  To: linux-kernel
  Cc: Fabian Frederick, Steffen Klassert, Herbert Xu, David S. Miller,
	Alexey Kuznetsov, James Morris, Hideaki YOSHIFUJI,
	Patrick McHardy, netdev

Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
 net/ipv4/esp4.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 360b565..9dd66ee 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -392,8 +392,11 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (elen <= 0)
 		goto out;
 
-	if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+	err = skb_cow_data(skb, 0, &trailer);
+
+	if (err < 0)
 		goto out;
+
 	nfrags = err;
 
 	assoclen = sizeof(*esph);
-- 
1.9.3

^ permalink raw reply related

* [PATCH 1/1 net-next] tcp: spelling s/plugable/pluggable
From: Fabian Frederick @ 2014-11-04 19:25 UTC (permalink / raw)
  To: linux-kernel
  Cc: Fabian Frederick, David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev

Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
 net/ipv4/tcp_cong.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index b1c5970..27ead0d 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -1,5 +1,5 @@
 /*
- * Plugable TCP congestion control support and newReno
+ * Pluggable TCP congestion control support and newReno
  * congestion control.
  * Based on ideas from I/O scheduler support and Web100.
  *
-- 
1.9.3

^ permalink raw reply related

* Re: [PATCH 00/20] kselftest install target feature
From: Kees Cook @ 2014-11-04 19:22 UTC (permalink / raw)
  To: Shuah Khan
  Cc: Greg KH, Andrew Morton, Michal Marek, David S. Miller,
	tranmanphong-Re5JQEeQqe8AvxtiuMwx3w, David Herrmann, Hugh Dickins,
	bobby.prani-Re5JQEeQqe8AvxtiuMwx3w, Eric W. Biederman,
	Serge E. Hallyn, linux-kbuild, LKML, Linux API,
	Network Development
In-Reply-To: <cover.1415117102.git.shuahkh-JPH+aEBZ4P+UEJcrhfAQsw@public.gmane.org>

On Tue, Nov 4, 2014 at 9:10 AM, Shuah Khan <shuahkh-JPH+aEBZ4P+UEJcrhfAQsw@public.gmane.org> wrote:
> This patch series adds a new kselftest_install make target
> to enable selftest install. When make kselftest_install is
> run, selftests are installed on the system. A new install
> target is added to selftests Makefile which will install
> targets for the tests that are specified in INSTALL_TARGETS.
> During install, a script is generated to run tests that are
> installed. This script will be installed in the selftest install
> directory. Individual test Makefiles are changed to add to the
> script. This will allow new tests to add install and run test
> commands to the generated kselftest script.

I'm all for making the self tests more available, but I don't think
this is the right approach. My primary objection is that it creates a
second way to run tests, and that means any changes and additions need
to be updated in two places. I'd much rather just maintain the single
"make" targets instead. Having "make" available on the target device
doesn't seem too bad to me. Is there a reason that doesn't work for
your situation?

I would, however, like to see some better standardization of the test
"framework" that we've got in there already. (For example, some
failures fail the "make", some don't, there are various reporting
methods for success/failure depending on the test, etc.)

-Kees

> Approach:
>
> make kselftest_target:
> -- exports kselftest INSTALL_KSFT_PATH
>    default $(INSTALL_MOD_PATH)/lib/kselftest/$(KERNELRELEASE)
> -- exports path for ksefltest.sh
> -- runs selftests make install target:
>
> selftests make install target
> -- creates kselftest.sh script in install install dir
> -- runs install targets for all INSTALL_TARGETS
>
> Individual test make install targets:
> -- install test programs and/or scripts in install dir
> -- append to the ksefltest.sh file to add commands to run test
>
> Shuah Khan (20):
>   selftests/user: move test out of Makefile into a shell script
>   selftests/net: move test out of Makefile into a shell script
>   kbuild: kselftest_install - add a new make target to install selftests
>   selftests: add install target to enable installing selftests
>   selftests/breakpoints: add install target to enable installing test
>   selftests/cpu-hotplug: add install target to enable installing test
>   selftests/efivarfs: add install target to enable installing test
>   selftests/firmware: add install target to enable installing test
>   selftests/ipc: add install target to enable installing test
>   selftests/kcmp: add install target to enable installing test
>   selftests/memfd: add install target to enable installing test
>   selftests/memory-hotplug: add install target to enable installing test
>   selftests/mount: add install target to enable installing test
>   selftests/mqueue: add install target to enable installing test
>   selftests/net: add install target to enable installing test
>   selftests/ptrace: add install target to enable installing test
>   selftests/sysctl: add install target to enable installing test
>   selftests/timers: add install target to enable installing test
>   selftests/vm: add install target to enable installing test
>   selftests/user: add install target to enable installing test
>
>  Makefile                                        | 17 +++++++++++++++++
>  tools/testing/selftests/Makefile                | 14 ++++++++++++++
>  tools/testing/selftests/breakpoints/Makefile    | 12 ++++++++++++
>  tools/testing/selftests/cpu-hotplug/Makefile    |  9 +++++++++
>  tools/testing/selftests/efivarfs/Makefile       | 13 ++++++++++++-
>  tools/testing/selftests/firmware/Makefile       | 20 ++++++++++++++++++++
>  tools/testing/selftests/ipc/Makefile            | 11 +++++++++++
>  tools/testing/selftests/kcmp/Makefile           | 12 ++++++++++++
>  tools/testing/selftests/memfd/Makefile          | 10 ++++++++++
>  tools/testing/selftests/memory-hotplug/Makefile |  9 +++++++++
>  tools/testing/selftests/mount/Makefile          |  7 +++++++
>  tools/testing/selftests/mqueue/Makefile         |  8 ++++++++
>  tools/testing/selftests/net/Makefile            | 18 +++++++++++-------
>  tools/testing/selftests/net/test_bpf.sh         | 10 ++++++++++
>  tools/testing/selftests/ptrace/Makefile         | 11 +++++++++--
>  tools/testing/selftests/sysctl/Makefile         | 10 ++++++++++
>  tools/testing/selftests/timers/Makefile         |  7 +++++++
>  tools/testing/selftests/user/Makefile           | 15 ++++++++-------
>  tools/testing/selftests/user/test_user_copy.sh  | 10 ++++++++++
>  tools/testing/selftests/vm/Makefile             |  7 +++++++
>  20 files changed, 213 insertions(+), 17 deletions(-)
>  create mode 100755 tools/testing/selftests/net/test_bpf.sh
>  create mode 100755 tools/testing/selftests/user/test_user_copy.sh
>
> --
> 1.9.1
>



-- 
Kees Cook
Chrome OS Security

^ permalink raw reply

* [PATCH] bridge: include in6.h in if_bridge.h for struct in6_addr
From: Gregory Fong @ 2014-11-04 19:21 UTC (permalink / raw)
  To: netdev
  Cc: linux-api, linux-kernel, carlos, eblake, Kumar Gala, Gregory Fong,
	Florian Fainelli, Cong Wang, David Miller

if_bridge.h uses struct in6_addr ip6, but wasn't including the in6.h
header.  Thomas Backlund originally sent a patch to do this, but this
revealed a redefinition issue: https://lkml.org/lkml/2013/1/13/116

The redefinition issue should have been fixed by the following Linux
commits:
ee262ad827f89e2dc7851ec2986953b5b125c6bc inet: defines IPPROTO_* needed for module alias generation
cfd280c91253cc28e4919e349fa7a813b63e71e8 net: sync some IP headers with glibc

and the following glibc commit:
6c82a2f8d7c8e21e39237225c819f182ae438db3 Coordinate IPv6 definitions for Linux and glibc

so actually include the header now.

Reported-by: Colin Guthrie <colin@mageia.org>
Reported-by: Christiaan Welvaart <cjw@daneel.dyndns.org>
Reported-by: Thomas Backlund <tmb@mageia.org>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Gregory Fong <gregory.0xf0@gmail.com>
---
 include/uapi/linux/if_bridge.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 39f621a..da17e45 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -15,6 +15,7 @@
 
 #include <linux/types.h>
 #include <linux/if_ether.h>
+#include <linux/in6.h>
 
 #define SYSFS_BRIDGE_ATTR	"bridge"
 #define SYSFS_BRIDGE_FDB	"brforward"
-- 
1.9.1

^ permalink raw reply related

* [PATCH 1/1 net-next] cipso: remove NULL assignment on static
From: Fabian Frederick @ 2014-11-04 19:19 UTC (permalink / raw)
  To: linux-kernel
  Cc: Fabian Frederick, David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev

Also add blank line after structure declarations

Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
 net/ipv4/cipso_ipv4.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 4715f25..dd3746a 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -72,6 +72,7 @@ struct cipso_v4_map_cache_bkt {
 	u32 size;
 	struct list_head list;
 };
+
 struct cipso_v4_map_cache_entry {
 	u32 hash;
 	unsigned char *key;
@@ -82,7 +83,8 @@ struct cipso_v4_map_cache_entry {
 	u32 activity;
 	struct list_head list;
 };
-static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL;
+
+static struct cipso_v4_map_cache_bkt *cipso_v4_cache;
 
 /* Restricted bitmap (tag #1) flags */
 int cipso_v4_rbm_optfmt = 0;
-- 
1.9.3

^ permalink raw reply related

* [PATCH v2 net] tcp: zero retrans_stamp if all retrans were acked
From: Marcelo Ricardo Leitner @ 2014-11-04 19:15 UTC (permalink / raw)
  To: netdev; +Cc: ncardwell, ycheng, edumazet
In-Reply-To: <CADVnQykbFjodAyJomX8E5HZhOrP7kDxDS7vPNiam858B84vqCQ@mail.gmail.com>

Ueki Kohei reported that when we are using NewReno with connections that
have a very low traffic, we may timeout the connection too early if a
second loss occurs after the first one was successfully acked but no
data was transfered later. Below is his description of it:

When SACK is disabled, and a socket suffers multiple separate TCP
retransmissions, that socket's ETIMEDOUT value is calculated from the
time of the *first* retransmission instead of the *latest*
retransmission.

This happens because the tcp_sock's retrans_stamp is set once then never
cleared.

Take the following connection:

                      Linux                    remote-machine
                        |                           |
         send#1---->(*1)|--------> data#1 --------->|
                  |     |                           |
                 RTO    :                           :
                  |     |                           |
                 ---(*2)|----> data#1(retrans) ---->|
                  | (*3)|<---------- ACK <----------|
                  |     |                           |
                  |     :                           :
                  |     :                           :
                  |     :                           :
                16 minutes (or more)                :
                  |     :                           :
                  |     :                           :
                  |     :                           :
                  |     |                           |
         send#2---->(*4)|--------> data#2 --------->|
                  |     |                           |
                 RTO    :                           :
                  |     |                           |
                 ---(*5)|----> data#2(retrans) ---->|
                  |     |                           |
                  |     |                           |
                RTO*2   :                           :
                  |     |                           |
                  |     |                           |
      ETIMEDOUT<----(*6)|                           |

(*1) One data packet sent.
(*2) Because no ACK packet is received, the packet is retransmitted.
(*3) The ACK packet is received. The transmitted packet is acknowledged.

At this point the first "retransmission event" has passed and been
recovered from. Any future retransmission is a completely new "event".

(*4) After 16 minutes (to correspond with retries2=15), a new data
packet is sent. Note: No data is transmitted between (*3) and (*4).

The socket's timeout SHOULD be calculated from this point in time, but
instead it's calculated from the prior "event" 16 minutes ago.

(*5) Because no ACK packet is received, the packet is retransmitted.
(*6) At the time of the 2nd retransmission, the socket returns
ETIMEDOUT.


Therefore, now we clear retrans_stamp as soon as all data during the
loss window is fully acked.

Reported-by: Ueki Kohei
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
---

Notes:
    v1->v2: fixed compilation issue noticed by Neal

 net/ipv4/tcp_input.c | 60 +++++++++++++++++++++++++++-------------------------
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a12b455928e52211efdc6b471ef54de6218f5df0..88fa2d1606859de25419d0d45c3095f6d410d42b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2315,6 +2315,35 @@ static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
 
 /* Undo procedures. */
 
+/* We can clear retrans_stamp when there are no retransmissions in the
+ * window. It would seem that it is trivially available for us in
+ * tp->retrans_out, however, that kind of assumptions doesn't consider
+ * what will happen if errors occur when sending retransmission for the
+ * second time. ...It could the that such segment has only
+ * TCPCB_EVER_RETRANS set at the present time. It seems that checking
+ * the head skb is enough except for some reneging corner cases that
+ * are not worth the effort.
+ *
+ * Main reason for all this complexity is the fact that connection dying
+ * time now depends on the validity of the retrans_stamp, in particular,
+ * that successive retransmissions of a segment must not advance
+ * retrans_stamp under any conditions.
+ */
+static bool tcp_any_retrans_done(const struct sock *sk)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+
+	if (tp->retrans_out)
+		return true;
+
+	skb = tcp_write_queue_head(sk);
+	if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
+		return true;
+
+	return false;
+}
+
 #if FASTRETRANS_DEBUG > 1
 static void DBGUNDO(struct sock *sk, const char *msg)
 {
@@ -2410,6 +2439,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
 		 * is ACKed. For Reno it is MUST to prevent false
 		 * fast retransmits (RFC2582). SACK TCP is safe. */
 		tcp_moderate_cwnd(tp);
+		if (!tcp_any_retrans_done(sk))
+			tp->retrans_stamp = 0;
 		return true;
 	}
 	tcp_set_ca_state(sk, TCP_CA_Open);
@@ -2430,35 +2461,6 @@ static bool tcp_try_undo_dsack(struct sock *sk)
 	return false;
 }
 
-/* We can clear retrans_stamp when there are no retransmissions in the
- * window. It would seem that it is trivially available for us in
- * tp->retrans_out, however, that kind of assumptions doesn't consider
- * what will happen if errors occur when sending retransmission for the
- * second time. ...It could the that such segment has only
- * TCPCB_EVER_RETRANS set at the present time. It seems that checking
- * the head skb is enough except for some reneging corner cases that
- * are not worth the effort.
- *
- * Main reason for all this complexity is the fact that connection dying
- * time now depends on the validity of the retrans_stamp, in particular,
- * that successive retransmissions of a segment must not advance
- * retrans_stamp under any conditions.
- */
-static bool tcp_any_retrans_done(const struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-
-	if (tp->retrans_out)
-		return true;
-
-	skb = tcp_write_queue_head(sk);
-	if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
-		return true;
-
-	return false;
-}
-
 /* Undo during loss recovery after partial ACK or using F-RTO. */
 static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
 {
-- 
1.9.3

^ permalink raw reply related

* [PATCH 1/1 net-next] cipso: include linux/bug.h instead of asm/bug.h
From: Fabian Frederick @ 2014-11-04 19:15 UTC (permalink / raw)
  To: linux-kernel
  Cc: Fabian Frederick, David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev

Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
 net/ipv4/cipso_ipv4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 4715f25..a7cc798 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -50,7 +50,7 @@
 #include <net/netlabel.h>
 #include <net/cipso_ipv4.h>
 #include <linux/atomic.h>
-#include <asm/bug.h>
+#include <linux/bug.h>
 #include <asm/unaligned.h>
 
 /* List of available DOI definitions */
-- 
1.9.3

^ permalink raw reply related

* [PATCH 1/1 net-next] ipv4: include linux/bug.h instead of asm/bug.h
From: Fabian Frederick @ 2014-11-04 19:13 UTC (permalink / raw)
  To: linux-kernel
  Cc: Fabian Frederick, David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev

Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
 net/ipv4/cipso_ipv4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 4715f25..a7cc798 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -50,7 +50,7 @@
 #include <net/netlabel.h>
 #include <net/cipso_ipv4.h>
 #include <linux/atomic.h>
-#include <asm/bug.h>
+#include <linux/bug.h>
 #include <asm/unaligned.h>
 
 /* List of available DOI definitions */
-- 
1.9.3

^ permalink raw reply related

* Re: [PATCH net] tcp: zero retrans_stamp if all retrans were acked
From: Marcelo Ricardo Leitner @ 2014-11-04 19:12 UTC (permalink / raw)
  To: Neal Cardwell; +Cc: Netdev, Yuchung Cheng, Eric Dumazet
In-Reply-To: <CADVnQykbFjodAyJomX8E5HZhOrP7kDxDS7vPNiam858B84vqCQ@mail.gmail.com>

On 04-11-2014 17:03, Neal Cardwell wrote:
> On Tue, Nov 4, 2014 at 1:51 PM, Neal Cardwell <ncardwell@google.com> wrote:
>> In the meantime, Yuchung put together a nice packetdrill test case for
>> this, so I'll run it on that version of the patch.
>
> With the compilation issue fixed, Yuchung's packetdrill test passes.
> So this looks good to me once the compilation issue with
> tcp_any_retrans_done() is fixed.

Awesome! Thanks. Sending it now.

Marcelo

^ permalink raw reply

* [PATCH 1/1 net-next] cipso: kerneldoc warning fix
From: Fabian Frederick @ 2014-11-04 19:10 UTC (permalink / raw)
  To: linux-kernel
  Cc: Fabian Frederick, David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev

Signed-off-by: Fabian Frederick <fabf@skynet.be>
---
 net/ipv4/cipso_ipv4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 4715f25..e24df5a 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -539,7 +539,7 @@ doi_add_return:
 
 /**
  * cipso_v4_doi_free - Frees a DOI definition
- * @entry: the entry's RCU field
+ * @doi_def: the DOI definition
  *
  * Description:
  * This function frees all of the memory associated with a DOI definition.
-- 
1.9.3

^ permalink raw reply related

* Re: [PATCH net] tcp: zero retrans_stamp if all retrans were acked
From: Neal Cardwell @ 2014-11-04 19:03 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner; +Cc: Netdev, Yuchung Cheng, Eric Dumazet
In-Reply-To: <CADVnQynOfg6PDe0XYW+0uM+Kbqhv7RRTrEAJCrCEEY54Q6U8Eg@mail.gmail.com>

On Tue, Nov 4, 2014 at 1:51 PM, Neal Cardwell <ncardwell@google.com> wrote:
> In the meantime, Yuchung put together a nice packetdrill test case for
> this, so I'll run it on that version of the patch.

With the compilation issue fixed, Yuchung's packetdrill test passes.
So this looks good to me once the compilation issue with
tcp_any_retrans_done() is fixed.

neal

^ permalink raw reply

* [Patch net-next] ipv6: move INET6_MATCH() to include/net/inet6_hashtables.h
From: Cong Wang @ 2014-11-04 18:59 UTC (permalink / raw)
  To: netdev; +Cc: Cong Wang, David S. Miller

It is only used in net/ipv6/inet6_hashtables.c.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 include/linux/ipv6.h           | 10 ----------
 include/net/inet6_hashtables.h | 10 ++++++++++
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 7121a2e..c694e7b 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -317,14 +317,4 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 #define tcp_twsk_ipv6only(__sk)		0
 #define inet_v6_ipv6only(__sk)		0
 #endif /* IS_ENABLED(CONFIG_IPV6) */
-
-#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)	\
-	(((__sk)->sk_portpair == (__ports))			&&	\
-	 ((__sk)->sk_family == AF_INET6)			&&	\
-	 ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr))		&&	\
-	 ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr))	&&	\
-	 (!(__sk)->sk_bound_dev_if	||				\
-	   ((__sk)->sk_bound_dev_if == (__dif))) 		&&	\
-	 net_eq(sock_net(__sk), (__net)))
-
 #endif /* _IPV6_H */
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index d1d2728..9201afe0 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -99,4 +99,14 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo,
 			  const struct in6_addr *daddr, const __be16 dport,
 			  const int dif);
 #endif /* IS_ENABLED(CONFIG_IPV6) */
+
+#define INET6_MATCH(__sk, __net, __saddr, __daddr, __ports, __dif)	\
+	(((__sk)->sk_portpair == (__ports))			&&	\
+	 ((__sk)->sk_family == AF_INET6)			&&	\
+	 ipv6_addr_equal(&(__sk)->sk_v6_daddr, (__saddr))		&&	\
+	 ipv6_addr_equal(&(__sk)->sk_v6_rcv_saddr, (__daddr))	&&	\
+	 (!(__sk)->sk_bound_dev_if	||				\
+	   ((__sk)->sk_bound_dev_if == (__dif))) 		&&	\
+	 net_eq(sock_net(__sk), (__net)))
+
 #endif /* _INET6_HASHTABLES_H */
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH net] tcp: zero retrans_stamp if all retrans were acked
From: Neal Cardwell @ 2014-11-04 18:51 UTC (permalink / raw)
  To: Marcelo Ricardo Leitner; +Cc: Netdev, Yuchung Cheng, Eric Dumazet
In-Reply-To: <66a8a101a2bfe645432ec393d22b9da44b9739cf.1415110605.git.mleitner@redhat.com>

On Tue, Nov 4, 2014 at 9:18 AM, Marcelo Ricardo Leitner
<mleitner@redhat.com> wrote:
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index a12b455928e52211efdc6b471ef54de6218f5df0..65686efeaaf3c36706390d3bfd260fd1fb942b7f 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -2410,6 +2410,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
>                  * is ACKed. For Reno it is MUST to prevent false
>                  * fast retransmits (RFC2582). SACK TCP is safe. */
>                 tcp_moderate_cwnd(tp);
> +               if (!tcp_any_retrans_done(sk))
> +                       tp->retrans_stamp = 0;

I ran into a compilation error with this, since tcp_any_retrans_done()
is defined below this spot in the file.

I'd recommend moving the tcp_any_retrans_done() code and its preceding
block comment to just above the spot in tcp_input.c that says "/* Undo
procedures. */". That way it compiles.

In the meantime, Yuchung put together a nice packetdrill test case for
this, so I'll run it on that version of the patch.

neal

^ permalink raw reply

* Re: [PATCH 02/13] net_sched: introduce qdisc_peek() helper function
From: Stephen Hemminger @ 2014-11-04 18:45 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev
In-Reply-To: <1415123796-8093-3-git-send-email-xiyou.wangcong@gmail.com>

On Tue,  4 Nov 2014 09:56:25 -0800
Cong Wang <xiyou.wangcong@gmail.com> wrote:

> +static inline void qdisc_warn_nonwc(void *func, struct Qdisc *qdisc)
> +{
> +	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
> +		pr_warn("%pf: %s qdisc %X: is non-work-conserving?\n",
> +			func, qdisc->ops->id, qdisc->handle >> 16);
> +		qdisc->flags |= TCQ_F_WARN_NONWC;
> +	}
> +}
> +

Inilining this and creating N copies of same message is not a step forward.

^ permalink raw reply

* Re: [PATCH 00/13] net_sched: misc cleanups and improvements
From: Eric Dumazet @ 2014-11-04 18:20 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, Jamal Hadi Salim
In-Reply-To: <1415123796-8093-1-git-send-email-xiyou.wangcong@gmail.com>

On Tue, 2014-11-04 at 09:56 -0800, Cong Wang wrote:
> Minor code cleanups for TC qdiscs and filters, each patch has
> more details.
> 
> Cong Wang (13):
>       net_sched: refactor out tcf_exts
>       net_sched: introduce qdisc_peek() helper function
>       net_sched: rename ->gso_skb to ->dequeued_skb
>       net_sched: rename qdisc_drop() to qdisc_drop_skb()
>       net_sched: introduce qdisc_drop() helper function
>       net_sched: move some qdisc flag into qdisc ops
>       net_sched: move TCQ_F_MQROOT into qdisc ops
>       net_sched: use a flag to indicate fifo qdiscs instead of the name
>       net_sched: redefine qdisc_create_dflt()
>       net_sched: forbid setting default qdisc to inappropriate ones
>       net_sched: remove hashmask from Qdisc_class_hash
>       net_sched: remove useless qdisc_stab_lock
>       net_sched: return NULL instead of ERR_PTR for qdisc_alloc()
> 

NACK for the whole serie.

I am tired of your inexistent changelogs and code churn in qdisc.

If you do not care of writing good changelogs and explain why you want
all this, why should I care spending time to review all this stuff ?

What is the plan, beyond all this code churn ?

^ permalink raw reply

* Re: [PATCH] net: phy: spi_ks8995: remove sysfs bin file by registered attribute
From: Florian Fainelli @ 2014-11-04 18:15 UTC (permalink / raw)
  To: Vladimir Zapolskiy, netdev; +Cc: David S. Miller
In-Reply-To: <1415057109-8506-1-git-send-email-vz@mleia.com>

On 11/03/2014 03:25 PM, Vladimir Zapolskiy wrote:
> When a sysfs binary file is asked to be removed, it is found by
> attribute name, so strictly speaking this change is not a fix, but
> just in case when attribute name is changed in the driver or sysfs
> internals are changed, it might be better to remove the previously
> created file using right the same binary attribute.
> 
> Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
> Cc: Florian Fainelli <f.fainelli@gmail.com>
> Cc: David S. Miller <davem@davemloft.net>

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>

> ---
>  drivers/net/phy/spi_ks8995.c |    4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c
> index eab57fc..4653015 100644
> --- a/drivers/net/phy/spi_ks8995.c
> +++ b/drivers/net/phy/spi_ks8995.c
> @@ -353,7 +353,9 @@ static int ks8995_probe(struct spi_device *spi)
>  
>  static int ks8995_remove(struct spi_device *spi)
>  {
> -	sysfs_remove_bin_file(&spi->dev.kobj, &ks8995_registers_attr);
> +	struct ks8995_switch *ks = spi_get_drvdata(spi);
> +
> +	sysfs_remove_bin_file(&spi->dev.kobj, &ks->regs_attr);
>  
>  	return 0;
>  }
> 

^ permalink raw reply

* Re: [GIT PULL nf] IPVS Fixes for v3.18
From: Pablo Neira Ayuso @ 2014-11-04 17:57 UTC (permalink / raw)
  To: Simon Horman
  Cc: lvs-devel, netdev, netfilter-devel, Wensong Zhang,
	Julian Anastasov
In-Reply-To: <1414458334-22479-1-git-send-email-horms@verge.net.au>

Hi Simon,

On Tue, Oct 28, 2014 at 10:05:33AM +0900, Simon Horman wrote:
> Hi Pablo,
> 
> please consider this fix for v3.18.
> 
> It fixes a null-pointer dereference that may occur when logging
> errors.
> 
> This problem was introduced by 4a4739d56b0 ("ipvs: Pull out
> crosses_local_route_boundary logic") in v3.17-rc5. As such I would
> also like it considered for 3.17-stable.

Regarding your request to pass this to 3.17-stable. According to git
am and my scripts:

3d53666 ipvs: Avoid null-pointer deref in debug code

doesn't apply cleanly 3.17.x. Please re-check and send me a backport if
you want to see this in 3.17.x. Let me know, sorry.

^ permalink raw reply

* [PATCH 13/13] net_sched: return NULL instead of ERR_PTR for qdisc_alloc()
From: Cong Wang @ 2014-11-04 17:56 UTC (permalink / raw)
  To: netdev; +Cc: Cong Wang
In-Reply-To: <1415123796-8093-1-git-send-email-xiyou.wangcong@gmail.com>

It always returns ENOFBUFS so we can return NULL
and let its callers set this errno.

Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 net/sched/sch_api.c     | 4 ++--
 net/sched/sch_generic.c | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 38c42bd..27bfd75 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -905,8 +905,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 		goto err_out;
 
 	sch = qdisc_alloc(dev_queue, ops);
-	if (IS_ERR(sch)) {
-		err = PTR_ERR(sch);
+	if (!sch) {
+		err = -ENOBUFS;
 		goto err_out2;
 	}
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 29db9c8..b474fbb 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -585,7 +585,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	void *p;
 	struct Qdisc *sch;
 	unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
-	int err = -ENOBUFS;
 	struct net_device *dev = dev_queue->dev;
 
 	p = kzalloc_node(size, GFP_KERNEL,
@@ -620,7 +619,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 
 	return sch;
 errout:
-	return ERR_PTR(err);
+	return NULL;
 }
 
 struct Qdisc *qdisc_create_internal(struct netdev_queue *dev_queue,
@@ -633,7 +632,7 @@ struct Qdisc *qdisc_create_internal(struct netdev_queue *dev_queue,
 		goto errout;
 
 	sch = qdisc_alloc(dev_queue, ops);
-	if (IS_ERR(sch))
+	if (!sch)
 		goto errout;
 	sch->parent = parentid;
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH 12/13] net_sched: remove useless qdisc_stab_lock
From: Cong Wang @ 2014-11-04 17:56 UTC (permalink / raw)
  To: netdev; +Cc: Cong Wang
In-Reply-To: <1415123796-8093-1-git-send-email-xiyou.wangcong@gmail.com>

We always acquire rtnl lock when we get or put stab,
so there is no need to use another spinlock.

Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 net/sched/sch_api.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index efc60d6..38c42bd 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -441,7 +441,6 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
 EXPORT_SYMBOL(qdisc_put_rtab);
 
 static LIST_HEAD(qdisc_stab_list);
-static DEFINE_SPINLOCK(qdisc_stab_lock);
 
 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
 	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
@@ -457,6 +456,8 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
 	u16 *tab = NULL;
 	int err;
 
+	ASSERT_RTNL();
+
 	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
 	if (err < 0)
 		return ERR_PTR(err);
@@ -475,20 +476,15 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
 	if (tsize != s->tsize || (!tab && tsize > 0))
 		return ERR_PTR(-EINVAL);
 
-	spin_lock(&qdisc_stab_lock);
-
 	list_for_each_entry(stab, &qdisc_stab_list, list) {
 		if (memcmp(&stab->szopts, s, sizeof(*s)))
 			continue;
 		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
 			continue;
 		stab->refcnt++;
-		spin_unlock(&qdisc_stab_lock);
 		return stab;
 	}
 
-	spin_unlock(&qdisc_stab_lock);
-
 	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
 	if (!stab)
 		return ERR_PTR(-ENOMEM);
@@ -498,10 +494,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
 	if (tsize > 0)
 		memcpy(stab->data, tab, tsize * sizeof(u16));
 
-	spin_lock(&qdisc_stab_lock);
 	list_add_tail(&stab->list, &qdisc_stab_list);
-	spin_unlock(&qdisc_stab_lock);
-
 	return stab;
 }
 
@@ -512,17 +505,15 @@ static void stab_kfree_rcu(struct rcu_head *head)
 
 void qdisc_put_stab(struct qdisc_size_table *tab)
 {
+	ASSERT_RTNL();
+
 	if (!tab)
 		return;
 
-	spin_lock(&qdisc_stab_lock);
-
 	if (--tab->refcnt == 0) {
 		list_del(&tab->list);
 		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
 	}
-
-	spin_unlock(&qdisc_stab_lock);
 }
 EXPORT_SYMBOL(qdisc_put_stab);
 
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH 11/13] net_sched: remove hashmask from Qdisc_class_hash
From: Cong Wang @ 2014-11-04 17:56 UTC (permalink / raw)
  To: netdev; +Cc: Cong Wang
In-Reply-To: <1415123796-8093-1-git-send-email-xiyou.wangcong@gmail.com>

Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 include/net/sch_generic.h | 8 +++++---
 net/sched/sch_api.c       | 9 +++------
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 89c3f37..1f30524 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -359,12 +359,14 @@ struct Qdisc_class_common {
 struct Qdisc_class_hash {
 	struct hlist_head	*hash;
 	unsigned int		hashsize;
-	unsigned int		hashmask;
 	unsigned int		hashelems;
 };
 
-static inline unsigned int qdisc_class_hash(u32 id, u32 mask)
+static inline
+unsigned int qdisc_class_hash(unsigned int id, unsigned int hashsize)
 {
+	unsigned int mask = hashsize- 1;
+
 	id ^= id >> 8;
 	id ^= id >> 4;
 	return id & mask;
@@ -376,7 +378,7 @@ qdisc_class_find(const struct Qdisc_class_hash *hash, u32 id)
 	struct Qdisc_class_common *cl;
 	unsigned int h;
 
-	h = qdisc_class_hash(id, hash->hashmask);
+	h = qdisc_class_hash(id, hash->hashsize);
 	hlist_for_each_entry(cl, &hash->hash[h], hnode) {
 		if (cl->classid == id)
 			return cl;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f57d3c6..efc60d6 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -647,14 +647,13 @@ void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
 	struct Qdisc_class_common *cl;
 	struct hlist_node *next;
 	struct hlist_head *nhash, *ohash;
-	unsigned int nsize, nmask, osize;
+	unsigned int nsize, osize;
 	unsigned int i, h;
 
 	/* Rehash when load factor exceeds 0.75 */
 	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
 		return;
 	nsize = clhash->hashsize * 2;
-	nmask = nsize - 1;
 	nhash = qdisc_class_hash_alloc(nsize);
 	if (nhash == NULL)
 		return;
@@ -665,13 +664,12 @@ void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
 	sch_tree_lock(sch);
 	for (i = 0; i < osize; i++) {
 		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
-			h = qdisc_class_hash(cl->classid, nmask);
+			h = qdisc_class_hash(cl->classid, nsize);
 			hlist_add_head(&cl->hnode, &nhash[h]);
 		}
 	}
 	clhash->hash     = nhash;
 	clhash->hashsize = nsize;
-	clhash->hashmask = nmask;
 	sch_tree_unlock(sch);
 
 	qdisc_class_hash_free(ohash, osize);
@@ -686,7 +684,6 @@ int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
 	if (clhash->hash == NULL)
 		return -ENOMEM;
 	clhash->hashsize  = size;
-	clhash->hashmask  = size - 1;
 	clhash->hashelems = 0;
 	return 0;
 }
@@ -704,7 +701,7 @@ void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
 	unsigned int h;
 
 	INIT_HLIST_NODE(&cl->hnode);
-	h = qdisc_class_hash(cl->classid, clhash->hashmask);
+	h = qdisc_class_hash(cl->classid, clhash->hashsize);
 	hlist_add_head(&cl->hnode, &clhash->hash[h]);
 	clhash->hashelems++;
 }
-- 
1.8.3.1

^ permalink raw reply related

* [PATCH 10/13] net_sched: forbid setting default qdisc to inappropriate ones
From: Cong Wang @ 2014-11-04 17:56 UTC (permalink / raw)
  To: netdev; +Cc: Cong Wang, Stephen Hemminger, Eric Dumazet, David S. Miller
In-Reply-To: <1415123796-8093-1-git-send-email-xiyou.wangcong@gmail.com>

Instead of just documentation, we should explicitly prohibit
setting the default qdisc to inappropriate ones. That is,
setting a flag for appropriate ones.

Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Eric Dumazet <edumazet@google.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
---
 include/net/sch_generic.h |  1 +
 net/sched/sch_api.c       | 12 ++++++++++--
 net/sched/sch_fifo.c      |  6 +++---
 net/sched/sch_fq.c        |  1 +
 net/sched/sch_fq_codel.c  |  1 +
 net/sched/sch_generic.c   |  2 +-
 net/sched/sch_mq.c        |  2 +-
 net/sched/sch_sfq.c       |  1 +
 8 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ba3b6bf..89c3f37 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -185,6 +185,7 @@ struct Qdisc_ops {
 #define QDISC_F_BUILTIN		1
 #define QDISC_F_MQ		2
 #define QDISC_F_FIFO		4
+#define QDISC_F_PARAM_LESS	8
 	unsigned int		flags;
 
 	int 			(*enqueue)(struct sk_buff *, struct Qdisc *);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 98b315f..f57d3c6 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -227,6 +227,7 @@ static struct Qdisc_ops *qdisc_lookup_default(const char *name)
 int qdisc_set_default(const char *name)
 {
 	const struct Qdisc_ops *ops;
+	int err = 0;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -243,13 +244,20 @@ int qdisc_set_default(const char *name)
 	}
 
 	if (ops) {
+		if (!(ops->flags & QDISC_F_PARAM_LESS)) {
+			err = -EINVAL;
+			goto unlock;
+		}
 		/* Set new default */
 		module_put(default_qdisc_ops->owner);
 		default_qdisc_ops = ops;
+	} else {
+		err = -ENOENT;
 	}
-	write_unlock(&qdisc_mod_lock);
 
-	return ops ? 0 : -ENOENT;
+unlock:
+	write_unlock(&qdisc_mod_lock);
+	return err;
 }
 
 /* We know handle. Find qdisc among all qdisc's attached to device
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index c21a037..6bed08b 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -96,7 +96,7 @@ static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
 struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
 	.id		=	"pfifo",
 	.priv_size	=	0,
-	.flags 		=	QDISC_F_FIFO,
+	.flags 		=	QDISC_F_FIFO | QDISC_F_PARAM_LESS,
 	.enqueue	=	pfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
 	.peek		=	qdisc_peek_head,
@@ -112,7 +112,7 @@ EXPORT_SYMBOL(pfifo_qdisc_ops);
 struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
 	.id		=	"bfifo",
 	.priv_size	=	0,
-	.flags 		=	QDISC_F_FIFO,
+	.flags 		=	QDISC_F_FIFO | QDISC_F_PARAM_LESS,
 	.enqueue	=	bfifo_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
 	.peek		=	qdisc_peek_head,
@@ -128,7 +128,7 @@ EXPORT_SYMBOL(bfifo_qdisc_ops);
 struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = {
 	.id		=	"pfifo_head_drop",
 	.priv_size	=	0,
-	.flags 		=	QDISC_F_FIFO,
+	.flags 		=	QDISC_F_FIFO | QDISC_F_PARAM_LESS,
 	.enqueue	=	pfifo_tail_enqueue,
 	.dequeue	=	qdisc_dequeue_head,
 	.peek		=	qdisc_peek_head,
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 34ec70c..7c359b2 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -805,6 +805,7 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 
 static struct Qdisc_ops fq_qdisc_ops __read_mostly = {
 	.id		=	"fq",
+	.flags		=	QDISC_F_PARAM_LESS,
 	.priv_size	=	sizeof(struct fq_sched_data),
 
 	.enqueue	=	fq_enqueue,
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index b9ca32e..acc06bf 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -594,6 +594,7 @@ static const struct Qdisc_class_ops fq_codel_class_ops = {
 static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
 	.cl_ops		=	&fq_codel_class_ops,
 	.id		=	"fq_codel",
+	.flags		=	QDISC_F_PARAM_LESS,
 	.priv_size	=	sizeof(struct fq_codel_sched_data),
 	.enqueue	=	fq_codel_enqueue,
 	.dequeue	=	fq_codel_dequeue,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2b1931d..29db9c8 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -567,7 +567,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
 struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.id		=	"pfifo_fast",
 	.priv_size	=	sizeof(struct pfifo_fast_priv),
-	.flags		=	QDISC_F_FIFO,
+	.flags		=	QDISC_F_FIFO | QDISC_F_PARAM_LESS,
 	.enqueue	=	pfifo_fast_enqueue,
 	.dequeue	=	pfifo_fast_dequeue,
 	.peek		=	pfifo_fast_peek,
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 03b8069..43f9dc8 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -236,7 +236,7 @@ static const struct Qdisc_class_ops mq_class_ops = {
 struct Qdisc_ops mq_qdisc_ops __read_mostly = {
 	.cl_ops		= &mq_class_ops,
 	.id		= "mq",
-	.flags		= QDISC_F_MQ,
+	.flags		= QDISC_F_MQ | QDISC_F_PARAM_LESS,
 	.priv_size	= sizeof(struct mq_sched),
 	.init		= mq_init,
 	.destroy	= mq_destroy,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 6212652..0d88d52 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -913,6 +913,7 @@ static const struct Qdisc_class_ops sfq_class_ops = {
 static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
 	.cl_ops		=	&sfq_class_ops,
 	.id		=	"sfq",
+	.flags		=	QDISC_F_PARAM_LESS,
 	.priv_size	=	sizeof(struct sfq_sched_data),
 	.enqueue	=	sfq_enqueue,
 	.dequeue	=	sfq_dequeue,
-- 
1.8.3.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox