Netdev List
 help / color / mirror / Atom feed
* [PATCH net-2.6.25 1/3] [TCP]: cleanup tcp_parse_options deep indented switch
From: Ilpo Järvinen @ 2008-01-03 11:00 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Removed case indentation level & combined some nested ifs, mostly
within 80 lines now. This is a leftover from indent patch, it
just had to be done manually to avoid messing it up completely.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
---
 net/ipv4/tcp_input.c |  135 +++++++++++++++++++++++++-------------------------
 1 files changed, 67 insertions(+), 68 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 18e099c..d5b6adf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3278,81 +3278,80 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 		int opsize;
 
 		switch (opcode) {
-			case TCPOPT_EOL:
+		case TCPOPT_EOL:
+			return;
+		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
+			length--;
+			continue;
+		default:
+			opsize = *ptr++;
+			if (opsize < 2) /* "silly options" */
 				return;
-			case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
-				length--;
-				continue;
-			default:
-				opsize=*ptr++;
-				if (opsize < 2) /* "silly options" */
-					return;
-				if (opsize > length)
-					return;	/* don't parse partial options */
-				switch (opcode) {
-				case TCPOPT_MSS:
-					if (opsize==TCPOLEN_MSS && th->syn && !estab) {
-						u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
-						if (in_mss) {
-							if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
-								in_mss = opt_rx->user_mss;
-							opt_rx->mss_clamp = in_mss;
-						}
-					}
-					break;
-				case TCPOPT_WINDOW:
-					if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
-						if (sysctl_tcp_window_scaling) {
-							__u8 snd_wscale = *(__u8 *) ptr;
-							opt_rx->wscale_ok = 1;
-							if (snd_wscale > 14) {
-								if (net_ratelimit())
-									printk(KERN_INFO "tcp_parse_options: Illegal window "
-									       "scaling value %d >14 received.\n",
-									       snd_wscale);
-								snd_wscale = 14;
-							}
-							opt_rx->snd_wscale = snd_wscale;
-						}
-					break;
-				case TCPOPT_TIMESTAMP:
-					if (opsize==TCPOLEN_TIMESTAMP) {
-						if ((estab && opt_rx->tstamp_ok) ||
-						    (!estab && sysctl_tcp_timestamps)) {
-							opt_rx->saw_tstamp = 1;
-							opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
-							opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
-						}
+			if (opsize > length)
+				return;	/* don't parse partial options */
+			switch (opcode) {
+			case TCPOPT_MSS:
+				if (opsize == TCPOLEN_MSS && th->syn && !estab) {
+					u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
+					if (in_mss) {
+						if (opt_rx->user_mss &&
+						    opt_rx->user_mss < in_mss)
+							in_mss = opt_rx->user_mss;
+						opt_rx->mss_clamp = in_mss;
 					}
-					break;
-				case TCPOPT_SACK_PERM:
-					if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
-						if (sysctl_tcp_sack) {
-							opt_rx->sack_ok = 1;
-							tcp_sack_reset(opt_rx);
-						}
+				}
+				break;
+			case TCPOPT_WINDOW:
+				if (opsize == TCPOLEN_WINDOW && th->syn &&
+				    !estab && sysctl_tcp_window_scaling) {
+					__u8 snd_wscale = *(__u8 *)ptr;
+					opt_rx->wscale_ok = 1;
+					if (snd_wscale > 14) {
+						if (net_ratelimit())
+							printk(KERN_INFO "tcp_parse_options: Illegal window "
+							       "scaling value %d >14 received.\n",
+							       snd_wscale);
+						snd_wscale = 14;
 					}
-					break;
+					opt_rx->snd_wscale = snd_wscale;
+				}
+				break;
+			case TCPOPT_TIMESTAMP:
+				if ((opsize == TCPOLEN_TIMESTAMP) &&
+				    ((estab && opt_rx->tstamp_ok) ||
+				     (!estab && sysctl_tcp_timestamps))) {
+					opt_rx->saw_tstamp = 1;
+					opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
+					opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
+				}
+				break;
+			case TCPOPT_SACK_PERM:
+				if (opsize == TCPOLEN_SACK_PERM && th->syn &&
+				    !estab && sysctl_tcp_sack) {
+ 					opt_rx->sack_ok = 1;
+					tcp_sack_reset(opt_rx);
+				}
+				break;
 
-				case TCPOPT_SACK:
-					if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
-					   !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
-					   opt_rx->sack_ok) {
-						TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
-					}
-					break;
+			case TCPOPT_SACK:
+				if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
+				   !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
+				   opt_rx->sack_ok) {
+					TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
+				}
+				break;
 #ifdef CONFIG_TCP_MD5SIG
-				case TCPOPT_MD5SIG:
-					/*
-					 * The MD5 Hash has already been
-					 * checked (see tcp_v{4,6}_do_rcv()).
-					 */
-					break;
+			case TCPOPT_MD5SIG:
+				/*
+				 * The MD5 Hash has already been
+				 * checked (see tcp_v{4,6}_do_rcv()).
+				 */
+				break;
 #endif
-				}
+			}
 
-				ptr+=opsize-2;
-				length-=opsize;
+			ptr += opsize-2;
+			length -= opsize;
 		}
 	}
 }
-- 
1.5.0.6


^ permalink raw reply related

* [PATCH net-2.6.25 2/3] [TCP]: Urgent parameter effect can be simplified.
From: Ilpo Järvinen @ 2008-01-03 11:00 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <11993580071900-git-send-email-ilpo.jarvinen@helsinki.fi>

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
---
 net/ipv4/tcp_output.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f6d279a..6c7cd0a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2488,7 +2488,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	 * end to send an ack.  Don't queue or clone SKB, just
 	 * send it.
 	 */
-	TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
+	TCP_SKB_CB(skb)->seq = tp->snd_una - !urgent;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
-- 
1.5.0.6


^ permalink raw reply related

* [PATCH net-2.6.25 3/3] [TCP]: Perform setting of common control fields in one place
From: Ilpo Järvinen @ 2008-01-03 11:00 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <11993580071646-git-send-email-ilpo.jarvinen@helsinki.fi>

In case of segments which are purely for control without any
data (SYN/ACK/FIN/RST), many fields are set to common values
in multiple places.

i386 results:

$ gcc --version
gcc (GCC) 4.1.2 20070626 (Red Hat 4.1.2-13)

$ codiff tcp_output.o.old tcp_output.o.new
net/ipv4/tcp_output.c:
  tcp_xmit_probe_skb    |  -48
  tcp_send_ack          |  -56
  tcp_retransmit_skb    |  -79
  tcp_connect           |  -43
  tcp_send_active_reset |  -35
  tcp_make_synack       |  -42
  tcp_send_fin          |  -48
 7 functions changed, 351 bytes removed

net/ipv4/tcp_output.c:
  tcp_init_nondata_skb |  +90
 1 function changed, 90 bytes added

tcp_output.o.mid:
 8 functions changed, 90 bytes added, 351 bytes removed, diff: -261

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
---
 net/ipv4/tcp_output.c |   91 +++++++++++++++++++-----------------------------
 1 files changed, 36 insertions(+), 55 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6c7cd0a..89f0188 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -327,6 +327,26 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
 	}
 }
 
+/* Constructs common control bits of non-data skb. If SYN/FIN is present,
+ * auto increment end seqno.
+ */
+static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
+{
+	skb->csum = 0;
+
+	TCP_SKB_CB(skb)->flags = flags;
+	TCP_SKB_CB(skb)->sacked = 0;
+
+	skb_shinfo(skb)->gso_segs = 1;
+	skb_shinfo(skb)->gso_size = 0;
+	skb_shinfo(skb)->gso_type = 0;
+
+	TCP_SKB_CB(skb)->seq = seq;
+	if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN))
+		seq++;
+	TCP_SKB_CB(skb)->end_seq = seq;
+}
+
 static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
 					 __u32 tstamp, __u8 **md5_hash)
 {
@@ -1864,12 +1884,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
 	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
 		if (!pskb_trim(skb, 0)) {
-			TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
-			skb_shinfo(skb)->gso_segs = 1;
-			skb_shinfo(skb)->gso_size = 0;
-			skb_shinfo(skb)->gso_type = 0;
+			/* Reuse, even though it does some unnecessary work */
+			tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
+					     TCP_SKB_CB(skb)->flags);
 			skb->ip_summed = CHECKSUM_NONE;
-			skb->csum = 0;
 		}
 	}
 
@@ -2068,16 +2086,9 @@ void tcp_send_fin(struct sock *sk)
 
 		/* Reserve space for headers and prepare control bits. */
 		skb_reserve(skb, MAX_TCP_HEADER);
-		skb->csum = 0;
-		TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
-		TCP_SKB_CB(skb)->sacked = 0;
-		skb_shinfo(skb)->gso_segs = 1;
-		skb_shinfo(skb)->gso_size = 0;
-		skb_shinfo(skb)->gso_type = 0;
-
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
-		TCP_SKB_CB(skb)->seq = tp->write_seq;
-		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
+		tcp_init_nondata_skb(skb, tp->write_seq,
+				     TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
 		tcp_queue_skb(sk, skb);
 	}
 	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
@@ -2101,16 +2112,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 
 	/* Reserve space for headers and prepare control bits. */
 	skb_reserve(skb, MAX_TCP_HEADER);
-	skb->csum = 0;
-	TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
-	TCP_SKB_CB(skb)->sacked = 0;
-	skb_shinfo(skb)->gso_segs = 1;
-	skb_shinfo(skb)->gso_size = 0;
-	skb_shinfo(skb)->gso_type = 0;
-
+	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
+			     TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
 	/* Send it off. */
-	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk);
-	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
@@ -2198,12 +2202,11 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	TCP_ECN_make_synack(req, th);
 	th->source = inet_sk(sk)->sport;
 	th->dest = ireq->rmt_port;
-	TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
-	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
-	TCP_SKB_CB(skb)->sacked = 0;
-	skb_shinfo(skb)->gso_segs = 1;
-	skb_shinfo(skb)->gso_size = 0;
-	skb_shinfo(skb)->gso_type = 0;
+	/* Setting of flags are superfluous here for callers (and ECE is
+	 * not even correctly set)
+	 */
+	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
+			     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
 	th->seq = htonl(TCP_SKB_CB(skb)->seq);
 	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
 	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
@@ -2235,7 +2238,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 			       NULL)
 			      );
 
-	skb->csum = 0;
 	th->doff = (tcp_header_size >> 2);
 	TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
@@ -2327,16 +2329,9 @@ int tcp_connect(struct sock *sk)
 	/* Reserve space for headers. */
 	skb_reserve(buff, MAX_TCP_HEADER);
 
-	TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
-	TCP_ECN_send_syn(sk, buff);
-	TCP_SKB_CB(buff)->sacked = 0;
-	skb_shinfo(buff)->gso_segs = 1;
-	skb_shinfo(buff)->gso_size = 0;
-	skb_shinfo(buff)->gso_type = 0;
-	buff->csum = 0;
 	tp->snd_nxt = tp->write_seq;
-	TCP_SKB_CB(buff)->seq = tp->write_seq++;
-	TCP_SKB_CB(buff)->end_seq = tp->write_seq;
+	tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
+	TCP_ECN_send_syn(sk, buff);
 
 	/* Send it off. */
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
@@ -2441,15 +2436,9 @@ void tcp_send_ack(struct sock *sk)
 
 	/* Reserve space for headers and prepare control bits. */
 	skb_reserve(buff, MAX_TCP_HEADER);
-	buff->csum = 0;
-	TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
-	TCP_SKB_CB(buff)->sacked = 0;
-	skb_shinfo(buff)->gso_segs = 1;
-	skb_shinfo(buff)->gso_size = 0;
-	skb_shinfo(buff)->gso_type = 0;
+	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK);
 
 	/* Send it off, this clears delayed acks for us. */
-	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk);
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
 	tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
 }
@@ -2477,19 +2466,11 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 
 	/* Reserve space for headers and set control bits. */
 	skb_reserve(skb, MAX_TCP_HEADER);
-	skb->csum = 0;
-	TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
-	TCP_SKB_CB(skb)->sacked = 0;
-	skb_shinfo(skb)->gso_segs = 1;
-	skb_shinfo(skb)->gso_size = 0;
-	skb_shinfo(skb)->gso_type = 0;
-
 	/* Use a previous sequence.  This should cause the other
 	 * end to send an ack.  Don't queue or clone SKB, just
 	 * send it.
 	 */
-	TCP_SKB_CB(skb)->seq = tp->snd_una - !urgent;
-	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
+	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK);
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
-- 
1.5.0.6


^ permalink raw reply related

* Re: [RFC PATCH] NET: Clone the sk_buff->iif field properly
From: Jarek Poplawski @ 2008-01-03  9:58 UTC (permalink / raw)
  To: Paul Moore; +Cc: netdev, Jamal Hadi Salim
In-Reply-To: <20080102160133.11792.21836.stgit@flek.lan>

On 02-01-2008 17:01, Paul Moore wrote:
> When sk_buffs are cloned the iif field of the new, cloned packet is neither
> zeroed out or copied from the existing sk_buff.  The result is that the newly
> cloned sk_buff has garbage in the iif field which is a Bad Thing.  This patch
> fixes this problem by copying the iif field along with the other sk_buff
> critical fields in __copy_skb_header().
> 
> This patch is needed by some of the labeled networking changes proposed for
> 2.6.25, does anyone have any objections?

Probably Jamal could be the most interested (added to CC):

http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=a057ae3c104dd2c661e55d2af37e70d168c65e00

Regards,
Jarek P.

> ---
> 
>  net/core/skbuff.c |    1 +
>  1 files changed, 1 insertions(+), 0 deletions(-)
> 
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 5b4ce9b..9cb7bb7 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -371,6 +371,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
>  {
>  	new->tstamp		= old->tstamp;
>  	new->dev		= old->dev;
> +	new->iif		= old->iif;
>  	new->transport_header	= old->transport_header;
>  	new->network_header	= old->network_header;
>  	new->mac_header		= old->mac_header;
> 

^ permalink raw reply

* Re: TCP event tracking via netlink...
From: David Miller @ 2008-01-03  9:26 UTC (permalink / raw)
  To: ilpo.jarvinen; +Cc: shemminger, netdev
In-Reply-To: <Pine.LNX.4.64.0801021304050.31652@kivilampi-30.cs.helsinki.fi>

From: "Ilpo_Järvinen" <ilpo.jarvinen@helsinki.fi>
Date: Wed, 2 Jan 2008 13:05:17 +0200 (EET)

> git://git.kernel.org/pub/scm/network/tcptest/tcptest.git

Thanks a lot Ilpo.

^ permalink raw reply

* Re: neigh: timer & !nud_in_timer
From: John Sigler @ 2008-01-03  9:23 UTC (permalink / raw)
  To: netdev, linux-kernel, linux-rt-users
In-Reply-To: <476A6F85.7070905@free.fr>

John Sigler wrote:

> I noticed the following message in my kernel log.
> kernel: neigh: timer & !nud_in_timer
> (Might be due to a race condition.)
> 
> I'm running a UP Linux version 2.6.22.1-rt9
> ( http://rt.wiki.kernel.org/index.php )
> 
> The following /proc entries might be relevant.
> 
> /proc/sys/net/ipv4/conf/all/arp_accept
> 0
> /proc/sys/net/ipv4/conf/all/arp_announce
> 2
> /proc/sys/net/ipv4/conf/all/arp_filter
> 0
> /proc/sys/net/ipv4/conf/all/arp_ignore
> 1
> 
> I also lowered the priority of softirq-timer/0 to 10 which means
> it can be interrupted by other IRQ handlers.

Comments or musings?

^ permalink raw reply

* ethtool reporting wrong speed and duplex when link partner is forced
From: Esa-Pekka.Pyokkimies @ 2008-01-03  6:55 UTC (permalink / raw)
  To: netdev


Hello,

In drivers/net/mii.c, is there a reason why mii_nway_result is used to
find the speed and duplex settings when autonegation is on?  This
gives wrong result when autoneg is on, but we are making link to a
host which is forced. In this case the variable lpa is 0, and
mii_nway_result(advert & lpa) gives LPA_10HALF, which is wrong.
The real speed can be found from the BMCR register.

This is how it looks like with ethtool:

# ethtool eth1
...
        Advertised auto-negotiation: Yes
        Speed: 10Mb/s
        Duplex: Half
        Auto-negotiation: on
        Link detected: yes

# ethtool -d eth1
...
0x62: MII Basic Mode Control Register             0x3000
0x68: MII Link Partner Ability                    0x0000

And looking at mii.h we see that this implies BMCR_SPEED100 and
BMCR_ANENABLE, i.e. we are in 100 half duplex, as we should be (link
partner was forced to 100 full duplex).

Below is a quick patch you can comment. I have not yet tested this myself.

Esa-Pekka

--- 1.20/drivers/net/mii.c      2007-04-28 18:01:05 +03:00
+++ 1.21/drivers/net/mii.c      2008-01-03 08:25:42 +02:00
@@ -90,31 +90,16 @@ int mii_ethtool_gset(struct mii_if_info
        if (bmcr & BMCR_ANENABLE) {
                ecmd->advertising |= ADVERTISED_Autoneg;
                ecmd->autoneg = AUTONEG_ENABLE;
-
-               nego = mii_nway_result(advert & lpa);
-               if ((bmcr2 & (ADVERTISE_1000HALF | ADVERTISE_1000FULL)) &
-                   (lpa2 >> 2))
-                       ecmd->speed = SPEED_1000;
-               else if (nego == LPA_100FULL || nego == LPA_100HALF)
-                       ecmd->speed = SPEED_100;
-               else
-                       ecmd->speed = SPEED_10;
-               if ((lpa2 & LPA_1000FULL) || nego == LPA_100FULL ||
-                   nego == LPA_10FULL) {
-                       ecmd->duplex = DUPLEX_FULL;
-                       mii->full_duplex = 1;
-               } else {
-                       ecmd->duplex = DUPLEX_HALF;
-                       mii->full_duplex = 0;
-               }
        } else {
                ecmd->autoneg = AUTONEG_DISABLE;
-
-               ecmd->speed = ((bmcr & BMCR_SPEED1000 &&
-                               (bmcr & BMCR_SPEED100) == 0) ? SPEED_1000 :
-                              (bmcr & BMCR_SPEED100) ? SPEED_100 :
SPEED_10);
-               ecmd->duplex = (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL :
DUPLEX_HA\
LF;
        }
+
+
+       ecmd->speed = ((bmcr & BMCR_SPEED1000 &&
+                       (bmcr & BMCR_SPEED100) == 0) ? SPEED_1000 :
+                      (bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10);
+       ecmd->duplex = (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF;
+

        /* ignore maxtxpkt, maxrxpkt for now */


^ permalink raw reply

* Re: 2.6.24-rc6-mm1
From: Torsten Kaiser @ 2008-01-03  5:02 UTC (permalink / raw)
  To: J. Bruce Fields
  Cc: Herbert Xu, Andrew Morton, linux-kernel, Neil Brown, netdev,
	Tom Tucker
In-Reply-To: <20080102215706.GB6480@fieldses.org>

On Jan 2, 2008 10:57 PM, J. Bruce Fields <bfields@fieldses.org> wrote:
> On Thu, Jan 03, 2008 at 08:51:54AM +1100, Herbert Xu wrote:
> > On Wed, Jan 02, 2008 at 07:29:59PM +0100, Torsten Kaiser wrote:
> > >
> > > Vanilla 2.6.24-rc6 seems stable. I did not see any crash or warnings.
> >
> > OK that's great.  The next step would be to try excluding specific git
> > trees from mm to see if they make a difference.
> >
> > The two specific trees of interest would be git-nfsd and git-net.
>
> Also, if it's git-nfsd, it'd be useful to test with the current git-nfsd
> from the for-mm branch at:
>
>         git://linux-nfs.org/~bfields/linus.git for-mm
>
> and then any bisection results (even partial) from that tree would help
> immensely....

The problem with that is, that triggering the bug is not easy so
marking anything 'good' is questionable.
This time I needed to compile over 50 packages until it triggered.

I was using 2.6.24-rc6-mm1 again, but with a crude hack (see end of
mail) that I hope should catch any double-frees of skbs.
None of my warnings triggered, only a list corruption again in
svc_xprt_enqueue(), but this time with an additional output about
whats wrong with the list:
[17023.029519] list_add corruption. prev->next should be next
(ffff8100d20ec1c8), but was ffff81009c5a6
c28. (prev=ffff81009c5a6c28).
[17023.029537] ------------[ cut here ]------------
[17023.031445] kernel BUG at lib/list_debug.c:33!
[17023.033280] invalid opcode: 0000 [1] SMP
[17023.034967] last sysfs file:
/sys/devices/system/cpu/cpu3/cache/index2/shared_cpu_map
[17023.038209] CPU 3
[17023.039047] Modules linked in: radeon drm w83792d ipv6 tuner
tea5767 tda8290 tuner_xc2028 tda9887 tu
ner_simple mt20xx tea5761 tvaudio msp3400 bttv ir_common
compat_ioctl32 videobuf_dma_sg videobuf_core b
tcx_risc usbhid tveeprom videodev hid v4l2_common v4l1_compat sg
pata_amd i2c_nforce2
[17023.039519] Pid: 20564, comm: nfsv4-svc Not tainted 2.6.24-rc6-mm1 #14
[17023.039519] RIP: 0010:[<ffffffff803bd834>]  [<ffffffff803bd834>]
__list_add+0x54/0x60
[17023.039519] RSP: 0018:ffff8101002c9dc0  EFLAGS: 00010282
[17023.039519] RAX: 0000000000000088 RBX: ffff810110125c00 RCX: 0000000000000000
[17023.039519] RDX: ffff81010067c000 RSI: 0000000000000001 RDI: ffffffff80764140
[17023.039519] RBP: ffff8101002c9dc0 R08: 0000000000000001 R09: 0000000000000000
[17023.039519] R10: ffff81000100a088 R11: 0000000000000001 R12: ffff8100d20ec180
[17023.039519] R13: ffff8100d20ec1b8 R14: ffff8100d20ec1b8 R15: ffff8101188e4600
[17023.039519] FS:  00007ff7a870c6f0(0000) GS:ffff81011ff0cd00(0000)
knlGS:0000000000000000
[17023.039519] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[17023.039519] CR2: 00000000024df510 CR3: 0000000032539000 CR4: 00000000000006e0
[17023.039519] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[17023.039519] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[17023.039519] Process nfsv4-svc (pid: 20564, threadinfo
ffff8101002c8000, task ffff81010067c000)
[17023.039519] Stack:  ffff8101002c9e00 ffffffff805c18ab
ffff8100d20ec188 ffff8101188e4600
[17023.039519]  ffff81009c5a6c00 ffff81010d118000 ffff810110125c00
ffff8101188e4610
[17023.039519]  ffff8101002c9e10 ffffffff805c1997 ffff8101002c9ee0
ffffffff805c2ac4
[17023.039519] Call Trace:
[17023.039519]  [<ffffffff805c18ab>] svc_xprt_enqueue+0x1ab/0x240
[17023.039519]  [<ffffffff805c1997>] svc_xprt_received+0x17/0x20
[17023.039519]  [<ffffffff805c2ac4>] svc_recv+0x394/0x7c0
[17023.039519]  [<ffffffff805c1ece>] svc_send+0xae/0xd0
[17023.039519]  [<ffffffff802305f0>] default_wake_function+0x0/0x10
[17023.039519]  [<ffffffff803178b9>] nfs_callback_svc+0x79/0x130
[17023.039519]  [<ffffffff80232a67>] finish_task_switch+0x67/0xe0
[17023.039519]  [<ffffffff8020c4b8>] child_rip+0xa/0x12
[17023.039519]  [<ffffffff8020bbcf>] restore_args+0x0/0x30
[17023.039519]  [<ffffffff805b69cd>] __svc_create_thread+0xdd/0x200
[17023.039519]  [<ffffffff80317840>] nfs_callback_svc+0x0/0x130
[17023.039519]  [<ffffffff8020c4ae>] child_rip+0x0/0x12
[17023.039519]
[17023.039519]
[17023.039519] Code: 0f 0b eb fe 0f 1f 84 00 00 00 00 00 55 48 8b 16 48 89 e5 e8
[17023.039519] RIP  [<ffffffff803bd834>] __list_add+0x54/0x60
[17023.039519]  RSP <ffff8101002c9dc0>
[17023.039524] ---[ end trace a9257b24a4b10968 ]---
[17023.041451] Kernel panic - not syncing: Aiee, killing interrupt handler!

I also wonder if this really is only one bug, or two. (One in skb
handling and one in svc_xprt_enqueue's list code)

Summary of what I think is related to this:

* 2.6.24-rc2-mm1 might not have it, but had a bug in the nfsv4 sillyrenames.
* 2.6.24-rc3-mm1 did not work for me at all (IO-APIC problem)
* 2.6.24-rc3-mm2 has the bug
  - I have seen a crash in ether1394_complete_cb
  - I have seen 3 crashes in svc_xprt_enqueue, two with slub_debug=FZP
  - I have seen a crash in tcp_send_ack -> __alloc_skb
  - patched with svc_xprt_received(&svsk->sk_xprt); removed from
svc_create_socket()
    -> still crashed in svc_xprt_enqueue
  - patched "skb_release_all(dst);" back to "skb_release_data(dst);"
in skb_morph() (net/core/skbuff.c).
    -> seemed to work (200+ packages compiled+installed)
* 2.6.24-rc4-mm1 and -rc5-mm1: not tried, was still testing -rc3-mm2
* 2.6.24-rc6 did not crash, but I did not test it for too long
* 2.6.24-rc6-mm1 has the bug
  - I have seen a crash in skb_release_data with vanilla -mm1
  - patched "skb_release_all(dst);" back to "skb_release_data(dst);" in
skb_morph() (net/core/skbuff.c).
    -> crashed in xs_tcp_data_recv, skb_morph() was not called once
  - patched with notfreed-hack
    -> crashed in svc_xprt_enqueue, skb_morph() was not called once

I will see if I have the time to try git-nfsd, but as I'm not able to
trigger the bug reliable, I will not be able to really declare any
tree unrelated...

Torsten

My skb-double-free-hack: I added a new __u8 field to sk_buff after its
cb field and the following warnings:
--- skbuff.c~   2008-01-03 05:29:32.092408637 +0100
+++ skbuff.c    2008-01-02 23:00:14.114535678 +0100
@@ -205,6 +205,7 @@ struct sk_buff *__alloc_skb(unsigned int
        memset(skb, 0, offsetof(struct sk_buff, tail));
        skb->truesize = size + sizeof(struct sk_buff);
        atomic_set(&skb->users, 1);
+       skb->notfreed=1;
        skb->head = data;
        skb->data = data;
        skb_reset_tail_pointer(skb);
@@ -317,13 +318,18 @@ static void kfree_skbmem(struct sk_buff

        switch (skb->fclone) {
        case SKB_FCLONE_UNAVAILABLE:
+               WARN_ON(!skb->notfreed);
+               skb->notfreed=0;
                kmem_cache_free(skbuff_head_cache, skb);
                break;

        case SKB_FCLONE_ORIG:
                fclone_ref = (atomic_t *) (skb + 2);
-               if (atomic_dec_and_test(fclone_ref))
+               if (atomic_dec_and_test(fclone_ref)) {
+                 WARN_ON(!skb->notfreed);
+                 skb->notfreed=0;
                        kmem_cache_free(skbuff_fclone_cache, skb);
+                }
                break;

        case SKB_FCLONE_CLONE:
@@ -335,8 +341,11 @@ static void kfree_skbmem(struct sk_buff
                 */
                skb->fclone = SKB_FCLONE_UNAVAILABLE;

-               if (atomic_dec_and_test(fclone_ref))
+               if (atomic_dec_and_test(fclone_ref)) {
+                 WARN_ON(!other->notfreed);
+                 other->notfreed=0;
                        kmem_cache_free(skbuff_fclone_cache, other);
+                }
                break;
        }
 }

^ permalink raw reply

* Re: [PATCH] iproute2: add synonyms for ip rule options to ip(8) manpage.
From: Stephen Hemminger @ 2008-01-03  0:32 UTC (permalink / raw)
  To: Andreas Henriksson; +Cc: netdev
In-Reply-To: <1199317752.16358.57.camel@localhost.localdomain>

On Thu, 03 Jan 2008 00:49:11 +0100
Andreas Henriksson <andreas@fatal.se> wrote:

> commit 355e1438f73602fbac029d28891dfa889471b679
> Author: Andreas Henriksson <andreas@fatal.se>
> Date:   Wed Jan 2 23:21:59 2008 +0100
> 
>     Add synonyms for ip rule options to ip(8) manpage.
>     
>     See:
>     http://bugs.debian.org/433507
>     http://bugs.debian.org/213673
> 
> diff --git a/man/man8/ip.8 b/man/man8/ip.8
> index c74440a..1a57a42 100644
> --- a/man/man8/ip.8
> +++ b/man/man8/ip.8
> @@ -1568,10 +1568,12 @@ the priority of this rule.  Each rule should have an explicitly
>  set
>  .I unique
>  priority value.
> +The options preference and order are synonyms with priority.
>  
>  .TP
>  .BI table " TABLEID"
>  the routing table identifier to lookup if the rule selector matches.
> +It is also possible to use lookup instead of table.
>  
>  .TP
>  .BI realms " FROM/TO"
> @@ -1589,6 +1591,7 @@ may be either the start of the block of NAT addresses (selected by NAT
>  routes) or a local host address (or even zero).
>  In the last case the router does not translate the packets, but
>  masquerades them to this address.
> +Using map-to instead of nat means the same thing.
>  
>  .B Warning:
>  Changes to the RPDB made with these commands do not become active
> @@ -1601,6 +1604,7 @@ This command has no arguments.
>  
>  .SS ip rule show - list rules
>  This command has no arguments.
> +The options list or lst are synonyms with show.
>  
>  .SH ip maddress - multicast addresses management
>  
> 
> 

applied both.

-- 
Stephen Hemminger <stephen.hemminger@vyatta.com>

^ permalink raw reply

* [PATCH] iproute2: add synonyms for ip rule options to ip(8) manpage.
From: Andreas Henriksson @ 2008-01-02 23:49 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev
In-Reply-To: <1199317650.16358.54.camel@localhost.localdomain>

commit 355e1438f73602fbac029d28891dfa889471b679
Author: Andreas Henriksson <andreas@fatal.se>
Date:   Wed Jan 2 23:21:59 2008 +0100

    Add synonyms for ip rule options to ip(8) manpage.
    
    See:
    http://bugs.debian.org/433507
    http://bugs.debian.org/213673

diff --git a/man/man8/ip.8 b/man/man8/ip.8
index c74440a..1a57a42 100644
--- a/man/man8/ip.8
+++ b/man/man8/ip.8
@@ -1568,10 +1568,12 @@ the priority of this rule.  Each rule should have an explicitly
 set
 .I unique
 priority value.
+The options preference and order are synonyms with priority.
 
 .TP
 .BI table " TABLEID"
 the routing table identifier to lookup if the rule selector matches.
+It is also possible to use lookup instead of table.
 
 .TP
 .BI realms " FROM/TO"
@@ -1589,6 +1591,7 @@ may be either the start of the block of NAT addresses (selected by NAT
 routes) or a local host address (or even zero).
 In the last case the router does not translate the packets, but
 masquerades them to this address.
+Using map-to instead of nat means the same thing.
 
 .B Warning:
 Changes to the RPDB made with these commands do not become active
@@ -1601,6 +1604,7 @@ This command has no arguments.
 
 .SS ip rule show - list rules
 This command has no arguments.
+The options list or lst are synonyms with show.
 
 .SH ip maddress - multicast addresses management
 



^ permalink raw reply related

* [PATCH] iproute2: revert syntax help text mistake.
From: Andreas Henriksson @ 2008-01-02 23:47 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: netdev

Sorry. The pref and prio options are actually synonyms.
This reverts part of commit 576c63ee59de3f18bec4ebd8181a5a395f268d03.

Signed-off-by: Andreas Henriksson <andreas@fatal.se>

--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -38,7 +38,7 @@ static void usage(void)
 {
 	fprintf(stderr, "Usage: ip rule [ list | add | del | flush ] SELECTOR ACTION\n");
 	fprintf(stderr, "SELECTOR := [ not ] [ from PREFIX ] [ to PREFIX ] [ tos TOS ] [ fwmark FWMARK[/MASK] ]\n");
-	fprintf(stderr, "            [ dev STRING ] [ pref NUMBER ] [ prio NUMBER ]\n");
+	fprintf(stderr, "            [ dev STRING ] [ pref NUMBER ]\n");
 	fprintf(stderr, "ACTION := [ table TABLE_ID ]\n");
 	fprintf(stderr, "          [ prohibit | reject | unreachable ]\n");
 	fprintf(stderr, "          [ realms [SRCREALM/]DSTREALM ]\n");



^ permalink raw reply

* Re: forcedeth: MAC-address reversed on resume from suspend
From: Adrian Bunk @ 2008-01-02 23:42 UTC (permalink / raw)
  To: Andreas Mohr; +Cc: Richard Jonsson, linux-kernel, Ayaz Abdulla, jgarzik, netdev
In-Reply-To: <20080102214843.GA19224@rhlx01.hs-esslingen.de>

[ original bug report: http://lkml.org/lkml/2008/1/2/253 ]

On Wed, Jan 02, 2008 at 10:48:43PM +0100, Andreas Mohr wrote:
> Hi,
> 
> On Wed, Jan 02, 2008 at 10:04:49PM +0100, Richard Jonsson wrote:
> > Bugreport regarding forcedeth driver.
> >
> > When returning from suspend-to-RAM the MAC-address byteorder is reversed. 
> > After another suspend-resume cycle the MAC-address is again correct. This 
> > brings a great deal of pain since the NIC is assigned a random MAC-address 
> > when it is detected as invalid.
> >
> > I cannot say if this issue appeared recently or if it's been in the kernel 
> > for a while, as I've been using wireless until recently.
> >
> > I read somewhere on lkml that the mac is read from the device, then 
> > reversed and finally written back to the device. Can it be that it is read 
> > again on resume and reversed, and then again written to the device? This 
> > would explain why it's ok every other resume cycle.
> 
> Uh, Use The Source, Luke?
> 
> But no, I think it's simply driver dainbreadness, not a matter of
> complicated writing and reading back in reversed order.
> 
> drivers/net/forcedeth.c has a nice DEV_HAS_CORRECT_MACADDR flag which is
> being enabled for certain cards (those which need this) and disabled for others.
> 
> The nv_probe() function then nicely obeys this flag by reversing the
> address if needed, _however_ there's another function, nv_copy_mac_to_hw(),
> which does _NOT_ obey it and simply copies the _raw_ netdev dev_addr
> to the card register (NvRegMacAddrA etc.).
> 
> I don't know, this all looks a bit dirty to me, MAC reading/writing
> should have been implemented in a more central way, then those people
> wouldn't have confused heaven and hell with MAC address fiddling.
> 
> And yeah, this certainly looks like a bug that should be fixed ASAP,
> unless my short analysis somehow happened to be wrong.
> (I could probably fix it, but then the forcedeth people
> most likely know better how they would like to see it implemented)
> 
> Thank you for your report!
> 
> Now the only thing remaining would be: is there a specific way to
> contact forcedeth-related people? I didn't find anything within a short
> search.

Cc's added.

> Andreas Mohr

cu
Adrian

-- 

       "Is there not promise of rain?" Ling Tan asked suddenly out
        of the darkness. There had been need of rain for many days.
       "Only a promise," Lao Er said.
                                       Pearl S. Buck - Dragon Seed


^ permalink raw reply

* [PATCH take2] Re: Nested VLAN causes recursive locking error
From: Jarek Poplawski @ 2008-01-02 23:41 UTC (permalink / raw)
  To: Benny Amorsen; +Cc: Patrick McHardy, Chuck Ebbert, netdev
In-Reply-To: <m3r6h0dvtz.fsf@ursa.amorsen.dk>

Benny Amorsen wrote, On 01/02/2008 05:40 PM:
...
> The box that shows the problem is "almost-production", so it doesn't
> have a build system. Is there a chance you could get the patch into
> Rawhide or some other testing repository? It takes forever and a lot
> of disk space to rebuild a kernel RPM, so if I could get someone else
> to do the hard work, that would be lovely...
> 
> If not, I'll probably find the time to do the RPM rebuild sometime
> within the next week or so.

...I guess this is to Chuck?

As a matter of fact I started to doubt it's a real problem: 2 vlan
headers in the row - is it working?

Anyway, as Patrick pointed, the previous patch was a bit buggy, and
deeper nesting needs a little more (if it's can work too...). So,
here is something minimal.

Patrick, if you think about something else, then of course don't care
about this patch.

Regards,
Jarek P.

------------------> (take 2)

Subject: [PATCH] nested VLAN: fix lockdep's recursive locking warning

Allow vlans nesting other vlans without lockdep's warnings (max. 2 levels
i.e. parent + child). Thanks to Patrick McHardy for pointing a bug in the
first version of this patch.

Reported-by: Benny Amorsen
Tested-by: Benny Amorsen	(?) STILL NEEDS TESTING!

Signed-off-by: Jarek Poplawski <jarkao2@gmail.com>

---

diff -Nurp 2.6.24-rc6-mm1-/net/8021q/vlan.c 2.6.24-rc6-mm1+/net/8021q/vlan.c
--- 2.6.24-rc6-mm1-/net/8021q/vlan.c	2007-12-23 14:55:38.000000000 +0100
+++ 2.6.24-rc6-mm1+/net/8021q/vlan.c	2008-01-02 23:50:19.000000000 +0100
@@ -323,6 +323,7 @@ static const struct header_ops vlan_head
 static int vlan_dev_init(struct net_device *dev)
 {
 	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
+	int subclass = 0;
 
 	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
 	dev->flags  = real_dev->flags & ~IFF_UP;
@@ -349,7 +350,11 @@ static int vlan_dev_init(struct net_devi
 		dev->hard_start_xmit = vlan_dev_hard_start_xmit;
 	}
 
-	lockdep_set_class(&dev->_xmit_lock, &vlan_netdev_xmit_lock_key);
+	if (real_dev->priv_flags & IFF_802_1Q_VLAN)
+		subclass = 1;
+
+	lockdep_set_class_and_subclass(&dev->_xmit_lock,
+				&vlan_netdev_xmit_lock_key, subclass);
 	return 0;
 }
 

^ permalink raw reply

* Re: delay via-rhine irq initialisation.
From: Chuck Ebbert @ 2008-01-02 23:34 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Dave Jones, netdev, Andrew Morton
In-Reply-To: <476DD7CC.9090507@garzik.org>

On 12/22/2007 10:36 PM, Jeff Garzik wrote:
> Dave Jones wrote:
>> With CONFIG_DEBUG_SHIRQ set, via-rhine complains during init.
>> (See https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=377721 for a
>> report).
> 
> Absolutely we want to do this.  DEBUG_SHIRQ is only one of many reasons
> -- quite simply, you are fixing an order-of-init bug that leads to races
> and other badness.
> 

Such bugs seem to be more common than was thought.  hisax_fcpcipnp has the
same problem -- it assigns function pointers for driver interrupt processing
after it requests an interrupt.

^ permalink raw reply

* Re: [patch 7/9][NETNS][IPV6] make mld_max_msf per namespace
From: David Stevens @ 2008-01-02 23:31 UTC (permalink / raw)
  To: Daniel Lezcano; +Cc: davem, netdev, netdev-owner
In-Reply-To: <20080102122827.779444823@localhost.localdomain>

Daniel,
        I'm not sure what benefit you get from making this per-namespace.
The point of it is really to prevent one (non-root, even) application from
killing machine performance with source filters (because maintaining them
is an n^2 algorithm). It's a weak constraint, but the resources it's 
protecting are
the processor and MLDv2 packet counts. If any one namespace has a
large value, all will have a problem still, and  (even without your 
patch),
lots of separate source filters can still cause a problem. What it catches
is one application creating thousands (or millions) of source filters and
killing the machine and network with MLDv2 reports as a result. Why
shouldn't that remain global?

                                                +-DLS


^ permalink raw reply

* Re: [PATCH 2.6.23.12] net/bonding: option to specify initial bond interface number
From: Jay Vosburgh @ 2008-01-02 22:24 UTC (permalink / raw)
  To: Jari Takkala; +Cc: netdev
In-Reply-To: <413FEEF1743111439393FB76D0221E4809790942@leopard.zoo.q9networks.com>

Jari Takkala <Jari.Takkala@Q9.com> wrote:

>Allow the user to specify an initial interface number when loading the bonding driver. This is useful when loading multiple instances of the bonding driver and you want to control the interface number assignment. For example, if the user wishes to create a bond5 interface they can type 'modprobe -o bond5 bonding ifnum=5'. It also works with the max_bonds option.

	What advantage does this have over:

# echo +bond5 > /sys/class/net/bonding_masters

	which will create a new bonding master for the already-loaded
driver?

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com

^ permalink raw reply

* Re: 2.6.24-rc6-mm1
From: J. Bruce Fields @ 2008-01-02 21:57 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Torsten Kaiser, Andrew Morton, linux-kernel, Neil Brown, netdev,
	Tom Tucker
In-Reply-To: <20080102215154.GA13911@gondor.apana.org.au>

On Thu, Jan 03, 2008 at 08:51:54AM +1100, Herbert Xu wrote:
> On Wed, Jan 02, 2008 at 07:29:59PM +0100, Torsten Kaiser wrote:
> >
> > Vanilla 2.6.24-rc6 seems stable. I did not see any crash or warnings.
> 
> OK that's great.  The next step would be to try excluding specific git
> trees from mm to see if they make a difference.
> 
> The two specific trees of interest would be git-nfsd and git-net.

Also, if it's git-nfsd, it'd be useful to test with the current git-nfsd
from the for-mm branch at:

	git://linux-nfs.org/~bfields/linus.git for-mm

and then any bisection results (even partial) from that tree would help
immensely....

--b.

^ permalink raw reply

* Re: [PATCH 2.6.23.12] net/bonding: option to specify initial bond interface number
From: Randy Dunlap @ 2008-01-02 21:55 UTC (permalink / raw)
  To: Jari Takkala; +Cc: netdev
In-Reply-To: <413FEEF1743111439393FB76D0221E4809790942@leopard.zoo.q9networks.com>

On Wed, 2 Jan 2008 16:20:48 -0500 Jari Takkala wrote:

> Allow the user to specify an initial interface number when loading the bonding driver. This is useful when loading multiple instances of the bonding driver and you want to control the interface number assignment. For example, if the user wishes to create a bond5 interface they can type 'modprobe -o bond5 bonding ifnum=5'. It also works with the max_bonds option.
> 
> Signed-off-by: Jari Takkala <jari.takkala@q9.com>
> ---
> diff -ruN linux-2.6.23.12.orig/drivers/net/bonding/bond_main.c linux-2.6.23.12/drivers/net/bonding/bond_main.c
> --- linux-2.6.23.12.orig/drivers/net/bonding/bond_main.c	2007-12-18 16:55:57.000000000 -0500
> +++ linux-2.6.23.12/drivers/net/bonding/bond_main.c	2008-01-02 14:49:37.000000000 -0500
> @@ -85,6 +85,7 @@
>  #define BOND_LINK_MON_INTERV	0
>  #define BOND_LINK_ARP_INTERV	0
>  
> +static int ifnum	= BOND_DEFAULT_IFNUM;
>  static int max_bonds	= BOND_DEFAULT_MAX_BONDS;
>  static int miimon	= BOND_LINK_MON_INTERV;
>  static int updelay	= 0;
> @@ -99,6 +100,8 @@
>  static char *arp_validate = NULL;
>  struct bond_params bonding_defaults;
>  
> +module_param(ifnum, int, 0);

You could (should) make <ifnum> be unsigned int and then use
module_param(ifnum, uint, 0);
and then ...

> +MODULE_PARM_DESC(ifnum, "Initial interface number to assign");
>  module_param(max_bonds, int, 0);
>  MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
>  module_param(miimon, int, 0);
> @@ -4388,6 +4391,14 @@
>  		}
>  	}

then this block is mostly useless since ifnum cannot be < 0.
And how could it ever be > INT_MAX (when ifnum was an int)?

If <ifnum> is unsigned int but you want to limit it to INT_MAX,
then half of this if-test would be OK.

> +	if (ifnum < 0 || ifnum > INT_MAX) {
> +		printk(KERN_WARNING DRV_NAME
> +		       ": Warning: ifnum (%d) not in range %d-%d, so it "
> +		       "was reset to BOND_DEFAULT_IFNUM (%d)\n",
> +		       ifnum, 0, INT_MAX, BOND_DEFAULT_IFNUM);
> +		ifnum = BOND_DEFAULT_IFNUM;
> +	}
> +
>  	if (max_bonds < 1 || max_bonds > INT_MAX) {
>  		printk(KERN_WARNING DRV_NAME
>  		       ": Warning: max_bonds (%d) not in range %d-%d, so it "


---
~Randy
desserts:  http://www.xenotime.net/linux/recipes/

^ permalink raw reply

* Re: 2.6.24-rc6-mm1
From: Herbert Xu @ 2008-01-02 21:51 UTC (permalink / raw)
  To: Torsten Kaiser
  Cc: Andrew Morton, linux-kernel, Neil Brown, J. Bruce Fields, netdev,
	Tom Tucker
In-Reply-To: <64bb37e0801021029u1735bb9eta53906b123abf577@mail.gmail.com>

On Wed, Jan 02, 2008 at 07:29:59PM +0100, Torsten Kaiser wrote:
>
> Vanilla 2.6.24-rc6 seems stable. I did not see any crash or warnings.

OK that's great.  The next step would be to try excluding specific git
trees from mm to see if they make a difference.

The two specific trees of interest would be git-nfsd and git-net.

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [PATCH 2.6.23.12] net/bonding: option to specify initial bond interface number
From: Jari Takkala @ 2008-01-02 21:20 UTC (permalink / raw)
  To: netdev

Allow the user to specify an initial interface number when loading the bonding driver. This is useful when loading multiple instances of the bonding driver and you want to control the interface number assignment. For example, if the user wishes to create a bond5 interface they can type 'modprobe -o bond5 bonding ifnum=5'. It also works with the max_bonds option.

Signed-off-by: Jari Takkala <jari.takkala@q9.com>
---
diff -ruN linux-2.6.23.12.orig/drivers/net/bonding/bond_main.c linux-2.6.23.12/drivers/net/bonding/bond_main.c
--- linux-2.6.23.12.orig/drivers/net/bonding/bond_main.c	2007-12-18 16:55:57.000000000 -0500
+++ linux-2.6.23.12/drivers/net/bonding/bond_main.c	2008-01-02 14:49:37.000000000 -0500
@@ -85,6 +85,7 @@
 #define BOND_LINK_MON_INTERV	0
 #define BOND_LINK_ARP_INTERV	0
 
+static int ifnum	= BOND_DEFAULT_IFNUM;
 static int max_bonds	= BOND_DEFAULT_MAX_BONDS;
 static int miimon	= BOND_LINK_MON_INTERV;
 static int updelay	= 0;
@@ -99,6 +100,8 @@
 static char *arp_validate = NULL;
 struct bond_params bonding_defaults;
 
+module_param(ifnum, int, 0);
+MODULE_PARM_DESC(ifnum, "Initial interface number to assign");
 module_param(max_bonds, int, 0);
 MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
 module_param(miimon, int, 0);
@@ -4388,6 +4391,14 @@
 		}
 	}
 
+	if (ifnum < 0 || ifnum > INT_MAX) {
+		printk(KERN_WARNING DRV_NAME
+		       ": Warning: ifnum (%d) not in range %d-%d, so it "
+		       "was reset to BOND_DEFAULT_IFNUM (%d)\n",
+		       ifnum, 0, INT_MAX, BOND_DEFAULT_IFNUM);
+		ifnum = BOND_DEFAULT_IFNUM;
+	}
+
 	if (max_bonds < 1 || max_bonds > INT_MAX) {
 		printk(KERN_WARNING DRV_NAME
 		       ": Warning: max_bonds (%d) not in range %d-%d, so it "
@@ -4703,6 +4714,7 @@
 {
 	int i;
 	int res;
+	char ifname[16];
 
 	printk(KERN_INFO "%s", version);
 
@@ -4715,7 +4727,13 @@
 	bond_create_proc_dir();
 #endif
 	for (i = 0; i < max_bonds; i++) {
-		res = bond_create(NULL, &bonding_defaults, NULL);
+		if (ifnum != BOND_DEFAULT_IFNUM) {
+			ifnum = ifnum + i;
+			snprintf(ifname, sizeof(ifname) - 1, "bond%d", ifnum);
+			res = bond_create(ifname, &bonding_defaults, NULL);
+		} else {
+			res = bond_create(NULL, &bonding_defaults, NULL);
+		}
 		if (res)
 			goto err;
 	}
diff -ruN linux-2.6.23.12.orig/include/linux/if_bonding.h linux-2.6.23.12/include/linux/if_bonding.h
--- linux-2.6.23.12.orig/include/linux/if_bonding.h	2007-12-18 16:55:57.000000000 -0500
+++ linux-2.6.23.12/include/linux/if_bonding.h	2008-01-02 13:30:18.000000000 -0500
@@ -81,6 +81,7 @@
 #define BOND_STATE_ACTIVE       0   /* link is active */
 #define BOND_STATE_BACKUP       1   /* link is backup */
 
+#define BOND_DEFAULT_IFNUM	0   /* Default interface number */
 #define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
 
 /* hashing types */


^ permalink raw reply

* [PATCH net-2.6.25] [IPV4] Remove unsupported DNAT (RTCF_NAT and RTCF_NAT) in IPV4
From: Rami Rosen @ 2008-01-02 21:31 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

[-- Attachment #1: Type: text/plain, Size: 332 bytes --]

Hi,

- The DNAT (Destination NAT) is not implemented in IPV4.

- This patch remove the code which checks these flags
in net/ipv4/arp.c and net/ipv4/route.c.

The RTCF_NAT and RTCF_NAT should stay in the header (linux/in_route.h)
because they are used in DECnet.

Regards,
Rami Rosen


Signed-off-by: Rami Rosen <ramirose@gmail.com>

[-- Attachment #2: patch.txt --]
[-- Type: text/plain, Size: 1496 bytes --]

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 1416bc9..2b72ba6 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -835,9 +835,8 @@ static int arp_process(struct sk_buff *skb)
 			}
 			goto out;
 		} else if (IN_DEV_FORWARD(in_dev)) {
-			if ((rt->rt_flags&RTCF_DNAT) ||
-			    (addr_type == RTN_UNICAST  && rt->u.dst.dev != dev &&
-			     (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &init_net, &tip, dev, 0)))) {
+			    if (addr_type == RTN_UNICAST  && rt->u.dst.dev != dev &&
+			     (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &init_net, &tip, dev, 0))) {
 				n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
 				if (n)
 					neigh_release(n);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 10915bb..ee9c93c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1768,7 +1768,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
 	if (err)
 		flags |= RTCF_DIRECTSRC;
 
-	if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) &&
+	if (out_dev == in_dev && err && !(flags & RTCF_MASQ) &&
 	    (IN_DEV_SHARED_MEDIA(out_dev) ||
 	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
 		flags |= RTCF_DOREDIRECT;
@@ -1777,7 +1777,7 @@ static inline int __mkroute_input(struct sk_buff *skb,
 		/* Not IP (i.e. ARP). Do not create route, if it is
 		 * invalid for proxy arp. DNAT routes are always valid.
 		 */
-		if (out_dev == in_dev && !(flags & RTCF_DNAT)) {
+		if (out_dev == in_dev) {
 			err = -EINVAL;
 			goto cleanup;
 		}

^ permalink raw reply related

* Re: [PATCH 6/8] [PATCH] Split up rndis_host.c
From: Jussi Kivilinna @ 2008-01-02 20:13 UTC (permalink / raw)
  To: David Brownell
  Cc: bjd-a1rhEgazXTw, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20071223011707.1798C2360C6-ZcXrCSuhvln6VZ3dlLfH/g4gEjPzgfUyLrfjE7I9kuVHxeISYlDBzl6hYfS7NtTn@public.gmane.org>

Hello,

Bjorge was not comfortable with double probing rndis_wext requires,
wireless RNDIS devices have same device id as the rest of RNDIS. Our
module version checks OID_GEN_PHYSICAL_MEDIUM in generic_rndis_bind,
with rndis_host bind fails if OID is supported and wireless media type
is returned, with rndis_wext if OID isn't supported or type isn't
wireless. Should this be ok?

Should separate rndis_wext be located in drivers/net/wireless instead of
drivers/net/usb?

 - Jussi Kivilinna

On Sat, 2007-12-22 at 17:17 -0800, David Brownell wrote:
> > From: Bjorge Dijkstra <bjd-a1rhEgazXTw@public.gmane.org>
> > Subject: [PATCH 6/8] [PATCH] Split up rndis_host.c
> > Date: Sat, 22 Dec 2007 22:51:32 +0100
> >
> > Split up rndis_host.c into rndis_host.h and rndis_base.c and
> > change Makefile accordingly. This is done so we can add extra
> > source files to the rndis_host module later on.
> 
> I'm fine with splitting out a header file and the EXPORT_SYMBOL_GPL.
> But why not just have a separate "rndis_wext" module?
> 
> 
> > ---
> >  drivers/net/usb/Makefile     |    1 +
> >  drivers/net/usb/rndis_base.c |  548 ++++++++++++++++++++++++++++++
> >  drivers/net/usb/rndis_host.c |  763 ------------------------------------------
> >  drivers/net/usb/rndis_host.h |  256 ++++++++++++++
> >  4 files changed, 805 insertions(+), 763 deletions(-)
> >  create mode 100644 drivers/net/usb/rndis_base.c
> >  delete mode 100644 drivers/net/usb/rndis_host.c
> >  create mode 100644 drivers/net/usb/rndis_host.h
> -
> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* Re: [usb regression] Re: [PATCH 2.6.24-rc3] Fix /proc/net breakage
From: David Brownell @ 2008-01-02 18:48 UTC (permalink / raw)
  To: Alan Stern
  Cc: Greg KH, Andreas Mohr, Ingo Molnar, Alexey Dobriyan,
	Andrew Morton, David Woodhouse, Eric W. Biederman, Linus Torvalds,
	Rafael J. Wysocki, Pavel Machek, kernel list, netdev,
	Pavel Emelyanov, Denis V. Lunev, USB list
In-Reply-To: <Pine.LNX.4.44L0.0801021052470.4861-100000-IYeN2dnnYyZXsRXLowluHWD2FQJk+8+b@public.gmane.org>

On Wednesday 02 January 2008, Alan Stern wrote:
> 	 BTW, I don't recall ever seeing Tony's patch announced on
> linux-usb or linux-usb-devel.  Did I simply miss it?

I think he didn't post it.  I got some questions from him at
one point, which I answered, but as I recall he decided for
some reason to stop that work.

-
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: 2.6.24-rc6-mm1
From: Torsten Kaiser @ 2008-01-02 18:29 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Andrew Morton, linux-kernel, Neil Brown, J. Bruce Fields, netdev,
	Tom Tucker
In-Reply-To: <20080101120406.GA27209@gondor.apana.org.au>

On Jan 1, 2008 1:04 PM, Herbert Xu <herbert@gondor.apana.org.au> wrote:
> In any case, I suspect the cause of your problem is that somebody
> somewhere is doing a double-free on an skb.
>
> Since you're the only person who can reproduce this, we really need
> your help to track this down.  Since bisecting the mm tree is not
> practical, you could start by checking whether the bug is in mm only
> or whether it affects rc6 too.

Vanilla 2.6.24-rc6 seems stable. I did not see any crash or warnings.

Torsten

^ permalink raw reply

* Re: [klibc] [patch] import socket defines
From: H. Peter Anvin @ 2008-01-02 18:09 UTC (permalink / raw)
  To: Mike Frysinger; +Cc: Netdev List, klibc list
In-Reply-To: <200801020830.43449.vapier@gentoo.org>

[-- Attachment #1: Type: text/plain, Size: 912 bytes --]

Mike Frysinger wrote:
> On Tuesday 01 January 2008, H. Peter Anvin wrote:
>> Mike Frysinger wrote:
>>> The kernel __GLIBC__ hacks were re-added so as to appease klibc people,
>>> but the klibc people didnt actually fix the problem on their side.  This
>>> patch imports the structures/defines that klibc seems to need.  Build
>>> tested on x86_64 (i dont actually use klibc so no idea how to test it).
>> The whole point was to NOT need to replicate all these structures and
>> constants, which are part of the ABI, in klibc...
> 
> then figure out a way that doesnt make the kernel headers blow for everyone 
> else out there.  change the __GLIBC__ crap to __KLIBC__ or something.

Seems the most logical thing to do would be to break out the small 
portion that everyone wants into <linux/sockaddr.h> or somesuch, and 
then remove those ifdefs entirely.

Proposed patch (still being tested) attached...

	-hpa

[-- Attachment #2: 0001-linux-socket.h-break-out-glibc-portions-into-l.patch --]
[-- Type: text/x-patch, Size: 3780 bytes --]

>From 727c56ac213bdaedb9247c442375a5979686acf5 Mon Sep 17 00:00:00 2001
From: H. Peter Anvin <hpa@zytor.com>
Date: Wed, 2 Jan 2008 10:08:16 -0800
Subject: [PATCH] <linux/socket.h>: break out "glibc" portions into <linux/sockaddr.h>

Some userspaces (e.g. klibc) want to be able to use the full set of
ABI constants in <linux/socket.h>, others (e.g. glibc) do not, and
rather want just the bare minimum to build a kernel-valid
sockaddr... but apparently they want that much.  This is currently
keyed on the existence of __GLIBC__, which is clearly wrong.

This patch breaks out the "bare minimum" into <linux/sockaddr.h> for
the userspaces who want to do it themselves, and eliminates the
ifdefs completely.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/Kbuild     |    1 +
 include/linux/sockaddr.h |   19 +++++++++++++++++++
 include/linux/socket.h   |   21 ++-------------------
 3 files changed, 22 insertions(+), 19 deletions(-)
 create mode 100644 include/linux/sockaddr.h

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index f30fa92..f8bbb31 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -139,6 +139,7 @@ header-y += rose.h
 header-y += serial_reg.h
 header-y += smbno.h
 header-y += snmp.h
+header-y += sockaddr.h
 header-y += sockios.h
 header-y += som.h
 header-y += sound.h
diff --git a/include/linux/sockaddr.h b/include/linux/sockaddr.h
new file mode 100644
index 0000000..f182083
--- /dev/null
+++ b/include/linux/sockaddr.h
@@ -0,0 +1,19 @@
+#ifndef _KERNEL_SOCKADDR_H
+#define _KERNEL_SOCKADDR_H
+
+/*
+ * Desired design of maximum size and alignment (see RFC2553)
+ */
+#define _K_SS_MAXSIZE	128	/* Implementation specific max size */
+#define _K_SS_ALIGNSIZE	(__alignof__ (struct sockaddr *))
+				/* Implementation specific desired alignment */
+
+struct __kernel_sockaddr_storage {
+	unsigned short	ss_family;		/* address family */
+	/* Following field(s) are implementation specific */
+	char		__data[_K_SS_MAXSIZE - sizeof(unsigned short)];
+				/* space to achieve desired size, */
+				/* _SS_MAXSIZE value minus size of ss_family */
+} __attribute__ ((aligned(_K_SS_ALIGNSIZE)));	/* force desired alignment */
+
+#endif /* _KERNEL_SOCKADDR_H */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index c22ef1c..9cd6edc 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -1,23 +1,7 @@
 #ifndef _LINUX_SOCKET_H
 #define _LINUX_SOCKET_H
 
-/*
- * Desired design of maximum size and alignment (see RFC2553)
- */
-#define _K_SS_MAXSIZE	128	/* Implementation specific max size */
-#define _K_SS_ALIGNSIZE	(__alignof__ (struct sockaddr *))
-				/* Implementation specific desired alignment */
-
-struct __kernel_sockaddr_storage {
-	unsigned short	ss_family;		/* address family */
-	/* Following field(s) are implementation specific */
-	char		__data[_K_SS_MAXSIZE - sizeof(unsigned short)];
-				/* space to achieve desired size, */
-				/* _SS_MAXSIZE value minus size of ss_family */
-} __attribute__ ((aligned(_K_SS_ALIGNSIZE)));	/* force desired alignment */
-
-#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
-
+#include <linux/sockaddr.h>
 #include <asm/socket.h>			/* arch-dependent defines	*/
 #include <linux/sockios.h>		/* the SIOCxxx I/O controls	*/
 #include <linux/uio.h>			/* iovec support		*/
@@ -310,7 +294,6 @@ extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len);
 extern int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen);
 extern int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
+#endif /* __KERNEL__ */
 
-#endif
-#endif /* not kernel and not glibc */
 #endif /* _LINUX_SOCKET_H */
-- 
1.5.3.6


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox