Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH] dev: use name hash for dev_seq_ops
From: Mihai Maruseac @ 2011-10-21  6:45 UTC (permalink / raw)
  To: davem, shemminger, eric.dumazet
  Cc: mirq-linux, therbert, jpirko, netdev, linux-kernel, dbaluta,
	Mihai Maruseac
In-Reply-To: <1319097717-14910-1-git-send-email-mmaruseac@ixiacom.com>

Instead of using the dev->next chain and trying to resync at each call to
dev_seq_start, use the name hash, keeping the bucket and the offset in
seq->private field.

Tests revealed the following results for ifconfig > /dev/null
	* 1000 interfaces:
		* 0.114s without patch
		* 0.089s with patch
	* 3000 interfaces:
		* 0.489s without patch
		* 0.110s with patch
	* 5000 interfaces:
		* 1.363s without patch
		* 0.250s with patch
	* 128000 interfaces (other setup):
		* ~100s without patch
		* ~30s with patch

Signed-off-by: Mihai Maruseac <mmaruseac@ixiacom.com>
---
 net/core/dev.c |   84 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 69 insertions(+), 15 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 70ecb86..6edbcc5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4041,6 +4041,60 @@ static int dev_ifconf(struct net *net, char __user *arg)
 }
 
 #ifdef CONFIG_PROC_FS
+
+#define BUCKET_SPACE (32 - NETDEV_HASHBITS)
+
+struct dev_iter_state {
+	struct seq_net_private p;
+	unsigned int pos; /* bucket << BUCKET_SPACE + offset */
+};
+
+#define get_bucket(x) ((x) >> BUCKET_SPACE)
+#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
+#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
+
+static inline struct net_device *dev_from_same_bucket(struct seq_file *seq)
+{
+	struct dev_iter_state *state = seq->private;
+	struct net *net = seq_file_net(seq);
+	struct net_device *dev;
+	struct hlist_node *p;
+	struct hlist_head *h;
+	unsigned int count, bucket, offset;
+
+	bucket = get_bucket(state->pos);
+	offset = get_offset(state->pos);
+	h = &net->dev_name_head[bucket];
+	count = 0;
+	hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
+		if (count++ == offset) {
+			state->pos = set_bucket_offset(bucket, count);
+			return dev;
+		}
+	}
+
+	return NULL;
+}
+
+static inline struct net_device *dev_from_new_bucket(struct seq_file *seq)
+{
+	struct dev_iter_state *state = seq->private;
+	struct net_device *dev;
+	unsigned int bucket;
+
+	bucket = get_bucket(state->pos);
+	do {
+		dev = dev_from_same_bucket(seq);
+		if (dev)
+			return dev;
+
+		bucket++;
+		state->pos = set_bucket_offset(bucket, 0);
+	} while (bucket < NETDEV_HASHENTRIES);
+
+	return NULL;
+}
+
 /*
  *	This is invoked by the /proc filesystem handler to display a device
  *	in detail.
@@ -4048,33 +4102,33 @@ static int dev_ifconf(struct net *net, char __user *arg)
 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(RCU)
 {
-	struct net *net = seq_file_net(seq);
-	loff_t off;
-	struct net_device *dev;
+	struct dev_iter_state *state = seq->private;
 
 	rcu_read_lock();
 	if (!*pos)
 		return SEQ_START_TOKEN;
 
-	off = 1;
-	for_each_netdev_rcu(net, dev)
-		if (off++ == *pos)
-			return dev;
+	/* check for end of the hash */
+	if (state->pos == 0 && *pos > 1)
+		return NULL;
 
-	return NULL;
+	return dev_from_new_bucket(seq);
 }
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	struct net_device *dev = v;
+	struct net_device *dev;
+
+	++*pos;
 
 	if (v == SEQ_START_TOKEN)
-		dev = first_net_device_rcu(seq_file_net(seq));
-	else
-		dev = next_net_device_rcu(dev);
+		return dev_from_new_bucket(seq);
 
-	++*pos;
-	return dev;
+	dev = dev_from_same_bucket(seq);
+	if (dev)
+		return dev;
+
+	return dev_from_new_bucket(seq);
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4173,7 +4227,7 @@ static const struct seq_operations dev_seq_ops = {
 static int dev_seq_open(struct inode *inode, struct file *file)
 {
 	return seq_open_net(inode, file, &dev_seq_ops,
-			    sizeof(struct seq_net_private));
+			    sizeof(struct dev_iter_state));
 }
 
 static const struct file_operations dev_seq_fops = {
-- 
1.7.4.1

^ permalink raw reply related

* Re: [PATCH] dev: use name hash for dev_seq_ops
From: Eric Dumazet @ 2011-10-21  6:52 UTC (permalink / raw)
  To: Mihai Maruseac
  Cc: davem, shemminger, mirq-linux, therbert, jpirko, netdev,
	linux-kernel, dbaluta, Mihai Maruseac
In-Reply-To: <1319179510-10715-1-git-send-email-mmaruseac@ixiacom.com>

Le vendredi 21 octobre 2011 à 09:45 +0300, Mihai Maruseac a écrit :
> Instead of using the dev->next chain and trying to resync at each call to
> dev_seq_start, use the name hash, keeping the bucket and the offset in
> seq->private field.
> 
...
> Signed-off-by: Mihai Maruseac <mmaruseac@ixiacom.com>

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Thanks

^ permalink raw reply

* [PATCH net-next] tcp: add const qualifiers where possible
From: Eric Dumazet @ 2011-10-21  6:53 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Adding const qualifiers to pointers can ease code review, and spot some
bugs. It might allow compiler to optimize code further.

For example, is it legal to temporary write a null cksum into tcphdr
in tcp_md5_hash_header() ? I am afraid a sniffer could catch the
temporary null value...

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/secure_seq.h |    2 
 include/net/sock.h       |    2 
 include/net/tcp.h        |   40 +++++++------
 net/core/secure_seq.c    |    2 
 net/ipv4/syncookies.c    |    2 
 net/ipv4/tcp.c           |   18 +++---
 net/ipv4/tcp_input.c     |  110 ++++++++++++++++++-------------------
 net/ipv4/tcp_ipv4.c      |   26 ++++----
 net/ipv4/tcp_minisocks.c |    4 -
 net/ipv4/tcp_output.c    |   72 ++++++++++++------------
 net/ipv6/syncookies.c    |    6 +-
 net/ipv6/tcp_ipv6.c      |   27 ++++-----
 12 files changed, 159 insertions(+), 152 deletions(-)

diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index d97f689..c2e542b 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -10,7 +10,7 @@ extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 				      __be16 dport);
 extern __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 					__be16 sport, __be16 dport);
-extern __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
+extern __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 					  __be16 sport, __be16 dport);
 extern u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 				       __be16 sport, __be16 dport);
diff --git a/include/net/sock.h b/include/net/sock.h
index 5ac682f..1df0ed0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1310,7 +1310,7 @@ extern int sock_i_uid(struct sock *sk);
 extern unsigned long sock_i_ino(struct sock *sk);
 
 static inline struct dst_entry *
-__sk_dst_get(struct sock *sk)
+__sk_dst_get(const struct sock *sk)
 {
 	return rcu_dereference_check(sk->sk_dst_cache, sock_owned_by_user(sk) ||
 						       lockdep_is_held(&sk->sk_lock.slock));
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0113d30..75fdaac 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -327,9 +327,9 @@ extern int tcp_sendpage(struct sock *sk, struct page *page, int offset,
 			size_t size, int flags);
 extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
-				 struct tcphdr *th, unsigned len);
+				 const struct tcphdr *th, unsigned int len);
 extern int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			       struct tcphdr *th, unsigned len);
+			       const struct tcphdr *th, unsigned int len);
 extern void tcp_rcv_space_adjust(struct sock *sk);
 extern void tcp_cleanup_rbuf(struct sock *sk, int copied);
 extern int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
@@ -401,10 +401,10 @@ extern void tcp_set_keepalive(struct sock *sk, int val);
 extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req);
 extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		       size_t len, int nonblock, int flags, int *addr_len);
-extern void tcp_parse_options(struct sk_buff *skb,
-			      struct tcp_options_received *opt_rx, u8 **hvpp,
+extern void tcp_parse_options(const struct sk_buff *skb,
+			      struct tcp_options_received *opt_rx, const u8 **hvpp,
 			      int estab);
-extern u8 *tcp_parse_md5sig_option(struct tcphdr *th);
+extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
 
 /*
  *	TCP v4 functions exported for the inet6 API
@@ -450,7 +450,7 @@ extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *);
 /* From net/ipv6/syncookies.c */
 extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
 #ifdef CONFIG_SYN_COOKIES
-extern __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb,
+extern __u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb,
 				     __u16 *mss);
 #else
 static inline __u32 cookie_v6_init_sequence(struct sock *sk,
@@ -522,7 +522,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
 }
 
 /* tcp.c */
-extern void tcp_get_info(struct sock *, struct tcp_info *);
+extern void tcp_get_info(const struct sock *, struct tcp_info *);
 
 /* Read 'sendfile()'-style from a TCP socket */
 typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
@@ -532,8 +532,8 @@ extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 
 extern void tcp_initialize_rcv_mss(struct sock *sk);
 
-extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
-extern int tcp_mss_to_mtu(struct sock *sk, int mss);
+extern int tcp_mtu_to_mss(const struct sock *sk, int pmtu);
+extern int tcp_mss_to_mtu(const struct sock *sk, int mss);
 extern void tcp_mtup_init(struct sock *sk);
 extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);
 
@@ -832,7 +832,7 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 #define tcp_verify_left_out(tp)	WARN_ON(tcp_left_out(tp) > tp->packets_out)
 
 extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
-extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
+extern __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst);
 
 /* Slow start with delack produces 3 packets of burst, so that
  * it is safe "de facto".  This will be the default - same as
@@ -861,7 +861,7 @@ static inline void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss,
 
 static inline void tcp_check_probe_timer(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
 	if (!tp->packets_out && !icsk->icsk_pending)
@@ -1209,10 +1209,10 @@ extern struct tcp_md5sig_pool	*tcp_get_md5sig_pool(void);
 extern void tcp_put_md5sig_pool(void);
 
 extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *);
-extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *,
+extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *,
 				 unsigned header_len);
 extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
-			    struct tcp_md5sig_key *key);
+			    const struct tcp_md5sig_key *key);
 
 /* write queue abstraction */
 static inline void tcp_write_queue_purge(struct sock *sk)
@@ -1225,22 +1225,24 @@ static inline void tcp_write_queue_purge(struct sock *sk)
 	tcp_clear_all_retrans_hints(tcp_sk(sk));
 }
 
-static inline struct sk_buff *tcp_write_queue_head(struct sock *sk)
+static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
 {
 	return skb_peek(&sk->sk_write_queue);
 }
 
-static inline struct sk_buff *tcp_write_queue_tail(struct sock *sk)
+static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk)
 {
 	return skb_peek_tail(&sk->sk_write_queue);
 }
 
-static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_buff *skb)
+static inline struct sk_buff *tcp_write_queue_next(const struct sock *sk,
+						   const struct sk_buff *skb)
 {
 	return skb_queue_next(&sk->sk_write_queue, skb);
 }
 
-static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_buff *skb)
+static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk,
+						   const struct sk_buff *skb)
 {
 	return skb_queue_prev(&sk->sk_write_queue, skb);
 }
@@ -1254,7 +1256,7 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu
 #define tcp_for_write_queue_from_safe(skb, tmp, sk)			\
 	skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
 
-static inline struct sk_buff *tcp_send_head(struct sock *sk)
+static inline struct sk_buff *tcp_send_head(const struct sock *sk)
 {
 	return sk->sk_send_head;
 }
@@ -1265,7 +1267,7 @@ static inline bool tcp_skb_is_last(const struct sock *sk,
 	return skb_queue_is_last(&sk->sk_write_queue, skb);
 }
 
-static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb)
+static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb)
 {
 	if (tcp_skb_is_last(sk, skb))
 		sk->sk_send_head = NULL;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 45329d7..025233d 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -35,7 +35,7 @@ static u32 seq_scale(u32 seq)
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
+__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 				   __be16 sport, __be16 dport)
 {
 	u32 secret[MD5_MESSAGE_BYTES / 4];
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 3bc5c8f..d7b89b1 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -265,7 +265,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 			     struct ip_options *opt)
 {
 	struct tcp_options_received tcp_opt;
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct inet_request_sock *ireq;
 	struct tcp_request_sock *treq;
 	struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 132be08..e7fc2f0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -374,7 +374,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
 	unsigned int mask;
 	struct sock *sk = sock->sk;
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
 	if (sk->sk_state == TCP_LISTEN)
@@ -528,7 +528,7 @@ static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
 	tp->pushed_seq = tp->write_seq;
 }
 
-static inline int forced_push(struct tcp_sock *tp)
+static inline int forced_push(const struct tcp_sock *tp)
 {
 	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
 }
@@ -891,9 +891,9 @@ EXPORT_SYMBOL(tcp_sendpage);
 #define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
 #define TCP_OFF(sk)	(sk->sk_sndmsg_off)
 
-static inline int select_size(struct sock *sk, int sg)
+static inline int select_size(const struct sock *sk, int sg)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	int tmp = tp->mss_cache;
 
 	if (sg) {
@@ -2408,7 +2408,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 		   unsigned int optlen)
 {
-	struct inet_connection_sock *icsk = inet_csk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 
 	if (level != SOL_TCP)
 		return icsk->icsk_af_ops->setsockopt(sk, level, optname,
@@ -2430,9 +2430,9 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
 #endif
 
 /* Return information about state of tcp endpoint in API format. */
-void tcp_get_info(struct sock *sk, struct tcp_info *info)
+void tcp_get_info(const struct sock *sk, struct tcp_info *info)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 now = tcp_time_stamp;
 
@@ -3010,7 +3010,7 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
 EXPORT_SYMBOL(tcp_md5_hash_header);
 
 int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
-			  struct sk_buff *skb, unsigned header_len)
+			  const struct sk_buff *skb, unsigned int header_len)
 {
 	struct scatterlist sg;
 	const struct tcphdr *tp = tcp_hdr(skb);
@@ -3043,7 +3043,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
 }
 EXPORT_SYMBOL(tcp_md5_hash_skb_data);
 
-int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
+int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
 {
 	struct scatterlist sg;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 69a90b8..8272316 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -206,7 +206,7 @@ static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
 		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
 }
 
-static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
 {
 	if (tcp_hdr(skb)->cwr)
 		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
@@ -239,19 +239,19 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *s
 	}
 }
 
-static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
+static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
 {
 	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
 		tp->ecn_flags &= ~TCP_ECN_OK;
 }
 
-static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
+static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
 {
 	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
 		tp->ecn_flags &= ~TCP_ECN_OK;
 }
 
-static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
+static inline int TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
 {
 	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
 		return 1;
@@ -315,7 +315,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 	return 0;
 }
 
-static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
+static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -429,7 +429,7 @@ static void tcp_clamp_window(struct sock *sk)
  */
 void tcp_initialize_rcv_mss(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
 
 	hint = min(hint, tp->rcv_wnd / 2);
@@ -824,7 +824,7 @@ void tcp_update_metrics(struct sock *sk)
 	}
 }
 
-__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
+__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
 {
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
@@ -1216,7 +1216,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
 		tp->lost_retrans_low = new_low_seq;
 }
 
-static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
+static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 			   struct tcp_sack_block_wire *sp, int num_sacks,
 			   u32 prior_snd_una)
 {
@@ -1310,7 +1310,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 	return in_sack;
 }
 
-static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
+static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
 			  struct tcp_sacktag_state *state,
 			  int dup_sack, int pcount)
 {
@@ -1465,13 +1465,13 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 /* I wish gso_size would have a bit more sane initialization than
  * something-or-zero which complicates things
  */
-static int tcp_skb_seglen(struct sk_buff *skb)
+static int tcp_skb_seglen(const struct sk_buff *skb)
 {
 	return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
 }
 
 /* Shifting pages past head area doesn't work */
-static int skb_can_shift(struct sk_buff *skb)
+static int skb_can_shift(const struct sk_buff *skb)
 {
 	return !skb_headlen(skb) && skb_is_nonlinear(skb);
 }
@@ -1720,19 +1720,19 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
 	return skb;
 }
 
-static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
+static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
 {
 	return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
 }
 
 static int
-tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
+tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 			u32 prior_snd_una)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned char *ptr = (skb_transport_header(ack_skb) +
-			      TCP_SKB_CB(ack_skb)->sacked);
+	const unsigned char *ptr = (skb_transport_header(ack_skb) +
+				    TCP_SKB_CB(ack_skb)->sacked);
 	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
 	struct tcp_sack_block sp[TCP_NUM_SACKS];
 	struct tcp_sack_block *cache;
@@ -2296,7 +2296,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag)
 	return 0;
 }
 
-static inline int tcp_fackets_out(struct tcp_sock *tp)
+static inline int tcp_fackets_out(const struct tcp_sock *tp)
 {
 	return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
 }
@@ -2316,19 +2316,20 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
  * they differ. Since neither occurs due to loss, TCP should really
  * ignore them.
  */
-static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
+static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
 {
 	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
 }
 
-static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_skb_timedout(const struct sock *sk,
+				   const struct sk_buff *skb)
 {
 	return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
 }
 
-static inline int tcp_head_timedout(struct sock *sk)
+static inline int tcp_head_timedout(const struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	return tp->packets_out &&
 	       tcp_skb_timedout(sk, tcp_write_queue_head(sk));
@@ -2639,7 +2640,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag)
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
-static inline int tcp_packet_delayed(struct tcp_sock *tp)
+static inline int tcp_packet_delayed(const struct tcp_sock *tp)
 {
 	return !tp->retrans_stamp ||
 		(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
@@ -2700,7 +2701,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static inline int tcp_may_undo(struct tcp_sock *tp)
+static inline int tcp_may_undo(const struct tcp_sock *tp)
 {
 	return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
 }
@@ -2764,9 +2765,9 @@ static void tcp_try_undo_dsack(struct sock *sk)
  * that successive retransmissions of a segment must not advance
  * retrans_stamp under any conditions.
  */
-static int tcp_any_retrans_done(struct sock *sk)
+static int tcp_any_retrans_done(const struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
 	if (tp->retrans_out)
@@ -3245,7 +3246,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
  */
 static void tcp_rearm_rto(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	if (!tp->packets_out) {
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
@@ -3497,7 +3498,7 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
  * and in FreeBSD. NetBSD's one is even worse.) is wrong.
  */
-static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
+static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
 				 u32 ack_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3673,7 +3674,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
 }
 
 /* This routine deals with incoming acks, but not outgoing ones. */
-static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
+static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3810,14 +3811,14 @@ old_ack:
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
  */
-void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
-		       u8 **hvpp, int estab)
+void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
+		       const u8 **hvpp, int estab)
 {
-	unsigned char *ptr;
-	struct tcphdr *th = tcp_hdr(skb);
+	const unsigned char *ptr;
+	const struct tcphdr *th = tcp_hdr(skb);
 	int length = (th->doff * 4) - sizeof(struct tcphdr);
 
-	ptr = (unsigned char *)(th + 1);
+	ptr = (const unsigned char *)(th + 1);
 	opt_rx->saw_tstamp = 0;
 
 	while (length > 0) {
@@ -3928,9 +3929,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 }
 EXPORT_SYMBOL(tcp_parse_options);
 
-static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
+static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
 {
-	__be32 *ptr = (__be32 *)(th + 1);
+	const __be32 *ptr = (const __be32 *)(th + 1);
 
 	if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
 			  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
@@ -3947,8 +3948,9 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
 /* Fast parse options. This hopes to only see timestamps.
  * If it is wrong it falls back on tcp_parse_options().
  */
-static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
-				  struct tcp_sock *tp, u8 **hvpp)
+static int tcp_fast_parse_options(const struct sk_buff *skb,
+				  const struct tcphdr *th,
+				  struct tcp_sock *tp, const u8 **hvpp)
 {
 	/* In the spirit of fast parsing, compare doff directly to constant
 	 * values.  Because equality is used, short doff can be ignored here.
@@ -3969,10 +3971,10 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 /*
  * Parse MD5 Signature option
  */
-u8 *tcp_parse_md5sig_option(struct tcphdr *th)
+const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
 {
-	int length = (th->doff << 2) - sizeof (*th);
-	u8 *ptr = (u8*)(th + 1);
+	int length = (th->doff << 2) - sizeof(*th);
+	const u8 *ptr = (const u8 *)(th + 1);
 
 	/* If the TCP option is too short, we can short cut */
 	if (length < TCPOLEN_MD5SIG)
@@ -4049,8 +4051,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
 
 static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcphdr *th = tcp_hdr(skb);
 	u32 seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 
@@ -4089,7 +4091,7 @@ static inline int tcp_paws_discard(const struct sock *sk,
  * (borrowed from freebsd)
  */
 
-static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static inline int tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
 {
 	return	!before(end_seq, tp->rcv_wup) &&
 		!after(seq, tp->rcv_nxt + tcp_receive_window(tp));
@@ -4246,7 +4248,7 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
 		tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
 }
 
-static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
+static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -4433,7 +4435,7 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int eaten = -1;
 
@@ -4917,9 +4919,9 @@ void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static int tcp_should_expand_sndbuf(struct sock *sk)
+static int tcp_should_expand_sndbuf(const struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	/* If the user specified a specific send buffer setting, do
 	 * not modify it.
@@ -5028,7 +5030,7 @@ static inline void tcp_ack_snd_check(struct sock *sk)
  *	either form (or just set the sysctl tcp_stdurg).
  */
 
-static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
+static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 ptr = ntohs(th->urg_ptr);
@@ -5094,7 +5096,7 @@ static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
 }
 
 /* This is the 'fast' part of urgent handling. */
-static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
+static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -5215,9 +5217,9 @@ out:
  * play significant role here.
  */
 static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
-			      struct tcphdr *th, int syn_inerr)
+			      const struct tcphdr *th, int syn_inerr)
 {
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* RFC1323: H1. Apply PAWS check first. */
@@ -5298,7 +5300,7 @@ discard:
  *	tcp_data_queue when everything is OK.
  */
 int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			struct tcphdr *th, unsigned len)
+			const struct tcphdr *th, unsigned int len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int res;
@@ -5509,9 +5511,9 @@ discard:
 EXPORT_SYMBOL(tcp_rcv_established);
 
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
-					 struct tcphdr *th, unsigned len)
+					 const struct tcphdr *th, unsigned int len)
 {
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_cookie_values *cvp = tp->cookie_values;
@@ -5786,7 +5788,7 @@ reset_and_undo:
  */
 
 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
-			  struct tcphdr *th, unsigned len)
+			  const struct tcphdr *th, unsigned int len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 48da7cc..955c925 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -104,7 +104,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
-static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
+static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
 {
 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 					  ip_hdr(skb)->saddr,
@@ -552,7 +552,7 @@ static void __tcp_v4_send_check(struct sk_buff *skb,
 /* This routine computes an IPv4 TCP checksum. */
 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 {
-	struct inet_sock *inet = inet_sk(sk);
+	const struct inet_sock *inet = inet_sk(sk);
 
 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
 }
@@ -590,7 +590,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
 
 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	struct {
 		struct tcphdr th;
 #ifdef CONFIG_TCP_MD5SIG
@@ -668,7 +668,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 			    struct tcp_md5sig_key *key,
 			    int reply_flags)
 {
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	struct {
 		struct tcphdr th;
 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
@@ -1182,10 +1182,10 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
 	 * o MD5 hash and we're not expecting one.
 	 * o MD5 hash and its wrong.
 	 */
-	__u8 *hash_location = NULL;
+	const __u8 *hash_location = NULL;
 	struct tcp_md5sig_key *hash_expected;
 	const struct iphdr *iph = ip_hdr(skb);
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	int genhash;
 	unsigned char newhash[16];
 
@@ -1248,7 +1248,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_extend_values tmp_ext;
 	struct tcp_options_received tmp_opt;
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct request_sock *req;
 	struct inet_request_sock *ireq;
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1645,7 +1645,7 @@ EXPORT_SYMBOL(tcp_v4_do_rcv);
 int tcp_v4_rcv(struct sk_buff *skb)
 {
 	const struct iphdr *iph;
-	struct tcphdr *th;
+	const struct tcphdr *th;
 	struct sock *sk;
 	int ret;
 	struct net *net = dev_net(skb->dev);
@@ -1809,7 +1809,7 @@ EXPORT_SYMBOL(tcp_v4_get_peer);
 
 void *tcp_v4_tw_get_peer(struct sock *sk)
 {
-	struct inet_timewait_sock *tw = inet_twsk(sk);
+	const struct inet_timewait_sock *tw = inet_twsk(sk);
 
 	return inet_getpeer_v4(tw->tw_daddr, 1);
 }
@@ -2381,7 +2381,7 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
 }
 EXPORT_SYMBOL(tcp_proc_unregister);
 
-static void get_openreq4(struct sock *sk, struct request_sock *req,
+static void get_openreq4(const struct sock *sk, const struct request_sock *req,
 			 struct seq_file *f, int i, int uid, int *len)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
@@ -2411,9 +2411,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 {
 	int timer_active;
 	unsigned long timer_expires;
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct inet_sock *inet = inet_sk(sk);
+	const struct inet_sock *inet = inet_sk(sk);
 	__be32 dest = inet->inet_daddr;
 	__be32 src = inet->inet_rcv_saddr;
 	__u16 destp = ntohs(inet->inet_dport);
@@ -2462,7 +2462,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 		len);
 }
 
-static void get_timewait4_sock(struct inet_timewait_sock *tw,
+static void get_timewait4_sock(const struct inet_timewait_sock *tw,
 			       struct seq_file *f, int i, int *len)
 {
 	__be32 dest, src;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d2fe4e0..b767a95 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -141,7 +141,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 			   const struct tcphdr *th)
 {
 	struct tcp_options_received tmp_opt;
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 	int paws_reject = 0;
 
@@ -566,7 +566,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			   struct request_sock **prev)
 {
 	struct tcp_options_received tmp_opt;
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct sock *child;
 	const struct tcphdr *th = tcp_hdr(skb);
 	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ed96c54..980b98f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
 
 
 /* Account for new data that has been sent to the network. */
-static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
+static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int prior_packets = tp->packets_out;
@@ -89,9 +89,9 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
  * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
  * invalid. OK, let's make this for now:
  */
-static inline __u32 tcp_acceptable_seq(struct sock *sk)
+static inline __u32 tcp_acceptable_seq(const struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	if (!before(tcp_wnd_end(tp), tp->snd_nxt))
 		return tp->snd_nxt;
@@ -116,7 +116,7 @@ static inline __u32 tcp_acceptable_seq(struct sock *sk)
 static __u16 tcp_advertise_mss(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
+	const struct dst_entry *dst = __sk_dst_get(sk);
 	int mss = tp->advmss;
 
 	if (dst) {
@@ -133,7 +133,7 @@ static __u16 tcp_advertise_mss(struct sock *sk)
 
 /* RFC2861. Reset CWND after idle period longer RTO to "restart window".
  * This is the first part of cwnd validation mechanism. */
-static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
+static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	s32 delta = tcp_time_stamp - tp->lsndtime;
@@ -154,7 +154,7 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
 
 /* Congestion state accounting after a packet has been sent. */
 static void tcp_event_data_sent(struct tcp_sock *tp,
-				struct sk_buff *skb, struct sock *sk)
+				struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const u32 now = tcp_time_stamp;
@@ -295,7 +295,7 @@ static u16 tcp_select_window(struct sock *sk)
 }
 
 /* Packet ECN state for a SYN-ACK */
-static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb)
 {
 	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
 	if (!(tp->ecn_flags & TCP_ECN_OK))
@@ -315,7 +315,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
 }
 
 static __inline__ void
-TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th)
+TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th)
 {
 	if (inet_rsk(req)->ecn_ok)
 		th->ece = 1;
@@ -565,7 +565,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
  */
 static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 				struct tcp_out_options *opts,
-				struct tcp_md5sig_key **md5) {
+				struct tcp_md5sig_key **md5)
+{
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_cookie_values *cvp = tp->cookie_values;
 	unsigned remaining = MAX_TCP_OPTION_SPACE;
@@ -743,7 +744,8 @@ static unsigned tcp_synack_options(struct sock *sk,
  */
 static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
 					struct tcp_out_options *opts,
-					struct tcp_md5sig_key **md5) {
+					struct tcp_md5sig_key **md5)
+{
 	struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned size = 0;
@@ -893,7 +895,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
 
 	if (skb->len != tcp_header_size)
-		tcp_event_data_sent(tp, skb, sk);
+		tcp_event_data_sent(tp, sk);
 
 	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
 		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
@@ -926,7 +928,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 }
 
 /* Initialize TSO segments for a packet. */
-static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
+static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 				 unsigned int mss_now)
 {
 	if (skb->len <= mss_now || !sk_can_gso(sk) ||
@@ -947,7 +949,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
 /* When a modification to fackets out becomes necessary, we need to check
  * skb is counted to fackets_out or not.
  */
-static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
+static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
 				   int decr)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -962,7 +964,7 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
 /* Pcount in the middle of the write queue got changed, we need to do various
  * tweaks to fix counters
  */
-static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr)
+static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -1146,10 +1148,10 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 }
 
 /* Calculate MSS. Not accounting for SACKs here.  */
-int tcp_mtu_to_mss(struct sock *sk, int pmtu)
+int tcp_mtu_to_mss(const struct sock *sk, int pmtu)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct inet_connection_sock *icsk = inet_csk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	int mss_now;
 
 	/* Calculate base mss without TCP options:
@@ -1175,10 +1177,10 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu)
 }
 
 /* Inverse of above */
-int tcp_mss_to_mtu(struct sock *sk, int mss)
+int tcp_mss_to_mtu(const struct sock *sk, int mss)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct inet_connection_sock *icsk = inet_csk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	int mtu;
 
 	mtu = mss +
@@ -1252,8 +1254,8 @@ EXPORT_SYMBOL(tcp_sync_mss);
  */
 unsigned int tcp_current_mss(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	const struct dst_entry *dst = __sk_dst_get(sk);
 	u32 mss_now;
 	unsigned header_len;
 	struct tcp_out_options opts;
@@ -1313,10 +1315,10 @@ static void tcp_cwnd_validate(struct sock *sk)
  * modulo only when the receiver window alone is the limiting factor or
  * when we would be allowed to send the split-due-to-Nagle skb fully.
  */
-static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
+static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
 					unsigned int mss_now, unsigned int cwnd)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	u32 needed, window, cwnd_len;
 
 	window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
@@ -1336,8 +1338,8 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
 /* Can at least one segment of SKB be sent right now, according to the
  * congestion window rules?  If so, return how many segments are allowed.
  */
-static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
-					 struct sk_buff *skb)
+static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
+					 const struct sk_buff *skb)
 {
 	u32 in_flight, cwnd;
 
@@ -1358,7 +1360,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
  * This must be invoked the first time we consider transmitting
  * SKB onto the wire.
  */
-static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
+static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
 			     unsigned int mss_now)
 {
 	int tso_segs = tcp_skb_pcount(skb);
@@ -1396,7 +1398,7 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp,
 /* Return non-zero if the Nagle test allows this packet to be
  * sent now.
  */
-static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
+static inline int tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
 				 unsigned int cur_mss, int nonagle)
 {
 	/* Nagle rule does not apply to frames, which sit in the middle of the
@@ -1422,7 +1424,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
 }
 
 /* Does at least the first segment of SKB fit into the send window? */
-static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
+static inline int tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
 				   unsigned int cur_mss)
 {
 	u32 end_seq = TCP_SKB_CB(skb)->end_seq;
@@ -1437,10 +1439,10 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
  * should be put on the wire right now.  If so, it returns the number of
  * packets allowed by the congestion window.
  */
-static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
+static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
 				 unsigned int cur_mss, int nonagle)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int cwnd_quota;
 
 	tcp_init_tso_segs(sk, skb, cur_mss);
@@ -1458,7 +1460,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
 /* Test if sending is allowed right now. */
 int tcp_may_send_now(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb = tcp_send_head(sk);
 
 	return skb &&
@@ -2008,7 +2010,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 }
 
 /* Check if coalescing SKBs is legal. */
-static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb)
+static int tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
 {
 	if (tcp_skb_pcount(skb) > 1)
 		return 0;
@@ -2184,7 +2186,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 static int tcp_can_forward_retransmit(struct sock *sk)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Forward retransmissions are possible only during Recovery. */
 	if (icsk->icsk_ca_state != TCP_CA_Recovery)
@@ -2550,7 +2552,7 @@ EXPORT_SYMBOL(tcp_make_synack);
 /* Do all connect socket setups that can be done AF independent. */
 static void tcp_connect_init(struct sock *sk)
 {
-	struct dst_entry *dst = __sk_dst_get(sk);
+	const struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u8 rcv_wscale;
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index ac83896..5a0d664 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -115,7 +115,7 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr,
 		& COOKIEMASK;
 }
 
-__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	const struct tcphdr *th = tcp_hdr(skb);
@@ -137,7 +137,7 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 				     jiffies / (HZ * 60), mssind);
 }
 
-static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	const struct tcphdr *th = tcp_hdr(skb);
@@ -152,7 +152,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_options_received tcp_opt;
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct inet_request_sock *ireq;
 	struct inet6_request_sock *ireq6;
 	struct tcp_request_sock *treq;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5357902..da2ada8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -114,7 +114,7 @@ static __inline__ __sum16 tcp_v6_check(int len,
 	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
 }
 
-static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
+static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
 {
 	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
 					    ipv6_hdr(skb)->saddr.s6_addr32,
@@ -844,7 +844,7 @@ clear_hash_noput:
 
 static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
 {
-	__u8 *hash_location = NULL;
+	const __u8 *hash_location = NULL;
 	struct tcp_md5sig_key *hash_expected;
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	struct tcphdr *th = tcp_hdr(skb);
@@ -980,7 +980,8 @@ static int tcp6_gro_complete(struct sk_buff *skb)
 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 				 u32 ts, struct tcp_md5sig_key *key, int rst)
 {
-	struct tcphdr *th = tcp_hdr(skb), *t1;
+	const struct tcphdr *th = tcp_hdr(skb);
+	struct tcphdr *t1;
 	struct sk_buff *buff;
 	struct flowi6 fl6;
 	struct net *net = dev_net(skb_dst(skb)->dev);
@@ -1070,7 +1071,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 
 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	u32 seq = 0, ack_seq = 0;
 	struct tcp_md5sig_key *key = NULL;
 
@@ -1160,7 +1161,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_extend_values tmp_ext;
 	struct tcp_options_received tmp_opt;
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct request_sock *req;
 	struct inet6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1688,7 +1689,7 @@ ipv6_pktoptions:
 
 static int tcp_v6_rcv(struct sk_buff *skb)
 {
-	struct tcphdr *th;
+	const struct tcphdr *th;
 	const struct ipv6hdr *hdr;
 	struct sock *sk;
 	int ret;
@@ -1856,8 +1857,8 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
 
 static void *tcp_v6_tw_get_peer(struct sock *sk)
 {
-	struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
-	struct inet_timewait_sock *tw = inet_twsk(sk);
+	const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
+	const struct inet_timewait_sock *tw = inet_twsk(sk);
 
 	if (tw->tw_family == AF_INET)
 		return tcp_v4_tw_get_peer(sk);
@@ -2012,7 +2013,7 @@ static void tcp_v6_destroy_sock(struct sock *sk)
 #ifdef CONFIG_PROC_FS
 /* Proc filesystem TCPv6 sock list dumping. */
 static void get_openreq6(struct seq_file *seq,
-			 struct sock *sk, struct request_sock *req, int i, int uid)
+			 const struct sock *sk, struct request_sock *req, int i, int uid)
 {
 	int ttd = req->expires - jiffies;
 	const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
@@ -2048,10 +2049,10 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 	__u16 destp, srcp;
 	int timer_active;
 	unsigned long timer_expires;
-	struct inet_sock *inet = inet_sk(sp);
-	struct tcp_sock *tp = tcp_sk(sp);
+	const struct inet_sock *inet = inet_sk(sp);
+	const struct tcp_sock *tp = tcp_sk(sp);
 	const struct inet_connection_sock *icsk = inet_csk(sp);
-	struct ipv6_pinfo *np = inet6_sk(sp);
+	const struct ipv6_pinfo *np = inet6_sk(sp);
 
 	dest  = &np->daddr;
 	src   = &np->rcv_saddr;
@@ -2103,7 +2104,7 @@ static void get_timewait6_sock(struct seq_file *seq,
 {
 	const struct in6_addr *dest, *src;
 	__u16 destp, srcp;
-	struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
+	const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
 	int ttd = tw->tw_ttd - jiffies;
 
 	if (ttd < 0)

^ permalink raw reply related

* Re: [PATCH] dev: use name hash for dev_seq_ops
From: David Miller @ 2011-10-21  6:55 UTC (permalink / raw)
  To: mihai.maruseac
  Cc: shemminger, eric.dumazet, mirq-linux, therbert, jpirko, netdev,
	linux-kernel, dbaluta, mmaruseac
In-Reply-To: <1319179510-10715-1-git-send-email-mmaruseac@ixiacom.com>

From: Mihai Maruseac <mihai.maruseac@gmail.com>
Date: Fri, 21 Oct 2011 09:45:10 +0300

> Instead of using the dev->next chain and trying to resync at each call to
> dev_seq_start, use the name hash, keeping the bucket and the offset in
> seq->private field.
> 
> Tests revealed the following results for ifconfig > /dev/null
 ...
> Signed-off-by: Mihai Maruseac <mmaruseac@ixiacom.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next] tcp: add const qualifiers where possible
From: David Miller @ 2011-10-21  6:55 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev
In-Reply-To: <1319179993.2854.63.camel@edumazet-laptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 21 Oct 2011 08:53:13 +0200

> Adding const qualifiers to pointers can ease code review, and spot some
> bugs. It might allow compiler to optimize code further.
> 
> For example, is it legal to temporary write a null cksum into tcphdr
> in tcp_md5_hash_header() ? I am afraid a sniffer could catch the
> temporary null value...
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH 0/5] macvtap fixes.
From: David Miller @ 2011-10-21  6:56 UTC (permalink / raw)
  To: ebiederm; +Cc: netdev, arnd, jasowang, mst, ian.campbell, mashirley
In-Reply-To: <m1wrbzu4ec.fsf@fess.ebiederm.org>

From: ebiederm@xmission.com (Eric W. Biederman)
Date: Thu, 20 Oct 2011 07:24:59 -0700

> 
> This series of patches fixes a series of minor bugs in the macvtap code.
> 
> The fixes to handle failures in newlink and the change in how we handle
> minor device number allocations are particularly significant.

Applied to net-next, thanks.

^ permalink raw reply

* Re: [RFC PATCH 15/17] phy_device: Add "port" and "transciever" fields
From: Kyle Moffett @ 2011-10-21  7:12 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: Kyle Moffett, linux-kernel, netdev, David S. Miller,
	David Decotigny, Stephen Hemminger, Andrew Morton,
	Lucas De Marchi, Marc Kleine-Budde, Mike Frysinger
In-Reply-To: <1319153279.2918.16.camel@deadeye>

On Thu, Oct 20, 2011 at 19:27, Ben Hutchings <bhutchings@solarflare.com> wrote:
> On Thu, 2011-10-20 at 17:00 -0400, Kyle Moffett wrote:
>> Some PHYs have multiple software-selectable inputs and outputs,
>> including RGMII, SGMII, SerDes, etc.  New fields are added to the
>> "struct phy_device" for "port" and "transciever" to allow "ethtool" to
>> switch outputs at runtime.  The defaults for the new fields are
>> identical to the hardcoded values used previously.
>>
>> This should make no functional changes to the PHY layer behavior, but
>> it will allow later PHY/ethernet drivers to override those fields.
>>
>> Signed-off-by: Kyle Moffett <Kyle.D.Moffett@boeing.com>
>> ---
>>  drivers/net/phy/phy.c        |    4 ++--
>>  drivers/net/phy/phy_device.c |    2 ++
>>  include/linux/phy.h          |    4 ++++
>>  3 files changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
>> index c378f91..5f72055 100644
>> --- a/drivers/net/phy/phy.c
>> +++ b/drivers/net/phy/phy.c
>> @@ -290,9 +290,9 @@ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd)
>>
>>       ethtool_cmd_speed_set(cmd, phydev->speed);
>>       cmd->duplex = phydev->duplex;
>> -     cmd->port = PORT_MII;
>> +     cmd->port = phydev->port;
>>       cmd->phy_address = phydev->addr;
>> -     cmd->transceiver = XCVR_EXTERNAL;
>> +     cmd->transceiver = phydev->transciever;
>
> 'transceiver' is spelt thus.

Oh shoot!

That's what I get for trying to do a last-minute patch edit before
sending the email; thanks for pointing it out!

Cheers,
Kyle Moffett

-- 
Curious about my work on the Debian powerpcspe port?
I'm keeping a blog here: http://pureperl.blogspot.com/

^ permalink raw reply

* Re: [GIT PULL nf-next] IPVS
From: Pablo Neira Ayuso @ 2011-10-21  7:27 UTC (permalink / raw)
  To: Simon Horman
  Cc: lvs-devel, netdev, netfilter-devel, Wensong Zhang,
	Julian Anastasov, Krzysztof Wilczynski
In-Reply-To: <1319160783-28422-1-git-send-email-horms@verge.net.au>

On Fri, Oct 21, 2011 at 10:33:01AM +0900, Simon Horman wrote:
> Hi Pablo,
> 
> please consider pulling the following to get compile fix
> and cleanup patches from Krzysztof Wilczynski.
> 
> The following changes since commit 2ca5b853f1dd81c605ddc8a55e06bdad85636597:
> 
>   netfilter: export NAT definitions through linux/netfilter_ipv4/nf_nat.h (2011-10-11 03:32:34 +0200)
> 
> are available in the git repository at:
>   git://github.com/horms/ipvs-next.git master

Pulled, thanks.

http://1984.lsi.us.es/git/?p=net-next/.git;a=shortlog;h=refs/heads/nf-next

> Krzysztof Wilczynski (2):
>       ipvs: Remove unused variable "cs" from ip_vs_leave function.
>       ipvs: Fix compilation error in ip_vs.h for ip_vs_confirm_conntrack function.

Strange, I have all IPVS configs enabled here and I didn't hit this error.

^ permalink raw reply

* Re: [PATCH net-next] tcp: add const qualifiers where possible
From: David Miller @ 2011-10-21  7:36 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev
In-Reply-To: <20111021.025549.113655582134326444.davem@redhat.com>

From: David Miller <davem@redhat.com>
Date: Fri, 21 Oct 2011 02:55:49 -0400 (EDT)

> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Fri, 21 Oct 2011 08:53:13 +0200
> 
>> Adding const qualifiers to pointers can ease code review, and spot some
>> bugs. It might allow compiler to optimize code further.
>> 
>> For example, is it legal to temporary write a null cksum into tcphdr
>> in tcp_md5_hash_header() ? I am afraid a sniffer could catch the
>> temporary null value...
>> 
>> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> 
> Applied.

You can't make the argument to __sk_dst_get() const, because lock_is_held()
does not take a const pointer argument.

include/net/sock.h: In function ‘__sk_dst_get’:
include/net/sock.h:1315:2: warning: passing argument 1 of ‘lock_is_held’ discards ‘const’ qualifier from pointer target type [enabled by default]
include/linux/lockdep.h:324:12: note: expected ‘struct lockdep_map *’ but argument is of type ‘const struct lockdep_map *’

I'm reverting.

^ permalink raw reply

* Re: [PATCH net-next] tcp: add const qualifiers where possible
From: Eric Dumazet @ 2011-10-21  8:21 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20111021.033648.1029307040209750463.davem@davemloft.net>

Le vendredi 21 octobre 2011 à 03:36 -0400, David Miller a écrit :

> You can't make the argument to __sk_dst_get() const, because lock_is_held()
> does not take a const pointer argument.
> 
> include/net/sock.h: In function ‘__sk_dst_get’:
> include/net/sock.h:1315:2: warning: passing argument 1 of ‘lock_is_held’ discards ‘const’ qualifier from pointer target type [enabled by default]
> include/linux/lockdep.h:324:12: note: expected ‘struct lockdep_map *’ but argument is of type ‘const struct lockdep_map *’
> 
> I'm reverting.

Thats strange, I built with LOCKDEP enabled here...

# grep LOCKDEP .config
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_LOCKDEP=y
# CONFIG_DEBUG_LOCKDEP is not set


I'll take a look,
Thanks

^ permalink raw reply

* Re: IPsec performance bug
From: Yan, Zheng @ 2011-10-21  8:28 UTC (permalink / raw)
  To: Kim Phillips; +Cc: netdev, davem, hirofumi
In-Reply-To: <20111019212229.72078d51.kim.phillips@freescale.com>

On Thu, Oct 20, 2011 at 10:22 AM, Kim Phillips
<kim.phillips@freescale.com> wrote:
> Hi,
>
> I'm trying to debug an IPSec forwarding performance slowdown on a
> p2020 dual-core powerpc linux box using s/w crypto (no crypto h/w
> offload enabled) between vanilla kernel versions 2.6.35 and 3.0.
> Using a h/w packet generator set to 64-byte packets, I get the
> following results:
>
> v2.6.35: 48.5kpps
> v3.0.0:   0.2kpps
> v3.0.7:   0.2kpps
> v3.1.0-rc9-01707-gf7ba35d (a recent net-next): 13.6kpps
>
> I was able to bisect the problem down to the following commit:
>
> 7e1dc7b6f709dfc1a9ab4b320dbe723f45992693 is the first bad commit
> commit 7e1dc7b6f709dfc1a9ab4b320dbe723f45992693
> Author: David S. Miller <davem@davemloft.net>
> Date:   Sat Mar 12 02:42:11 2011 -0500
>
>    net: Use flowi4 and flowi6 in xfrm layer.
>
>    Signed-off-by: David S. Miller <davem@davemloft.net>
>
> And, indeed, going back one commit (i.e., v2.6.38-rc8-1468-g2032656
> and manually applying commit 7313714:  "xfrm: fix
> __xfrm_route_forward ()"), brings performance back to ~50kpps from
> 0.2kpps.
>
> Tracing shows that the commit breaks the route cache [1], and I
> understand there is major surgery going on in the area [2], so I
> suppose my question is twofold:
>
> (a) was such a large performance drop to be expected for v3.0?
>
> (b) any ideas how to fix?  I don't know much about routing
> internals, but in ip_route_input_common(), if I remove the input
> interface comparison (rth->rt_route_iif ^ iif), I get some
> performance back, but the system becomes unstable (it's booted over
> nfs).
>
> Thanks,
>
> Kim
>

Looks like xfrm4_fill_dst() reset rt->rt_route_iif to 0, it makes the
comparison (rth->rt_route_iif ^ iif) in
ip_route_input_common() return false.

Please try patch below. It improves the performance of 3.1-rc10
kernel. (I'm not sure the patch is harmless)

---
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index fc5368a..88a0972 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -82,7 +82,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst,
struct net_device *dev,
 	rt->rt_key_dst = fl4->daddr;
 	rt->rt_key_src = fl4->saddr;
 	rt->rt_key_tos = fl4->flowi4_tos;
-	rt->rt_route_iif = fl4->flowi4_iif;
 	rt->rt_iif = fl4->flowi4_iif;
 	rt->rt_oif = fl4->flowi4_oif;
 	rt->rt_mark = fl4->flowi4_mark;

^ permalink raw reply related

* Re: IPsec performance bug
From: Eric Dumazet @ 2011-10-21  8:49 UTC (permalink / raw)
  To: Yan, Zheng; +Cc: Kim Phillips, netdev, davem, hirofumi
In-Reply-To: <CAAM7YAkxP06_=y1wg4P1JhPDWhZgRM6+wbFQRG5PFkAq8vgsTw@mail.gmail.com>

Le vendredi 21 octobre 2011 à 16:28 +0800, Yan, Zheng a écrit :
> On Thu, Oct 20, 2011 at 10:22 AM, Kim Phillips
> <kim.phillips@freescale.com> wrote:
> > Hi,
> >
> > I'm trying to debug an IPSec forwarding performance slowdown on a
> > p2020 dual-core powerpc linux box using s/w crypto (no crypto h/w
> > offload enabled) between vanilla kernel versions 2.6.35 and 3.0.
> > Using a h/w packet generator set to 64-byte packets, I get the
> > following results:
> >
> > v2.6.35: 48.5kpps
> > v3.0.0:   0.2kpps
> > v3.0.7:   0.2kpps
> > v3.1.0-rc9-01707-gf7ba35d (a recent net-next): 13.6kpps
> >
> > I was able to bisect the problem down to the following commit:
> >
> > 7e1dc7b6f709dfc1a9ab4b320dbe723f45992693 is the first bad commit
> > commit 7e1dc7b6f709dfc1a9ab4b320dbe723f45992693
> > Author: David S. Miller <davem@davemloft.net>
> > Date:   Sat Mar 12 02:42:11 2011 -0500
> >
> >    net: Use flowi4 and flowi6 in xfrm layer.
> >
> >    Signed-off-by: David S. Miller <davem@davemloft.net>
> >
> > And, indeed, going back one commit (i.e., v2.6.38-rc8-1468-g2032656
> > and manually applying commit 7313714:  "xfrm: fix
> > __xfrm_route_forward ()"), brings performance back to ~50kpps from
> > 0.2kpps.
> >
> > Tracing shows that the commit breaks the route cache [1], and I
> > understand there is major surgery going on in the area [2], so I
> > suppose my question is twofold:
> >
> > (a) was such a large performance drop to be expected for v3.0?
> >
> > (b) any ideas how to fix?  I don't know much about routing
> > internals, but in ip_route_input_common(), if I remove the input
> > interface comparison (rth->rt_route_iif ^ iif), I get some
> > performance back, but the system becomes unstable (it's booted over
> > nfs).
> >
> > Thanks,
> >
> > Kim
> >
> 
> Looks like xfrm4_fill_dst() reset rt->rt_route_iif to 0, it makes the
> comparison (rth->rt_route_iif ^ iif) in
> ip_route_input_common() return false.
> 
> Please try patch below. It improves the performance of 3.1-rc10
> kernel. (I'm not sure the patch is harmless)
> 
> ---
> diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
> index fc5368a..88a0972 100644
> --- a/net/ipv4/xfrm4_policy.c
> +++ b/net/ipv4/xfrm4_policy.c
> @@ -82,7 +82,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst,
> struct net_device *dev,
>  	rt->rt_key_dst = fl4->daddr;
>  	rt->rt_key_src = fl4->saddr;
>  	rt->rt_key_tos = fl4->flowi4_tos;
> -	rt->rt_route_iif = fl4->flowi4_iif;
>  	rt->rt_iif = fl4->flowi4_iif;
>  	rt->rt_oif = fl4->flowi4_oif;
>  	rt->rt_mark = fl4->flowi4_mark;
> --

Hmm, looks like 1b86a58f9d7ce4fe237 (ipv4: Fix "Set rt->rt_iif more
sanely on output routes.") assumed xfrm4_fill_dst() was used for input
routes.

^ permalink raw reply

* [PATCH V2 net-next] tcp: add const qualifiers where possible
From: Eric Dumazet @ 2011-10-21  8:58 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <1319185263.2338.1.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

Adding const qualifiers to pointers can ease code review, and spot some
bugs. It might allow compiler to optimize code further.

For example, is it legal to temporary write a null cksum into tcphdr
in tcp_md5_hash_header() ? I am afraid a sniffer could catch the
temporary null value...

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
V2: no need to change __sk_dst_get() prototype...

 include/net/secure_seq.h |    2 
 include/net/tcp.h        |   43 +++++++-------
 net/core/secure_seq.c    |    2 
 net/ipv4/syncookies.c    |    2 
 net/ipv4/tcp.c           |   18 +++---
 net/ipv4/tcp_input.c     |  110 ++++++++++++++++++-------------------
 net/ipv4/tcp_ipv4.c      |   26 ++++----
 net/ipv4/tcp_minisocks.c |    4 -
 net/ipv4/tcp_output.c    |   72 ++++++++++++------------
 net/ipv6/syncookies.c    |    6 +-
 net/ipv6/tcp_ipv6.c      |   27 ++++-----
 11 files changed, 160 insertions(+), 152 deletions(-)

diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index d97f689..c2e542b 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -10,7 +10,7 @@ extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
 				      __be16 dport);
 extern __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 					__be16 sport, __be16 dport);
-extern __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
+extern __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 					  __be16 sport, __be16 dport);
 extern u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 				       __be16 sport, __be16 dport);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0113d30..3edef0b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -327,9 +327,9 @@ extern int tcp_sendpage(struct sock *sk, struct page *page, int offset,
 			size_t size, int flags);
 extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
-				 struct tcphdr *th, unsigned len);
+				 const struct tcphdr *th, unsigned int len);
 extern int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			       struct tcphdr *th, unsigned len);
+			       const struct tcphdr *th, unsigned int len);
 extern void tcp_rcv_space_adjust(struct sock *sk);
 extern void tcp_cleanup_rbuf(struct sock *sk, int copied);
 extern int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
@@ -401,10 +401,10 @@ extern void tcp_set_keepalive(struct sock *sk, int val);
 extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req);
 extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		       size_t len, int nonblock, int flags, int *addr_len);
-extern void tcp_parse_options(struct sk_buff *skb,
-			      struct tcp_options_received *opt_rx, u8 **hvpp,
+extern void tcp_parse_options(const struct sk_buff *skb,
+			      struct tcp_options_received *opt_rx, const u8 **hvpp,
 			      int estab);
-extern u8 *tcp_parse_md5sig_option(struct tcphdr *th);
+extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
 
 /*
  *	TCP v4 functions exported for the inet6 API
@@ -450,7 +450,7 @@ extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *);
 /* From net/ipv6/syncookies.c */
 extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
 #ifdef CONFIG_SYN_COOKIES
-extern __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb,
+extern __u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb,
 				     __u16 *mss);
 #else
 static inline __u32 cookie_v6_init_sequence(struct sock *sk,
@@ -522,7 +522,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
 }
 
 /* tcp.c */
-extern void tcp_get_info(struct sock *, struct tcp_info *);
+extern void tcp_get_info(const struct sock *, struct tcp_info *);
 
 /* Read 'sendfile()'-style from a TCP socket */
 typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
@@ -532,8 +532,8 @@ extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 
 extern void tcp_initialize_rcv_mss(struct sock *sk);
 
-extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
-extern int tcp_mss_to_mtu(struct sock *sk, int mss);
+extern int tcp_mtu_to_mss(const struct sock *sk, int pmtu);
+extern int tcp_mss_to_mtu(const struct sock *sk, int mss);
 extern void tcp_mtup_init(struct sock *sk);
 extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);
 
@@ -574,7 +574,7 @@ static inline void tcp_fast_path_check(struct sock *sk)
 /* Compute the actual rto_min value */
 static inline u32 tcp_rto_min(struct sock *sk)
 {
-	struct dst_entry *dst = __sk_dst_get(sk);
+	const struct dst_entry *dst = __sk_dst_get(sk);
 	u32 rto_min = TCP_RTO_MIN;
 
 	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
@@ -820,6 +820,7 @@ static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
 static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
+
 	if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
 		return tp->snd_ssthresh;
 	else
@@ -832,7 +833,7 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 #define tcp_verify_left_out(tp)	WARN_ON(tcp_left_out(tp) > tp->packets_out)
 
 extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
-extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
+extern __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst);
 
 /* Slow start with delack produces 3 packets of burst, so that
  * it is safe "de facto".  This will be the default - same as
@@ -861,7 +862,7 @@ static inline void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss,
 
 static inline void tcp_check_probe_timer(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 
 	if (!tp->packets_out && !icsk->icsk_pending)
@@ -1209,10 +1210,10 @@ extern struct tcp_md5sig_pool	*tcp_get_md5sig_pool(void);
 extern void tcp_put_md5sig_pool(void);
 
 extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *);
-extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *,
+extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *,
 				 unsigned header_len);
 extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp,
-			    struct tcp_md5sig_key *key);
+			    const struct tcp_md5sig_key *key);
 
 /* write queue abstraction */
 static inline void tcp_write_queue_purge(struct sock *sk)
@@ -1225,22 +1226,24 @@ static inline void tcp_write_queue_purge(struct sock *sk)
 	tcp_clear_all_retrans_hints(tcp_sk(sk));
 }
 
-static inline struct sk_buff *tcp_write_queue_head(struct sock *sk)
+static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
 {
 	return skb_peek(&sk->sk_write_queue);
 }
 
-static inline struct sk_buff *tcp_write_queue_tail(struct sock *sk)
+static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk)
 {
 	return skb_peek_tail(&sk->sk_write_queue);
 }
 
-static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_buff *skb)
+static inline struct sk_buff *tcp_write_queue_next(const struct sock *sk,
+						   const struct sk_buff *skb)
 {
 	return skb_queue_next(&sk->sk_write_queue, skb);
 }
 
-static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_buff *skb)
+static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk,
+						   const struct sk_buff *skb)
 {
 	return skb_queue_prev(&sk->sk_write_queue, skb);
 }
@@ -1254,7 +1257,7 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu
 #define tcp_for_write_queue_from_safe(skb, tmp, sk)			\
 	skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
 
-static inline struct sk_buff *tcp_send_head(struct sock *sk)
+static inline struct sk_buff *tcp_send_head(const struct sock *sk)
 {
 	return sk->sk_send_head;
 }
@@ -1265,7 +1268,7 @@ static inline bool tcp_skb_is_last(const struct sock *sk,
 	return skb_queue_is_last(&sk->sk_write_queue, skb);
 }
 
-static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb)
+static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb)
 {
 	if (tcp_skb_is_last(sk, skb))
 		sk->sk_send_head = NULL;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 45329d7..025233d 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -35,7 +35,7 @@ static u32 seq_scale(u32 seq)
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr,
+__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 				   __be16 sport, __be16 dport)
 {
 	u32 secret[MD5_MESSAGE_BYTES / 4];
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 3bc5c8f..d7b89b1 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -265,7 +265,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 			     struct ip_options *opt)
 {
 	struct tcp_options_received tcp_opt;
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct inet_request_sock *ireq;
 	struct tcp_request_sock *treq;
 	struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 132be08..704adad 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -374,7 +374,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 {
 	unsigned int mask;
 	struct sock *sk = sock->sk;
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
 	if (sk->sk_state == TCP_LISTEN)
@@ -528,7 +528,7 @@ static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
 	tp->pushed_seq = tp->write_seq;
 }
 
-static inline int forced_push(struct tcp_sock *tp)
+static inline int forced_push(const struct tcp_sock *tp)
 {
 	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
 }
@@ -891,9 +891,9 @@ EXPORT_SYMBOL(tcp_sendpage);
 #define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
 #define TCP_OFF(sk)	(sk->sk_sndmsg_off)
 
-static inline int select_size(struct sock *sk, int sg)
+static inline int select_size(const struct sock *sk, int sg)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	int tmp = tp->mss_cache;
 
 	if (sg) {
@@ -2408,7 +2408,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 		   unsigned int optlen)
 {
-	struct inet_connection_sock *icsk = inet_csk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 
 	if (level != SOL_TCP)
 		return icsk->icsk_af_ops->setsockopt(sk, level, optname,
@@ -2430,9 +2430,9 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
 #endif
 
 /* Return information about state of tcp endpoint in API format. */
-void tcp_get_info(struct sock *sk, struct tcp_info *info)
+void tcp_get_info(const struct sock *sk, struct tcp_info *info)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 now = tcp_time_stamp;
 
@@ -3010,7 +3010,7 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
 EXPORT_SYMBOL(tcp_md5_hash_header);
 
 int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
-			  struct sk_buff *skb, unsigned header_len)
+			  const struct sk_buff *skb, unsigned int header_len)
 {
 	struct scatterlist sg;
 	const struct tcphdr *tp = tcp_hdr(skb);
@@ -3043,7 +3043,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
 }
 EXPORT_SYMBOL(tcp_md5_hash_skb_data);
 
-int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
+int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
 {
 	struct scatterlist sg;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 69a90b8..52b5c2d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -206,7 +206,7 @@ static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
 		tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
 }
 
-static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb)
 {
 	if (tcp_hdr(skb)->cwr)
 		tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
@@ -239,19 +239,19 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *s
 	}
 }
 
-static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
+static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
 {
 	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
 		tp->ecn_flags &= ~TCP_ECN_OK;
 }
 
-static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
+static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th)
 {
 	if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
 		tp->ecn_flags &= ~TCP_ECN_OK;
 }
 
-static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
+static inline int TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th)
 {
 	if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
 		return 1;
@@ -315,7 +315,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb)
 	return 0;
 }
 
-static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
+static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -429,7 +429,7 @@ static void tcp_clamp_window(struct sock *sk)
  */
 void tcp_initialize_rcv_mss(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
 
 	hint = min(hint, tp->rcv_wnd / 2);
@@ -824,7 +824,7 @@ void tcp_update_metrics(struct sock *sk)
 	}
 }
 
-__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
+__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst)
 {
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
@@ -1216,7 +1216,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
 		tp->lost_retrans_low = new_low_seq;
 }
 
-static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb,
+static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 			   struct tcp_sack_block_wire *sp, int num_sacks,
 			   u32 prior_snd_una)
 {
@@ -1310,7 +1310,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
 	return in_sack;
 }
 
-static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
+static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk,
 			  struct tcp_sacktag_state *state,
 			  int dup_sack, int pcount)
 {
@@ -1465,13 +1465,13 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 /* I wish gso_size would have a bit more sane initialization than
  * something-or-zero which complicates things
  */
-static int tcp_skb_seglen(struct sk_buff *skb)
+static int tcp_skb_seglen(const struct sk_buff *skb)
 {
 	return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb);
 }
 
 /* Shifting pages past head area doesn't work */
-static int skb_can_shift(struct sk_buff *skb)
+static int skb_can_shift(const struct sk_buff *skb)
 {
 	return !skb_headlen(skb) && skb_is_nonlinear(skb);
 }
@@ -1720,19 +1720,19 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
 	return skb;
 }
 
-static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
+static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache)
 {
 	return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
 }
 
 static int
-tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
+tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
 			u32 prior_snd_una)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned char *ptr = (skb_transport_header(ack_skb) +
-			      TCP_SKB_CB(ack_skb)->sacked);
+	const unsigned char *ptr = (skb_transport_header(ack_skb) +
+				    TCP_SKB_CB(ack_skb)->sacked);
 	struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
 	struct tcp_sack_block sp[TCP_NUM_SACKS];
 	struct tcp_sack_block *cache;
@@ -2296,7 +2296,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag)
 	return 0;
 }
 
-static inline int tcp_fackets_out(struct tcp_sock *tp)
+static inline int tcp_fackets_out(const struct tcp_sock *tp)
 {
 	return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
 }
@@ -2316,19 +2316,20 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
  * they differ. Since neither occurs due to loss, TCP should really
  * ignore them.
  */
-static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
+static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
 {
 	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
 }
 
-static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_skb_timedout(const struct sock *sk,
+				   const struct sk_buff *skb)
 {
 	return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
 }
 
-static inline int tcp_head_timedout(struct sock *sk)
+static inline int tcp_head_timedout(const struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	return tp->packets_out &&
 	       tcp_skb_timedout(sk, tcp_write_queue_head(sk));
@@ -2639,7 +2640,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag)
 /* Nothing was retransmitted or returned timestamp is less
  * than timestamp of the first retransmission.
  */
-static inline int tcp_packet_delayed(struct tcp_sock *tp)
+static inline int tcp_packet_delayed(const struct tcp_sock *tp)
 {
 	return !tp->retrans_stamp ||
 		(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
@@ -2700,7 +2701,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static inline int tcp_may_undo(struct tcp_sock *tp)
+static inline int tcp_may_undo(const struct tcp_sock *tp)
 {
 	return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
 }
@@ -2764,9 +2765,9 @@ static void tcp_try_undo_dsack(struct sock *sk)
  * that successive retransmissions of a segment must not advance
  * retrans_stamp under any conditions.
  */
-static int tcp_any_retrans_done(struct sock *sk)
+static int tcp_any_retrans_done(const struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 
 	if (tp->retrans_out)
@@ -3245,7 +3246,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
  */
 static void tcp_rearm_rto(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	if (!tp->packets_out) {
 		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
@@ -3497,7 +3498,7 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
  * and in FreeBSD. NetBSD's one is even worse.) is wrong.
  */
-static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
+static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack,
 				 u32 ack_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3673,7 +3674,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
 }
 
 /* This routine deals with incoming acks, but not outgoing ones. */
-static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
+static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -3810,14 +3811,14 @@ old_ack:
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
  */
-void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
-		       u8 **hvpp, int estab)
+void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
+		       const u8 **hvpp, int estab)
 {
-	unsigned char *ptr;
-	struct tcphdr *th = tcp_hdr(skb);
+	const unsigned char *ptr;
+	const struct tcphdr *th = tcp_hdr(skb);
 	int length = (th->doff * 4) - sizeof(struct tcphdr);
 
-	ptr = (unsigned char *)(th + 1);
+	ptr = (const unsigned char *)(th + 1);
 	opt_rx->saw_tstamp = 0;
 
 	while (length > 0) {
@@ -3928,9 +3929,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 }
 EXPORT_SYMBOL(tcp_parse_options);
 
-static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
+static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th)
 {
-	__be32 *ptr = (__be32 *)(th + 1);
+	const __be32 *ptr = (const __be32 *)(th + 1);
 
 	if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
 			  | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
@@ -3947,8 +3948,9 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
 /* Fast parse options. This hopes to only see timestamps.
  * If it is wrong it falls back on tcp_parse_options().
  */
-static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
-				  struct tcp_sock *tp, u8 **hvpp)
+static int tcp_fast_parse_options(const struct sk_buff *skb,
+				  const struct tcphdr *th,
+				  struct tcp_sock *tp, const u8 **hvpp)
 {
 	/* In the spirit of fast parsing, compare doff directly to constant
 	 * values.  Because equality is used, short doff can be ignored here.
@@ -3969,10 +3971,10 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
 /*
  * Parse MD5 Signature option
  */
-u8 *tcp_parse_md5sig_option(struct tcphdr *th)
+const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
 {
-	int length = (th->doff << 2) - sizeof (*th);
-	u8 *ptr = (u8*)(th + 1);
+	int length = (th->doff << 2) - sizeof(*th);
+	const u8 *ptr = (const u8 *)(th + 1);
 
 	/* If the TCP option is too short, we can short cut */
 	if (length < TCPOLEN_MD5SIG)
@@ -4049,8 +4051,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
 
 static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcphdr *th = tcp_hdr(skb);
 	u32 seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 
@@ -4089,7 +4091,7 @@ static inline int tcp_paws_discard(const struct sock *sk,
  * (borrowed from freebsd)
  */
 
-static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq)
+static inline int tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq)
 {
 	return	!before(end_seq, tp->rcv_wup) &&
 		!after(seq, tp->rcv_nxt + tcp_receive_window(tp));
@@ -4246,7 +4248,7 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
 		tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
 }
 
-static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
+static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -4433,7 +4435,7 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int eaten = -1;
 
@@ -4917,9 +4919,9 @@ void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static int tcp_should_expand_sndbuf(struct sock *sk)
+static int tcp_should_expand_sndbuf(const struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	/* If the user specified a specific send buffer setting, do
 	 * not modify it.
@@ -5028,7 +5030,7 @@ static inline void tcp_ack_snd_check(struct sock *sk)
  *	either form (or just set the sysctl tcp_stdurg).
  */
 
-static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
+static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 ptr = ntohs(th->urg_ptr);
@@ -5094,7 +5096,7 @@ static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
 }
 
 /* This is the 'fast' part of urgent handling. */
-static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
+static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -5215,9 +5217,9 @@ out:
  * play significant role here.
  */
 static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
-			      struct tcphdr *th, int syn_inerr)
+			      const struct tcphdr *th, int syn_inerr)
 {
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* RFC1323: H1. Apply PAWS check first. */
@@ -5298,7 +5300,7 @@ discard:
  *	tcp_data_queue when everything is OK.
  */
 int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
-			struct tcphdr *th, unsigned len)
+			const struct tcphdr *th, unsigned int len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	int res;
@@ -5509,9 +5511,9 @@ discard:
 EXPORT_SYMBOL(tcp_rcv_established);
 
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
-					 struct tcphdr *th, unsigned len)
+					 const struct tcphdr *th, unsigned int len)
 {
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_cookie_values *cvp = tp->cookie_values;
@@ -5786,7 +5788,7 @@ reset_and_undo:
  */
 
 int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
-			  struct tcphdr *th, unsigned len)
+			  const struct tcphdr *th, unsigned int len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 48da7cc..955c925 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -104,7 +104,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
-static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
+static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
 {
 	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
 					  ip_hdr(skb)->saddr,
@@ -552,7 +552,7 @@ static void __tcp_v4_send_check(struct sk_buff *skb,
 /* This routine computes an IPv4 TCP checksum. */
 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 {
-	struct inet_sock *inet = inet_sk(sk);
+	const struct inet_sock *inet = inet_sk(sk);
 
 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
 }
@@ -590,7 +590,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
 
 static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	struct {
 		struct tcphdr th;
 #ifdef CONFIG_TCP_MD5SIG
@@ -668,7 +668,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 			    struct tcp_md5sig_key *key,
 			    int reply_flags)
 {
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	struct {
 		struct tcphdr th;
 		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
@@ -1182,10 +1182,10 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
 	 * o MD5 hash and we're not expecting one.
 	 * o MD5 hash and its wrong.
 	 */
-	__u8 *hash_location = NULL;
+	const __u8 *hash_location = NULL;
 	struct tcp_md5sig_key *hash_expected;
 	const struct iphdr *iph = ip_hdr(skb);
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	int genhash;
 	unsigned char newhash[16];
 
@@ -1248,7 +1248,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_extend_values tmp_ext;
 	struct tcp_options_received tmp_opt;
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct request_sock *req;
 	struct inet_request_sock *ireq;
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1645,7 +1645,7 @@ EXPORT_SYMBOL(tcp_v4_do_rcv);
 int tcp_v4_rcv(struct sk_buff *skb)
 {
 	const struct iphdr *iph;
-	struct tcphdr *th;
+	const struct tcphdr *th;
 	struct sock *sk;
 	int ret;
 	struct net *net = dev_net(skb->dev);
@@ -1809,7 +1809,7 @@ EXPORT_SYMBOL(tcp_v4_get_peer);
 
 void *tcp_v4_tw_get_peer(struct sock *sk)
 {
-	struct inet_timewait_sock *tw = inet_twsk(sk);
+	const struct inet_timewait_sock *tw = inet_twsk(sk);
 
 	return inet_getpeer_v4(tw->tw_daddr, 1);
 }
@@ -2381,7 +2381,7 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
 }
 EXPORT_SYMBOL(tcp_proc_unregister);
 
-static void get_openreq4(struct sock *sk, struct request_sock *req,
+static void get_openreq4(const struct sock *sk, const struct request_sock *req,
 			 struct seq_file *f, int i, int uid, int *len)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
@@ -2411,9 +2411,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 {
 	int timer_active;
 	unsigned long timer_expires;
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct inet_sock *inet = inet_sk(sk);
+	const struct inet_sock *inet = inet_sk(sk);
 	__be32 dest = inet->inet_daddr;
 	__be32 src = inet->inet_rcv_saddr;
 	__u16 destp = ntohs(inet->inet_dport);
@@ -2462,7 +2462,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 		len);
 }
 
-static void get_timewait4_sock(struct inet_timewait_sock *tw,
+static void get_timewait4_sock(const struct inet_timewait_sock *tw,
 			       struct seq_file *f, int i, int *len)
 {
 	__be32 dest, src;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index d2fe4e0..b767a95 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -141,7 +141,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 			   const struct tcphdr *th)
 {
 	struct tcp_options_received tmp_opt;
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 	int paws_reject = 0;
 
@@ -566,7 +566,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			   struct request_sock **prev)
 {
 	struct tcp_options_received tmp_opt;
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct sock *child;
 	const struct tcphdr *th = tcp_hdr(skb);
 	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ed96c54..980b98f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
 
 
 /* Account for new data that has been sent to the network. */
-static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
+static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int prior_packets = tp->packets_out;
@@ -89,9 +89,9 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
  * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
  * invalid. OK, let's make this for now:
  */
-static inline __u32 tcp_acceptable_seq(struct sock *sk)
+static inline __u32 tcp_acceptable_seq(const struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	if (!before(tcp_wnd_end(tp), tp->snd_nxt))
 		return tp->snd_nxt;
@@ -116,7 +116,7 @@ static inline __u32 tcp_acceptable_seq(struct sock *sk)
 static __u16 tcp_advertise_mss(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
+	const struct dst_entry *dst = __sk_dst_get(sk);
 	int mss = tp->advmss;
 
 	if (dst) {
@@ -133,7 +133,7 @@ static __u16 tcp_advertise_mss(struct sock *sk)
 
 /* RFC2861. Reset CWND after idle period longer RTO to "restart window".
  * This is the first part of cwnd validation mechanism. */
-static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
+static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	s32 delta = tcp_time_stamp - tp->lsndtime;
@@ -154,7 +154,7 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
 
 /* Congestion state accounting after a packet has been sent. */
 static void tcp_event_data_sent(struct tcp_sock *tp,
-				struct sk_buff *skb, struct sock *sk)
+				struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	const u32 now = tcp_time_stamp;
@@ -295,7 +295,7 @@ static u16 tcp_select_window(struct sock *sk)
 }
 
 /* Packet ECN state for a SYN-ACK */
-static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
+static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb)
 {
 	TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
 	if (!(tp->ecn_flags & TCP_ECN_OK))
@@ -315,7 +315,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
 }
 
 static __inline__ void
-TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th)
+TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th)
 {
 	if (inet_rsk(req)->ecn_ok)
 		th->ece = 1;
@@ -565,7 +565,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
  */
 static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 				struct tcp_out_options *opts,
-				struct tcp_md5sig_key **md5) {
+				struct tcp_md5sig_key **md5)
+{
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_cookie_values *cvp = tp->cookie_values;
 	unsigned remaining = MAX_TCP_OPTION_SPACE;
@@ -743,7 +744,8 @@ static unsigned tcp_synack_options(struct sock *sk,
  */
 static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
 					struct tcp_out_options *opts,
-					struct tcp_md5sig_key **md5) {
+					struct tcp_md5sig_key **md5)
+{
 	struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned size = 0;
@@ -893,7 +895,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
 
 	if (skb->len != tcp_header_size)
-		tcp_event_data_sent(tp, skb, sk);
+		tcp_event_data_sent(tp, sk);
 
 	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
 		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
@@ -926,7 +928,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 }
 
 /* Initialize TSO segments for a packet. */
-static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
+static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 				 unsigned int mss_now)
 {
 	if (skb->len <= mss_now || !sk_can_gso(sk) ||
@@ -947,7 +949,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
 /* When a modification to fackets out becomes necessary, we need to check
  * skb is counted to fackets_out or not.
  */
-static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
+static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
 				   int decr)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -962,7 +964,7 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
 /* Pcount in the middle of the write queue got changed, we need to do various
  * tweaks to fix counters
  */
-static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr)
+static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
@@ -1146,10 +1148,10 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 }
 
 /* Calculate MSS. Not accounting for SACKs here.  */
-int tcp_mtu_to_mss(struct sock *sk, int pmtu)
+int tcp_mtu_to_mss(const struct sock *sk, int pmtu)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct inet_connection_sock *icsk = inet_csk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	int mss_now;
 
 	/* Calculate base mss without TCP options:
@@ -1175,10 +1177,10 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu)
 }
 
 /* Inverse of above */
-int tcp_mss_to_mtu(struct sock *sk, int mss)
+int tcp_mss_to_mtu(const struct sock *sk, int mss)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct inet_connection_sock *icsk = inet_csk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	int mtu;
 
 	mtu = mss +
@@ -1252,8 +1254,8 @@ EXPORT_SYMBOL(tcp_sync_mss);
  */
 unsigned int tcp_current_mss(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct dst_entry *dst = __sk_dst_get(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
+	const struct dst_entry *dst = __sk_dst_get(sk);
 	u32 mss_now;
 	unsigned header_len;
 	struct tcp_out_options opts;
@@ -1313,10 +1315,10 @@ static void tcp_cwnd_validate(struct sock *sk)
  * modulo only when the receiver window alone is the limiting factor or
  * when we would be allowed to send the split-due-to-Nagle skb fully.
  */
-static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
+static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
 					unsigned int mss_now, unsigned int cwnd)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	u32 needed, window, cwnd_len;
 
 	window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
@@ -1336,8 +1338,8 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
 /* Can at least one segment of SKB be sent right now, according to the
  * congestion window rules?  If so, return how many segments are allowed.
  */
-static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
-					 struct sk_buff *skb)
+static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
+					 const struct sk_buff *skb)
 {
 	u32 in_flight, cwnd;
 
@@ -1358,7 +1360,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
  * This must be invoked the first time we consider transmitting
  * SKB onto the wire.
  */
-static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
+static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
 			     unsigned int mss_now)
 {
 	int tso_segs = tcp_skb_pcount(skb);
@@ -1396,7 +1398,7 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp,
 /* Return non-zero if the Nagle test allows this packet to be
  * sent now.
  */
-static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
+static inline int tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb,
 				 unsigned int cur_mss, int nonagle)
 {
 	/* Nagle rule does not apply to frames, which sit in the middle of the
@@ -1422,7 +1424,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
 }
 
 /* Does at least the first segment of SKB fit into the send window? */
-static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
+static inline int tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb,
 				   unsigned int cur_mss)
 {
 	u32 end_seq = TCP_SKB_CB(skb)->end_seq;
@@ -1437,10 +1439,10 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
  * should be put on the wire right now.  If so, it returns the number of
  * packets allowed by the congestion window.
  */
-static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
+static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
 				 unsigned int cur_mss, int nonagle)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int cwnd_quota;
 
 	tcp_init_tso_segs(sk, skb, cur_mss);
@@ -1458,7 +1460,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
 /* Test if sending is allowed right now. */
 int tcp_may_send_now(struct sock *sk)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb = tcp_send_head(sk);
 
 	return skb &&
@@ -2008,7 +2010,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 }
 
 /* Check if coalescing SKBs is legal. */
-static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb)
+static int tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
 {
 	if (tcp_skb_pcount(skb) > 1)
 		return 0;
@@ -2184,7 +2186,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 static int tcp_can_forward_retransmit(struct sock *sk)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 
 	/* Forward retransmissions are possible only during Recovery. */
 	if (icsk->icsk_ca_state != TCP_CA_Recovery)
@@ -2550,7 +2552,7 @@ EXPORT_SYMBOL(tcp_make_synack);
 /* Do all connect socket setups that can be done AF independent. */
 static void tcp_connect_init(struct sock *sk)
 {
-	struct dst_entry *dst = __sk_dst_get(sk);
+	const struct dst_entry *dst = __sk_dst_get(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u8 rcv_wscale;
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index ac83896..5a0d664 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -115,7 +115,7 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr,
 		& COOKIEMASK;
 }
 
-__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	const struct tcphdr *th = tcp_hdr(skb);
@@ -137,7 +137,7 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 				     jiffies / (HZ * 60), mssind);
 }
 
-static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	const struct tcphdr *th = tcp_hdr(skb);
@@ -152,7 +152,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_options_received tcp_opt;
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct inet_request_sock *ireq;
 	struct inet6_request_sock *ireq6;
 	struct tcp_request_sock *treq;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5357902..da2ada8 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -114,7 +114,7 @@ static __inline__ __sum16 tcp_v6_check(int len,
 	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
 }
 
-static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
+static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
 {
 	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
 					    ipv6_hdr(skb)->saddr.s6_addr32,
@@ -844,7 +844,7 @@ clear_hash_noput:
 
 static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb)
 {
-	__u8 *hash_location = NULL;
+	const __u8 *hash_location = NULL;
 	struct tcp_md5sig_key *hash_expected;
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	struct tcphdr *th = tcp_hdr(skb);
@@ -980,7 +980,8 @@ static int tcp6_gro_complete(struct sk_buff *skb)
 static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 				 u32 ts, struct tcp_md5sig_key *key, int rst)
 {
-	struct tcphdr *th = tcp_hdr(skb), *t1;
+	const struct tcphdr *th = tcp_hdr(skb);
+	struct tcphdr *t1;
 	struct sk_buff *buff;
 	struct flowi6 fl6;
 	struct net *net = dev_net(skb_dst(skb)->dev);
@@ -1070,7 +1071,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 
 static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcphdr *th = tcp_hdr(skb);
+	const struct tcphdr *th = tcp_hdr(skb);
 	u32 seq = 0, ack_seq = 0;
 	struct tcp_md5sig_key *key = NULL;
 
@@ -1160,7 +1161,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_extend_values tmp_ext;
 	struct tcp_options_received tmp_opt;
-	u8 *hash_location;
+	const u8 *hash_location;
 	struct request_sock *req;
 	struct inet6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1688,7 +1689,7 @@ ipv6_pktoptions:
 
 static int tcp_v6_rcv(struct sk_buff *skb)
 {
-	struct tcphdr *th;
+	const struct tcphdr *th;
 	const struct ipv6hdr *hdr;
 	struct sock *sk;
 	int ret;
@@ -1856,8 +1857,8 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
 
 static void *tcp_v6_tw_get_peer(struct sock *sk)
 {
-	struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
-	struct inet_timewait_sock *tw = inet_twsk(sk);
+	const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
+	const struct inet_timewait_sock *tw = inet_twsk(sk);
 
 	if (tw->tw_family == AF_INET)
 		return tcp_v4_tw_get_peer(sk);
@@ -2012,7 +2013,7 @@ static void tcp_v6_destroy_sock(struct sock *sk)
 #ifdef CONFIG_PROC_FS
 /* Proc filesystem TCPv6 sock list dumping. */
 static void get_openreq6(struct seq_file *seq,
-			 struct sock *sk, struct request_sock *req, int i, int uid)
+			 const struct sock *sk, struct request_sock *req, int i, int uid)
 {
 	int ttd = req->expires - jiffies;
 	const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
@@ -2048,10 +2049,10 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 	__u16 destp, srcp;
 	int timer_active;
 	unsigned long timer_expires;
-	struct inet_sock *inet = inet_sk(sp);
-	struct tcp_sock *tp = tcp_sk(sp);
+	const struct inet_sock *inet = inet_sk(sp);
+	const struct tcp_sock *tp = tcp_sk(sp);
 	const struct inet_connection_sock *icsk = inet_csk(sp);
-	struct ipv6_pinfo *np = inet6_sk(sp);
+	const struct ipv6_pinfo *np = inet6_sk(sp);
 
 	dest  = &np->daddr;
 	src   = &np->rcv_saddr;
@@ -2103,7 +2104,7 @@ static void get_timewait6_sock(struct seq_file *seq,
 {
 	const struct in6_addr *dest, *src;
 	__u16 destp, srcp;
-	struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
+	const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
 	int ttd = tw->tw_ttd - jiffies;
 
 	if (ttd < 0)

^ permalink raw reply related

* Re: IPsec performance bug
From: Yan, Zheng @ 2011-10-21  9:02 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Kim Phillips, netdev, davem, hirofumi
In-Reply-To: <1319186986.2338.3.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

On 10/21/2011 04:49 PM, Eric Dumazet wrote:
> Le vendredi 21 octobre 2011 à 16:28 +0800, Yan, Zheng a écrit :
>> On Thu, Oct 20, 2011 at 10:22 AM, Kim Phillips
>> <kim.phillips@freescale.com> wrote:
>>> Hi,
>>>
>>> I'm trying to debug an IPSec forwarding performance slowdown on a
>>> p2020 dual-core powerpc linux box using s/w crypto (no crypto h/w
>>> offload enabled) between vanilla kernel versions 2.6.35 and 3.0.
>>> Using a h/w packet generator set to 64-byte packets, I get the
>>> following results:
>>>
>>> v2.6.35: 48.5kpps
>>> v3.0.0:   0.2kpps
>>> v3.0.7:   0.2kpps
>>> v3.1.0-rc9-01707-gf7ba35d (a recent net-next): 13.6kpps
>>>
>>> I was able to bisect the problem down to the following commit:
>>>
>>> 7e1dc7b6f709dfc1a9ab4b320dbe723f45992693 is the first bad commit
>>> commit 7e1dc7b6f709dfc1a9ab4b320dbe723f45992693
>>> Author: David S. Miller <davem@davemloft.net>
>>> Date:   Sat Mar 12 02:42:11 2011 -0500
>>>
>>>    net: Use flowi4 and flowi6 in xfrm layer.
>>>
>>>    Signed-off-by: David S. Miller <davem@davemloft.net>
>>>
>>> And, indeed, going back one commit (i.e., v2.6.38-rc8-1468-g2032656
>>> and manually applying commit 7313714:  "xfrm: fix
>>> __xfrm_route_forward ()"), brings performance back to ~50kpps from
>>> 0.2kpps.
>>>
>>> Tracing shows that the commit breaks the route cache [1], and I
>>> understand there is major surgery going on in the area [2], so I
>>> suppose my question is twofold:
>>>
>>> (a) was such a large performance drop to be expected for v3.0?
>>>
>>> (b) any ideas how to fix?  I don't know much about routing
>>> internals, but in ip_route_input_common(), if I remove the input
>>> interface comparison (rth->rt_route_iif ^ iif), I get some
>>> performance back, but the system becomes unstable (it's booted over
>>> nfs).
>>>
>>> Thanks,
>>>
>>> Kim
>>>
>>
>> Looks like xfrm4_fill_dst() reset rt->rt_route_iif to 0, it makes the
>> comparison (rth->rt_route_iif ^ iif) in
>> ip_route_input_common() return false.
>>
>> Please try patch below. It improves the performance of 3.1-rc10
>> kernel. (I'm not sure the patch is harmless)
>>
>> ---
>> diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
>> index fc5368a..88a0972 100644
>> --- a/net/ipv4/xfrm4_policy.c
>> +++ b/net/ipv4/xfrm4_policy.c
>> @@ -82,7 +82,6 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst,
>> struct net_device *dev,
>>  	rt->rt_key_dst = fl4->daddr;
>>  	rt->rt_key_src = fl4->saddr;
>>  	rt->rt_key_tos = fl4->flowi4_tos;
>> -	rt->rt_route_iif = fl4->flowi4_iif;
>>  	rt->rt_iif = fl4->flowi4_iif;
>>  	rt->rt_oif = fl4->flowi4_oif;
>>  	rt->rt_mark = fl4->flowi4_mark;
>> --
> 
> Hmm, looks like 1b86a58f9d7ce4fe237 (ipv4: Fix "Set rt->rt_iif more
> sanely on output routes.") assumed xfrm4_fill_dst() was used for input
> routes.
> 

xfrm4_fill_dst() is used for input routes. The problem is fl4->flowi4_iif is zero.
I don't know why xfrm_decode_session() does not set fl4->flowi4_iif.

Regards
Yan, Zheng

^ permalink raw reply

* Re: [PATCH V2 net-next] tcp: add const qualifiers where possible
From: David Miller @ 2011-10-21  9:31 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev
In-Reply-To: <1319187493.2338.6.camel@edumazet-HP-Compaq-6005-Pro-SFF-PC>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 21 Oct 2011 10:58:13 +0200

> Adding const qualifiers to pointers can ease code review, and spot some
> bugs. It might allow compiler to optimize code further.
> 
> For example, is it legal to temporary write a null cksum into tcphdr
> in tcp_md5_hash_header() ? I am afraid a sniffer could catch the
> temporary null value...
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
> ---
> V2: no need to change __sk_dst_get() prototype...

Applied.

^ permalink raw reply

* [PATCH net-next] ipv4: tcp: fix TOS value in ACK messages sent from TIME_WAIT
From: Eric Dumazet @ 2011-10-21 10:29 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, MuraliRaja Muniraju, Andi Kleen, KOVACS Krisztian
In-Reply-To: <1318393869.3686.26.camel@edumazet-laptop>

There is a long standing bug in linux tcp stack, about ACK messages sent
on behalf of TIME_WAIT sockets.

In the IP header of the ACK message, we choose to reflect TOS field of
incoming message, and this might break some setups.

Example of things that were broken :
  - Routing using TOS as a selector
  - Firewalls
  - Trafic classification / shaping

We now remember in timewait structure the inet tos field and use it in
ACK generation, and route lookup.

Notes :
 - We still reflect incoming TOS in RST messages.
 - We could extend MuraliRaja Muniraju patch to report TOS value in
netlink messages for TIME_WAIT sockets.
 - A patch is needed for IPv6

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: MuraliRaja Muniraju <muralira@google.com>
CC: Andi Kleen <ak@linux.intel.com>
CC: KOVACS Krisztian <hidden@balabit.hu>
---
 include/net/inet_timewait_sock.h |    3 ++-
 include/net/ip.h                 |    3 ++-
 net/ipv4/inet_timewait_sock.c    |    1 +
 net/ipv4/ip_output.c             |    6 +++---
 net/ipv4/tcp_ipv4.c              |   11 +++++++----
 5 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index f1a7709..180231c 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -126,7 +126,8 @@ struct inet_timewait_sock {
 	/* And these are ours. */
 	unsigned int		tw_ipv6only     : 1,
 				tw_transparent  : 1,
-				tw_pad		: 14,	/* 14 bits hole */
+				tw_pad		: 6,	/* 6 bits hole */
+				tw_tos		: 8,
 				tw_ipv6_offset  : 16;
 	kmemcheck_bitfield_end(flags);
 	unsigned long		tw_ttd;
diff --git a/include/net/ip.h b/include/net/ip.h
index c7e066a..eca0ef7 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -165,6 +165,7 @@ struct ip_reply_arg {
 	int	    csumoffset; /* u16 offset of csum in iov[0].iov_base */
 				/* -1 if not needed */ 
 	int	    bound_dev_if;
+	u8  	    tos;
 }; 
 
 #define IP_REPLY_ARG_NOSRCCHECK 1
@@ -175,7 +176,7 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg)
 }
 
 void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
-		   struct ip_reply_arg *arg, unsigned int len);
+		   const struct ip_reply_arg *arg, unsigned int len);
 
 struct ipv4_config {
 	int	log_martians;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 3c8dfa1..44d65d5 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -183,6 +183,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
 		tw->tw_daddr	    = inet->inet_daddr;
 		tw->tw_rcv_saddr    = inet->inet_rcv_saddr;
 		tw->tw_bound_dev_if = sk->sk_bound_dev_if;
+		tw->tw_tos	    = inet->tos;
 		tw->tw_num	    = inet->inet_num;
 		tw->tw_state	    = TCP_TIME_WAIT;
 		tw->tw_substate	    = state;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e1374ab..0bc95f3 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1466,7 +1466,7 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
  *     	structure to pass arguments.
  */
 void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
-		   struct ip_reply_arg *arg, unsigned int len)
+		   const struct ip_reply_arg *arg, unsigned int len)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_options_data replyopts;
@@ -1489,7 +1489,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
 	}
 
 	flowi4_init_output(&fl4, arg->bound_dev_if, 0,
-			   RT_TOS(ip_hdr(skb)->tos),
+			   RT_TOS(arg->tos),
 			   RT_SCOPE_UNIVERSE, sk->sk_protocol,
 			   ip_reply_arg_flowi_flags(arg),
 			   daddr, rt->rt_spec_dst,
@@ -1506,7 +1506,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
 	   with locally disabled BH and that sk cannot be already spinlocked.
 	 */
 	bh_lock_sock(sk);
-	inet->tos = ip_hdr(skb)->tos;
+	inet->tos = arg->tos;
 	sk->sk_priority = skb->priority;
 	sk->sk_protocol = ip_hdr(skb)->protocol;
 	sk->sk_bound_dev_if = arg->bound_dev_if;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 955c925..16687b3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -652,6 +652,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
 
 	net = dev_net(skb_dst(skb)->dev);
+	arg.tos = ip_hdr(skb)->tos;
 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
 		      &arg, arg.iov[0].iov_len);
 
@@ -666,7 +667,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 			    u32 win, u32 ts, int oif,
 			    struct tcp_md5sig_key *key,
-			    int reply_flags)
+			    int reply_flags, u8 tos)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
 	struct {
@@ -726,7 +727,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
 	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
 	if (oif)
 		arg.bound_dev_if = oif;
-
+	arg.tos = tos;
 	ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
 		      &arg, arg.iov[0].iov_len);
 
@@ -743,7 +744,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
 			tcptw->tw_ts_recent,
 			tw->tw_bound_dev_if,
 			tcp_twsk_md5_key(tcptw),
-			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
+			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
+			tw->tw_tos
 			);
 
 	inet_twsk_put(tw);
@@ -757,7 +759,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 			req->ts_recent,
 			0,
 			tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
-			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
+			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
+			ip_hdr(skb)->tos);
 }
 
 /*

^ permalink raw reply related

* [net-next 0/5][pull request] Intel Wired LAN Driver Updates
From: Jeff Kirsher @ 2011-10-21 10:31 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, gospo, sassmann

The following series contains updates to igbvf and igb.
This version of the series contains the following changes:

- igb 2 fixes (VFTA table, Alt. MAC addr) and move DMA coalescing init
  code into separate function
- igbvf bump version and update identification strings

The following are changes since commit :
  
and are available in the git repository at
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next.git

Carolyn Wyborny (3):
  igb: Fix for Alt MAC Address feature on 82580 and later devices
  igb: Move DMA Coalescing init code to separate function.
  igb: VFTA Table Fix for i350 devices

Williams, Mitch A (2):
  igbvf: Update module identification strings
  igbvf: Bump version number

 drivers/net/ethernet/intel/igb/e1000_82575.c |    5 +-
 drivers/net/ethernet/intel/igb/e1000_mac.c   |   63 ++++++++++++-
 drivers/net/ethernet/intel/igb/e1000_mac.h   |    1 +
 drivers/net/ethernet/intel/igb/igb.h         |    1 +
 drivers/net/ethernet/intel/igb/igb_main.c    |  130 +++++++++++++++-----------
 drivers/net/ethernet/intel/igbvf/netdev.c    |   14 ++-
 6 files changed, 148 insertions(+), 66 deletions(-)

-- 
1.7.6.4

^ permalink raw reply

* [net-next 1/5] igbvf: Update module identification strings
From: Jeff Kirsher @ 2011-10-21 10:31 UTC (permalink / raw)
  To: davem; +Cc: Williams, Mitch A, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1319193121-2729-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: "Williams, Mitch A" <mitch.a.williams@intel.com>

Update adapter identification strings to properly indicate i350 VF devices
in the VF driver. Change the driver ID string to remove 82576-specific
wording. Update copyright date.

Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igbvf/netdev.c |   12 +++++++-----
 1 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index db29817..10e3c9c 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -49,9 +49,9 @@
 char igbvf_driver_name[] = "igbvf";
 const char igbvf_driver_version[] = DRV_VERSION;
 static const char igbvf_driver_string[] =
-				"Intel(R) Virtual Function Network Driver";
+		  "Intel(R) Gigabit Virtual Function Network Driver";
 static const char igbvf_copyright[] =
-				"Copyright (c) 2009 - 2010 Intel Corporation.";
+		  "Copyright (c) 2009 - 2011 Intel Corporation.";
 
 static int igbvf_poll(struct napi_struct *napi, int budget);
 static void igbvf_reset(struct igbvf_adapter *);
@@ -2525,9 +2525,11 @@ static void igbvf_print_device_info(struct igbvf_adapter *adapter)
 	struct net_device *netdev = adapter->netdev;
 	struct pci_dev *pdev = adapter->pdev;
 
-	dev_info(&pdev->dev, "Intel(R) 82576 Virtual Function\n");
+	if (hw->mac.type == e1000_vfadapt_i350)
+		dev_info(&pdev->dev, "Intel(R) I350 Virtual Function\n");
+	else
+		dev_info(&pdev->dev, "Intel(R) 82576 Virtual Function\n");
 	dev_info(&pdev->dev, "Address: %pM\n", netdev->dev_addr);
-	dev_info(&pdev->dev, "MAC: %d\n", hw->mac.type);
 }
 
 static int igbvf_set_features(struct net_device *netdev, u32 features)
@@ -2864,7 +2866,7 @@ module_exit(igbvf_exit_module);
 
 
 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
-MODULE_DESCRIPTION("Intel(R) 82576 Virtual Function Network Driver");
+MODULE_DESCRIPTION("Intel(R) Gigabit Virtual Function Network Driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 2/5] igbvf: Bump version number
From: Jeff Kirsher @ 2011-10-21 10:31 UTC (permalink / raw)
  To: davem; +Cc: Williams, Mitch A, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1319193121-2729-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: "Williams, Mitch A" <mitch.a.williams@intel.com>

Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igbvf/netdev.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 10e3c9c..cca7812 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -45,7 +45,7 @@
 
 #include "igbvf.h"
 
-#define DRV_VERSION "2.0.0-k"
+#define DRV_VERSION "2.0.1-k"
 char igbvf_driver_name[] = "igbvf";
 const char igbvf_driver_version[] = DRV_VERSION;
 static const char igbvf_driver_string[] =
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 4/5] igb: Move DMA Coalescing init code to separate function.
From: Jeff Kirsher @ 2011-10-21 10:32 UTC (permalink / raw)
  To: davem; +Cc: Carolyn Wyborny, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1319193121-2729-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Carolyn Wyborny <carolyn.wyborny@intel.com>

This patch moves the DMA Coalescing feature initialization code from
igb_reset to a new function and replaces it with a call to the new
function.

Signed-off-by: Carolyn Wyborny <carolyn.wyborny@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c |  124 ++++++++++++++++-------------
 1 files changed, 68 insertions(+), 56 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index f689aa1..b186353 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -204,6 +204,7 @@ static struct pci_error_handlers igb_err_handler = {
 	.resume = igb_io_resume,
 };
 
+static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
 
 static struct pci_driver igb_driver = {
 	.name     = igb_driver_name,
@@ -1728,63 +1729,8 @@ void igb_reset(struct igb_adapter *adapter)
 
 	if (hw->mac.ops.init_hw(hw))
 		dev_err(&pdev->dev, "Hardware Error\n");
-	if (hw->mac.type > e1000_82580) {
-		if (adapter->flags & IGB_FLAG_DMAC) {
-			u32 reg;
-
-			/*
-			 * DMA Coalescing high water mark needs to be higher
-			 * than * the * Rx threshold.  The Rx threshold is
-			 * currently * pba - 6, so we * should use a high water
-			 * mark of pba * - 4. */
-			hwm = (pba - 4) << 10;
-
-			reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
-			       & E1000_DMACR_DMACTHR_MASK);
-
-			/* transition to L0x or L1 if available..*/
-			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
-
-			/* watchdog timer= +-1000 usec in 32usec intervals */
-			reg |= (1000 >> 5);
-			wr32(E1000_DMACR, reg);
-
-			/* no lower threshold to disable coalescing(smart fifb)
-			 * -UTRESH=0*/
-			wr32(E1000_DMCRTRH, 0);
-
-			/* set hwm to PBA -  2 * max frame size */
-			wr32(E1000_FCRTC, hwm);
 
-			/*
-			 * This sets the time to wait before requesting tran-
-			 * sition to * low power state to number of usecs needed
-			 * to receive 1 512 * byte frame at gigabit line rate
-			 */
-			reg = rd32(E1000_DMCTLX);
-			reg |= IGB_DMCTLX_DCFLUSH_DIS;
-
-			/* Delay 255 usec before entering Lx state. */
-			reg |= 0xFF;
-			wr32(E1000_DMCTLX, reg);
-
-			/* free space in Tx packet buffer to wake from DMAC */
-			wr32(E1000_DMCTXTH,
-			     (IGB_MIN_TXPBSIZE -
-			     (IGB_TX_BUF_4096 + adapter->max_frame_size))
-			     >> 6);
-
-			/* make low power state decision controlled by DMAC */
-			reg = rd32(E1000_PCIEMISC);
-			reg |= E1000_PCIEMISC_LX_DECISION;
-			wr32(E1000_PCIEMISC, reg);
-		} /* end if IGB_FLAG_DMAC set */
-	}
-	if (hw->mac.type == e1000_82580) {
-		u32 reg = rd32(E1000_PCIEMISC);
-		wr32(E1000_PCIEMISC,
-		                reg & ~E1000_PCIEMISC_LX_DECISION);
-	}
+	igb_init_dmac(adapter, pba);
 	if (!netif_running(adapter->netdev))
 		igb_power_down_link(adapter);
 
@@ -7098,4 +7044,70 @@ static void igb_vmm_control(struct igb_adapter *adapter)
 	}
 }
 
+static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 dmac_thr;
+	u16 hwm;
+
+	if (hw->mac.type > e1000_82580) {
+		if (adapter->flags & IGB_FLAG_DMAC) {
+			u32 reg;
+
+			/* force threshold to 0. */
+			wr32(E1000_DMCTXTH, 0);
+
+			/*
+			 * DMA Coalescing high water mark needs to be higher
+			 * than the RX threshold. set hwm to PBA -  2 * max
+			 * frame size
+			 */
+			hwm = pba - (2 * adapter->max_frame_size);
+			reg = rd32(E1000_DMACR);
+			reg &= ~E1000_DMACR_DMACTHR_MASK;
+			dmac_thr = pba - 4;
+
+			reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
+				& E1000_DMACR_DMACTHR_MASK);
+
+			/* transition to L0x or L1 if available..*/
+			reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
+
+			/* watchdog timer= +-1000 usec in 32usec intervals */
+			reg |= (1000 >> 5);
+			wr32(E1000_DMACR, reg);
+
+			/*
+			 * no lower threshold to disable
+			 * coalescing(smart fifb)-UTRESH=0
+			 */
+			wr32(E1000_DMCRTRH, 0);
+			wr32(E1000_FCRTC, hwm);
+
+			reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
+
+			wr32(E1000_DMCTLX, reg);
+
+			/*
+			 * free space in tx packet buffer to wake from
+			 * DMA coal
+			 */
+			wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
+			     (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
+
+			/*
+			 * make low power state decision controlled
+			 * by DMA coal
+			 */
+			reg = rd32(E1000_PCIEMISC);
+			reg &= ~E1000_PCIEMISC_LX_DECISION;
+			wr32(E1000_PCIEMISC, reg);
+		} /* endif adapter->dmac is not disabled */
+	} else if (hw->mac.type == e1000_82580) {
+		u32 reg = rd32(E1000_PCIEMISC);
+		wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
+		wr32(E1000_DMACR, 0);
+	}
+}
+
 /* igb_main.c */
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 3/5] igb: Fix for Alt MAC Address feature on 82580 and later devices
From: Jeff Kirsher @ 2011-10-21 10:31 UTC (permalink / raw)
  To: davem; +Cc: Carolyn Wyborny, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1319193121-2729-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Carolyn Wyborny <carolyn.wyborny@intel.com>

In 82580 and later devices, the alternate MAC address feature is
completely handled by the option ROM and software does not handle
it anymore.  This patch changes the check_alt_mac_addr function to
exit immediately if device is 82580 or later.

Signed-off-by: Carolyn Wyborny <carolyn.wyborny@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/e1000_mac.c |    7 +++++++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c
index 872119d..bad3e14 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.c
@@ -191,6 +191,13 @@ s32 igb_check_alt_mac_addr(struct e1000_hw *hw)
 	u16 offset, nvm_alt_mac_addr_offset, nvm_data;
 	u8 alt_mac_addr[ETH_ALEN];
 
+	/*
+	 * Alternate MAC address is handled by the option ROM for 82580
+	 * and newer. SW support not required.
+	 */
+	if (hw->mac.type >= e1000_82580)
+		goto out;
+
 	ret_val = hw->nvm.ops.read(hw, NVM_ALT_MAC_ADDR_PTR, 1,
 				 &nvm_alt_mac_addr_offset);
 	if (ret_val) {
-- 
1.7.6.4

^ permalink raw reply related

* [net-next 5/5] igb: VFTA Table Fix for i350 devices
From: Jeff Kirsher @ 2011-10-21 10:32 UTC (permalink / raw)
  To: davem; +Cc: Carolyn Wyborny, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1319193121-2729-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Carolyn Wyborny <carolyn.wyborny@intel.com>

Due to a hardware problem, writes to the VFTA register can
theoretically fail. Although the likelihood of this is very low.
This patch adds a shadow vfta in the adapter struct for reading
and adds new write functions for these devices to work around the problem.

Signed-off-by: Carolyn Wyborny <carolyn.wyborny@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/e1000_82575.c |    5 ++-
 drivers/net/ethernet/intel/igb/e1000_mac.c   |   56 ++++++++++++++++++++++++-
 drivers/net/ethernet/intel/igb/e1000_mac.h   |    1 +
 drivers/net/ethernet/intel/igb/igb.h         |    1 +
 drivers/net/ethernet/intel/igb/igb_main.c    |    6 +++
 5 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index 6580cea..7881fb9 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -1051,7 +1051,10 @@ static s32 igb_init_hw_82575(struct e1000_hw *hw)
 
 	/* Disabling VLAN filtering */
 	hw_dbg("Initializing the IEEE VLAN\n");
-	igb_clear_vfta(hw);
+	if (hw->mac.type == e1000_i350)
+		igb_clear_vfta_i350(hw);
+	else
+		igb_clear_vfta(hw);
 
 	/* Setup the receive address */
 	igb_init_rx_addrs(hw, rar_count);
diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c
index bad3e14..73aac08 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.c
@@ -117,6 +117,50 @@ static void igb_write_vfta(struct e1000_hw *hw, u32 offset, u32 value)
 	wrfl();
 }
 
+/* Due to a hw errata, if the host tries to  configure the VFTA register
+ * while performing queries from the BMC or DMA, then the VFTA in some
+ * cases won't be written.
+ */
+
+/**
+ *  igb_clear_vfta_i350 - Clear VLAN filter table
+ *  @hw: pointer to the HW structure
+ *
+ *  Clears the register array which contains the VLAN filter table by
+ *  setting all the values to 0.
+ **/
+void igb_clear_vfta_i350(struct e1000_hw *hw)
+{
+	u32 offset;
+	int i;
+
+	for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) {
+		for (i = 0; i < 10; i++)
+			array_wr32(E1000_VFTA, offset, 0);
+
+		wrfl();
+	}
+}
+
+/**
+ *  igb_write_vfta_i350 - Write value to VLAN filter table
+ *  @hw: pointer to the HW structure
+ *  @offset: register offset in VLAN filter table
+ *  @value: register value written to VLAN filter table
+ *
+ *  Writes value at the given offset in the register array which stores
+ *  the VLAN filter table.
+ **/
+void igb_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value)
+{
+	int i;
+
+	for (i = 0; i < 10; i++)
+		array_wr32(E1000_VFTA, offset, value);
+
+	wrfl();
+}
+
 /**
  *  igb_init_rx_addrs - Initialize receive address's
  *  @hw: pointer to the HW structure
@@ -155,9 +199,12 @@ s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add)
 {
 	u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK;
 	u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK);
-	u32 vfta = array_rd32(E1000_VFTA, index);
+	u32 vfta;
+	struct igb_adapter *adapter = hw->back;
 	s32 ret_val = 0;
 
+	vfta = adapter->shadow_vfta[index];
+
 	/* bit was set/cleared before we started */
 	if ((!!(vfta & mask)) == add) {
 		ret_val = -E1000_ERR_CONFIG;
@@ -167,8 +214,11 @@ s32 igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add)
 		else
 			vfta &= ~mask;
 	}
-
-	igb_write_vfta(hw, index, vfta);
+	if (hw->mac.type == e1000_i350)
+		igb_write_vfta_i350(hw, index, vfta);
+	else
+		igb_write_vfta(hw, index, vfta);
+	adapter->shadow_vfta[index] = vfta;
 
 	return ret_val;
 }
diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.h b/drivers/net/ethernet/intel/igb/e1000_mac.h
index 4927f61..e45996b 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.h
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.h
@@ -60,6 +60,7 @@ s32  igb_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg,
 
 void igb_clear_hw_cntrs_base(struct e1000_hw *hw);
 void igb_clear_vfta(struct e1000_hw *hw);
+void igb_clear_vfta_i350(struct e1000_hw *hw);
 s32  igb_vfta_set(struct e1000_hw *hw, u32 vid, bool add);
 void igb_config_collision_dist(struct e1000_hw *hw);
 void igb_init_rx_addrs(struct e1000_hw *hw, u16 rar_count);
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 5594430..c69feeb 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -363,6 +363,7 @@ struct igb_adapter {
 	u32 rss_queues;
 	u32 wvbr;
 	int node;
+	u32 *shadow_vfta;
 };
 
 #define IGB_FLAG_HAS_MSI           (1 << 0)
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index b186353..ced5444 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2206,6 +2206,7 @@ static void __devexit igb_remove(struct pci_dev *pdev)
 	pci_release_selected_regions(pdev,
 	                             pci_select_bars(pdev, IORESOURCE_MEM));
 
+	kfree(adapter->shadow_vfta);
 	free_netdev(netdev);
 
 	pci_disable_pcie_error_reporting(pdev);
@@ -2438,6 +2439,11 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter)
 	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
 		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
 
+	/* Setup and initialize a copy of the hw vlan table array */
+	adapter->shadow_vfta = kzalloc(sizeof(u32) *
+				E1000_VLAN_FILTER_TBL_SIZE,
+				GFP_ATOMIC);
+
 	/* This call may decrease the number of queues */
 	if (igb_init_interrupt_scheme(adapter)) {
 		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
-- 
1.7.6.4

^ permalink raw reply related

* Re: Kernel panic from tg3 net driver
From: RongQing Li @ 2011-10-21 10:48 UTC (permalink / raw)
  To: Ari Savolainen
  Cc: Eric Dumazet, David Miller, richardcochran, netdev, linux-kernel
In-Reply-To: <CAEbykaWhh12pM2jRSzfGig=jeKhVWSdxfwuXHFvLTt14=BYTbA@mail.gmail.com>

Hi Ari:

Are you sure the patch is applied correctly and the log is same?
If the log is not same, could you paste it again.

Thanks

BR

2011/10/21 Ari Savolainen <ari.m.savolainen@gmail.com>:
> That's right. I tried the patch and it didn't help.
>
> Ari
>
> 2011/10/20 Eric Dumazet <eric.dumazet@gmail.com>:
>> Le jeudi 20 octobre 2011 à 16:11 -0400, David Miller a écrit :
>>> From: Eric Dumazet <eric.dumazet@gmail.com>
>>> Date: Thu, 20 Oct 2011 22:05:25 +0200
>>>
>>> > And I think this was fixed yesterday ?
>>> >
>>> > De:         roy.qing.li@gmail.com
>>> > À:  ari.m.savolainen@gmail.com, netdev@vger.kernel.org
>>> > Sujet:      [PATCH net-next] neigh: fix rcu splat in neigh_update()
>>> > Date:       Tue, 18 Oct 2011 16:32:42 +0800 (18/10/2011 10:32:42)
>>> >
>>>
>>> Good catch, it seems to be this bug.
>>
>> Oh well, sorry, it seems it was one bug hit during bisection, but maybe
>> its completely unrelated to the real problem.
>>
>>
>>
>>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* [PATCH v2 0/3]  net: time stamping fixes
From: Richard Cochran @ 2011-10-21 10:49 UTC (permalink / raw)
  To: netdev; +Cc: David Miller, Eric Dumazet, Johannes Berg
In-Reply-To: <56185ca8a7dc0223031ca0f0996302cac1b497eb.1318444117.git.richard.cochran@omicron.at>

[ Changes in v2: use atomic_inc_not_zero per Eric's suggestion. ]

The first patch fixes a bug in the time stamping code introduced in
v2.6.36 of the kernel.

The other two patches depend on the first patch and fix two bugs in a
PTP Hardware Clock driver. This driver was first introduced in Linux
version 3.0.

Richard Cochran (3):
  net: hold sock reference while processing tx timestamps
  dp83640: use proper function to free transmit time stamping packets
  dp83640: free packet queues on remove

 drivers/net/phy/dp83640.c |   11 +++++++++--
 include/linux/phy.h       |    2 +-
 include/linux/skbuff.h    |    7 ++++++-
 net/core/timestamping.c   |   12 ++++++++++--
 4 files changed, 26 insertions(+), 6 deletions(-)

-- 
1.7.2.5

^ permalink raw reply

* [PATCH v2 1/3] net: hold sock reference while processing tx timestamps
From: Richard Cochran @ 2011-10-21 10:49 UTC (permalink / raw)
  To: netdev; +Cc: David Miller, Eric Dumazet, Johannes Berg, stable
In-Reply-To: <cover.1319193734.git.richard.cochran@omicron.at>

The pair of functions,

 * skb_clone_tx_timestamp()
 * skb_complete_tx_timestamp()

were designed to allow timestamping in PHY devices. The first
function, called during the MAC driver's hard_xmit method, identifies
PTP protocol packets, clones them, and gives them to the PHY device
driver. The PHY driver may hold onto the packet and deliver it at a
later time using the second function, which adds the packet to the
socket's error queue.

As pointed out by Johannes, nothing prevents the socket from
disappearing while the cloned packet is sitting in the PHY driver
awaiting a timestamp. This patch fixes the issue by taking a reference
on the socket for each such packet. In addition, the comments
regarding the usage of these function are expanded to highlight the
rule that PHY drivers must use skb_complete_tx_timestamp() to release
the packet, in order to release the socket reference, too.

These functions first appeared in v2.6.36.

Reported-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
Cc: <stable@vger.kernel.org>
---
 include/linux/phy.h     |    2 +-
 include/linux/skbuff.h  |    7 ++++++-
 net/core/timestamping.c |   12 ++++++++++--
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 54fc413..79f337c 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -420,7 +420,7 @@ struct phy_driver {
 
 	/*
 	 * Requests a Tx timestamp for 'skb'. The phy driver promises
-	 * to deliver it to the socket's error queue as soon as a
+	 * to deliver it using skb_complete_tx_timestamp() as soon as a
 	 * timestamp becomes available. One of the PTP_CLASS_ values
 	 * is passed in 'type'.
 	 */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8bd383c..0f96646 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2020,8 +2020,13 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb)
 /**
  * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps
  *
+ * PHY drivers may accept clones of transmitted packets for
+ * timestamping via their phy_driver.txtstamp method. These drivers
+ * must call this function to return the skb back to the stack, with
+ * or without a timestamp.
+ *
  * @skb: clone of the the original outgoing packet
- * @hwtstamps: hardware time stamps
+ * @hwtstamps: hardware time stamps, may be NULL if not available
  *
  */
 void skb_complete_tx_timestamp(struct sk_buff *skb,
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 98a5264..82fb288 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -57,9 +57,13 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
 	case PTP_CLASS_V2_VLAN:
 		phydev = skb->dev->phydev;
 		if (likely(phydev->drv->txtstamp)) {
+			if (!atomic_inc_not_zero(&sk->sk_refcnt))
+				return;
 			clone = skb_clone(skb, GFP_ATOMIC);
-			if (!clone)
+			if (!clone) {
+				sock_put(sk);
 				return;
+			}
 			clone->sk = sk;
 			phydev->drv->txtstamp(phydev, clone, type);
 		}
@@ -77,8 +81,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
 	struct sock_exterr_skb *serr;
 	int err;
 
-	if (!hwtstamps)
+	if (!hwtstamps) {
+		sock_put(sk);
+		kfree_skb(skb);
 		return;
+	}
 
 	*skb_hwtstamps(skb) = *hwtstamps;
 	serr = SKB_EXT_ERR(skb);
@@ -87,6 +94,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
 	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
 	skb->sk = NULL;
 	err = sock_queue_err_skb(sk, skb);
+	sock_put(sk);
 	if (err)
 		kfree_skb(skb);
 }
-- 
1.7.2.5

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox