Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH 16/25] ipvs: wakeup master thread
From: pablo @ 2012-05-08 18:38 UTC (permalink / raw)
  To: netdev; +Cc: davem, openbsc
In-Reply-To: <1336502303-1722-1-git-send-email-pablo@netfilter.org>

From: Pablo Neira Ayuso <pablo@netfilter.org>

	High rate of sync messages in master can lead to
overflowing the socket buffer and dropping the messages.
A fixed sleep of 1 second without wakeup events is not suitable
for loaded masters.

	Use delayed_work to schedule sending for queued messages
and limit the delay to IPVS_SYNC_SEND_DELAY (20ms). This will
reduce the rate of wakeups, but to avoid sending long bursts we
wake up the master thread after IPVS_SYNC_WAKEUP_RATE (8) messages.

	Add hard limit for the queued messages before sending
by using "sync_qlen_max" sysctl var. It defaults to 1/32 of
the memory pages but actually represents number of messages.
It will protect us from allocating large parts of memory
when the sending rate is lower than the queuing rate.

	As suggested by Pablo, add new sysctl var
"sync_sock_size" to configure the SNDBUF (master) or
RCVBUF (slave) socket limit. Default value is 0 (preserve
system defaults).

	Change the master thread to detect and block on
SNDBUF overflow, so that we do not drop messages when
the socket limit is low but the sync_qlen_max limit is
not reached. On ENOBUFS or other errors just drop the
messages.

	Change master thread to enter TASK_INTERRUPTIBLE
state early, so that we do not miss wakeups due to messages or
kthread_should_stop event.

Thanks to Pablo Neira Ayuso for his valuable feedback!

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h             |   29 ++++++++
 net/netfilter/ipvs/ip_vs_ctl.c  |   16 +++++
 net/netfilter/ipvs/ip_vs_sync.c |  149 ++++++++++++++++++++++++++++++---------
 3 files changed, 162 insertions(+), 32 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 93b81aa..30e43c8 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -869,6 +869,8 @@ struct netns_ipvs {
 #endif
 	int			sysctl_snat_reroute;
 	int			sysctl_sync_ver;
+	int			sysctl_sync_qlen_max;
+	int			sysctl_sync_sock_size;
 	int			sysctl_cache_bypass;
 	int			sysctl_expire_nodest_conn;
 	int			sysctl_expire_quiescent_template;
@@ -889,6 +891,9 @@ struct netns_ipvs {
 	struct timer_list	est_timer;	/* Estimation timer */
 	/* ip_vs_sync */
 	struct list_head	sync_queue;
+	int			sync_queue_len;
+	unsigned int		sync_queue_delay;
+	struct delayed_work	master_wakeup_work;
 	spinlock_t		sync_lock;
 	struct ip_vs_sync_buff  *sync_buff;
 	spinlock_t		sync_buff_lock;
@@ -911,6 +916,10 @@ struct netns_ipvs {
 #define DEFAULT_SYNC_THRESHOLD	3
 #define DEFAULT_SYNC_PERIOD	50
 #define DEFAULT_SYNC_VER	1
+#define IPVS_SYNC_WAKEUP_RATE	8
+#define IPVS_SYNC_QLEN_MAX	(IPVS_SYNC_WAKEUP_RATE * 4)
+#define IPVS_SYNC_SEND_DELAY	(HZ / 50)
+#define IPVS_SYNC_CHECK_PERIOD	HZ
 
 #ifdef CONFIG_SYSCTL
 
@@ -929,6 +938,16 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
 	return ipvs->sysctl_sync_ver;
 }
 
+static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_sync_qlen_max;
+}
+
+static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_sync_sock_size;
+}
+
 #else
 
 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -946,6 +965,16 @@ static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
 	return DEFAULT_SYNC_VER;
 }
 
+static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
+{
+	return IPVS_SYNC_QLEN_MAX;
+}
+
+static inline int sysctl_sync_sock_size(struct netns_ipvs *ipvs)
+{
+	return 0;
+}
+
 #endif
 
 /*
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 37b9199..bd3827e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1718,6 +1718,18 @@ static struct ctl_table vs_vars[] = {
 		.proc_handler	= &proc_do_sync_mode,
 	},
 	{
+		.procname	= "sync_qlen_max",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sync_sock_size",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
 		.procname	= "cache_bypass",
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
@@ -3655,6 +3667,10 @@ int __net_init ip_vs_control_net_init_sysctl(struct net *net)
 	tbl[idx++].data = &ipvs->sysctl_snat_reroute;
 	ipvs->sysctl_sync_ver = 1;
 	tbl[idx++].data = &ipvs->sysctl_sync_ver;
+	ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
+	tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
+	ipvs->sysctl_sync_sock_size = 0;
+	tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
 	tbl[idx++].data = &ipvs->sysctl_cache_bypass;
 	tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
 	tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index d2df694..b3235b2 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -307,11 +307,15 @@ static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
 	spin_lock_bh(&ipvs->sync_lock);
 	if (list_empty(&ipvs->sync_queue)) {
 		sb = NULL;
+		__set_current_state(TASK_INTERRUPTIBLE);
 	} else {
 		sb = list_entry(ipvs->sync_queue.next,
 				struct ip_vs_sync_buff,
 				list);
 		list_del(&sb->list);
+		ipvs->sync_queue_len--;
+		if (!ipvs->sync_queue_len)
+			ipvs->sync_queue_delay = 0;
 	}
 	spin_unlock_bh(&ipvs->sync_lock);
 
@@ -358,9 +362,16 @@ static inline void sb_queue_tail(struct netns_ipvs *ipvs)
 	struct ip_vs_sync_buff *sb = ipvs->sync_buff;
 
 	spin_lock(&ipvs->sync_lock);
-	if (ipvs->sync_state & IP_VS_STATE_MASTER)
+	if (ipvs->sync_state & IP_VS_STATE_MASTER &&
+	    ipvs->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {
+		if (!ipvs->sync_queue_len)
+			schedule_delayed_work(&ipvs->master_wakeup_work,
+					      max(IPVS_SYNC_SEND_DELAY, 1));
+		ipvs->sync_queue_len++;
 		list_add_tail(&sb->list, &ipvs->sync_queue);
-	else
+		if ((++ipvs->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)
+			wake_up_process(ipvs->master_thread);
+	} else
 		ip_vs_sync_buff_release(sb);
 	spin_unlock(&ipvs->sync_lock);
 }
@@ -379,6 +390,7 @@ get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
 	    time_after_eq(jiffies - ipvs->sync_buff->firstuse, time)) {
 		sb = ipvs->sync_buff;
 		ipvs->sync_buff = NULL;
+		__set_current_state(TASK_RUNNING);
 	} else
 		sb = NULL;
 	spin_unlock_bh(&ipvs->sync_buff_lock);
@@ -392,26 +404,23 @@ get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
 void ip_vs_sync_switch_mode(struct net *net, int mode)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_sync_buff *sb;
 
+	spin_lock_bh(&ipvs->sync_buff_lock);
 	if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
-		return;
-	if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)
-		return;
+		goto unlock;
+	sb = ipvs->sync_buff;
+	if (mode == sysctl_sync_ver(ipvs) || !sb)
+		goto unlock;
 
-	spin_lock_bh(&ipvs->sync_buff_lock);
 	/* Buffer empty ? then let buf_create do the job  */
-	if (ipvs->sync_buff->mesg->size <=  sizeof(struct ip_vs_sync_mesg)) {
-		kfree(ipvs->sync_buff);
+	if (sb->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
+		ip_vs_sync_buff_release(sb);
 		ipvs->sync_buff = NULL;
-	} else {
-		spin_lock_bh(&ipvs->sync_lock);
-		if (ipvs->sync_state & IP_VS_STATE_MASTER)
-			list_add_tail(&ipvs->sync_buff->list,
-				      &ipvs->sync_queue);
-		else
-			ip_vs_sync_buff_release(ipvs->sync_buff);
-		spin_unlock_bh(&ipvs->sync_lock);
-	}
+	} else
+		sb_queue_tail(ipvs);
+
+unlock:
 	spin_unlock_bh(&ipvs->sync_buff_lock);
 }
 
@@ -1130,6 +1139,28 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
 
 
 /*
+ *      Setup sndbuf (mode=1) or rcvbuf (mode=0)
+ */
+static void set_sock_size(struct sock *sk, int mode, int val)
+{
+	/* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */
+	/* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */
+	lock_sock(sk);
+	if (mode) {
+		val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
+			      sysctl_wmem_max);
+		sk->sk_sndbuf = val * 2;
+		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
+	} else {
+		val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
+			      sysctl_rmem_max);
+		sk->sk_rcvbuf = val * 2;
+		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+	}
+	release_sock(sk);
+}
+
+/*
  *      Setup loopback of outgoing multicasts on a sending socket
  */
 static void set_mcast_loop(struct sock *sk, u_char loop)
@@ -1305,6 +1336,9 @@ static struct socket *make_send_sock(struct net *net)
 
 	set_mcast_loop(sock->sk, 0);
 	set_mcast_ttl(sock->sk, 1);
+	result = sysctl_sync_sock_size(ipvs);
+	if (result > 0)
+		set_sock_size(sock->sk, 1, result);
 
 	result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
 	if (result < 0) {
@@ -1350,6 +1384,9 @@ static struct socket *make_receive_sock(struct net *net)
 	sk_change_net(sock->sk, net);
 	/* it is equivalent to the REUSEADDR option in user-space */
 	sock->sk->sk_reuse = SK_CAN_REUSE;
+	result = sysctl_sync_sock_size(ipvs);
+	if (result > 0)
+		set_sock_size(sock->sk, 0, result);
 
 	result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
 			sizeof(struct sockaddr));
@@ -1392,18 +1429,22 @@ ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length)
 	return len;
 }
 
-static void
+static int
 ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
 {
 	int msize;
+	int ret;
 
 	msize = msg->size;
 
 	/* Put size in network byte order */
 	msg->size = htons(msg->size);
 
-	if (ip_vs_send_async(sock, (char *)msg, msize) != msize)
-		pr_err("ip_vs_send_async error\n");
+	ret = ip_vs_send_async(sock, (char *)msg, msize);
+	if (ret >= 0 || ret == -EAGAIN)
+		return ret;
+	pr_err("ip_vs_send_async error %d\n", ret);
+	return 0;
 }
 
 static int
@@ -1428,36 +1469,75 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
 	return len;
 }
 
+/* Wakeup the master thread for sending */
+static void master_wakeup_work_handler(struct work_struct *work)
+{
+	struct netns_ipvs *ipvs = container_of(work, struct netns_ipvs,
+					       master_wakeup_work.work);
+
+	spin_lock_bh(&ipvs->sync_lock);
+	if (ipvs->sync_queue_len &&
+	    ipvs->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {
+		ipvs->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;
+		wake_up_process(ipvs->master_thread);
+	}
+	spin_unlock_bh(&ipvs->sync_lock);
+}
+
+/* Get next buffer to send */
+static inline struct ip_vs_sync_buff *
+next_sync_buff(struct netns_ipvs *ipvs)
+{
+	struct ip_vs_sync_buff *sb;
+
+	sb = sb_dequeue(ipvs);
+	if (sb)
+		return sb;
+	/* Do not delay entries in buffer for more than 2 seconds */
+	return get_curr_sync_buff(ipvs, 2 * HZ);
+}
 
 static int sync_thread_master(void *data)
 {
 	struct ip_vs_sync_thread_data *tinfo = data;
 	struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
+	struct sock *sk = tinfo->sock->sk;
 	struct ip_vs_sync_buff *sb;
 
 	pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
 		"syncid = %d\n",
 		ipvs->master_mcast_ifn, ipvs->master_syncid);
 
-	while (!kthread_should_stop()) {
-		while ((sb = sb_dequeue(ipvs))) {
-			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
-			ip_vs_sync_buff_release(sb);
+	for (;;) {
+		sb = next_sync_buff(ipvs);
+		if (unlikely(kthread_should_stop()))
+			break;
+		if (!sb) {
+			schedule_timeout(IPVS_SYNC_CHECK_PERIOD);
+			continue;
 		}
-
-		/* check if entries stay in ipvs->sync_buff for 2 seconds */
-		sb = get_curr_sync_buff(ipvs, 2 * HZ);
-		if (sb) {
-			ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
-			ip_vs_sync_buff_release(sb);
+		while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {
+			int ret = 0;
+
+			__wait_event_interruptible(*sk_sleep(sk),
+						   sock_writeable(sk) ||
+						   kthread_should_stop(),
+						   ret);
+			if (unlikely(kthread_should_stop()))
+				goto done;
 		}
-
-		schedule_timeout_interruptible(HZ);
+		ip_vs_sync_buff_release(sb);
 	}
 
+done:
+	__set_current_state(TASK_RUNNING);
+	if (sb)
+		ip_vs_sync_buff_release(sb);
+
 	/* clean up the sync_buff queue */
 	while ((sb = sb_dequeue(ipvs)))
 		ip_vs_sync_buff_release(sb);
+	__set_current_state(TASK_RUNNING);
 
 	/* clean up the current sync_buff */
 	sb = get_curr_sync_buff(ipvs, 0);
@@ -1538,6 +1618,10 @@ int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
 		realtask = &ipvs->master_thread;
 		name = "ipvs_master:%d";
 		threadfn = sync_thread_master;
+		ipvs->sync_queue_len = 0;
+		ipvs->sync_queue_delay = 0;
+		INIT_DELAYED_WORK(&ipvs->master_wakeup_work,
+				  master_wakeup_work_handler);
 		sock = make_send_sock(net);
 	} else if (state == IP_VS_STATE_BACKUP) {
 		if (ipvs->backup_thread)
@@ -1623,6 +1707,7 @@ int stop_sync_thread(struct net *net, int state)
 		spin_lock_bh(&ipvs->sync_lock);
 		ipvs->sync_state &= ~IP_VS_STATE_MASTER;
 		spin_unlock_bh(&ipvs->sync_lock);
+		cancel_delayed_work_sync(&ipvs->master_wakeup_work);
 		retc = kthread_stop(ipvs->master_thread);
 		ipvs->master_thread = NULL;
 	} else if (state == IP_VS_STATE_BACKUP) {
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 13/25] ipvs: remove check for IP_VS_CONN_F_SYNC from ip_vs_bind_dest
From: pablo @ 2012-05-08 18:38 UTC (permalink / raw)
  To: netdev; +Cc: davem, openbsc
In-Reply-To: <1336502303-1722-1-git-send-email-pablo@netfilter.org>

From: Julian Anastasov <ja@ssi.bg>

	As the IP_VS_CONN_F_INACTIVE bit is properly set
in cp->flags for all kinds of connections, we do not need to
add special checks for synced connections when updating
the activeconns/inactconns counters for the first time. Now
the logic will look just like in ip_vs_unbind_dest.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_conn.c |   10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index f562e63..1c1bb30 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -585,11 +585,11 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 
 	/* Update the connection counters */
 	if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {
-		/* It is a normal connection, so increase the inactive
-		   connection counter because it is in TCP SYNRECV
-		   state (inactive) or other protocol inacive state */
-		if ((cp->flags & IP_VS_CONN_F_SYNC) &&
-		    (!(cp->flags & IP_VS_CONN_F_INACTIVE)))
+		/* It is a normal connection, so modify the counters
+		 * according to the flags, later the protocol can
+		 * update them on state change
+		 */
+		if (!(cp->flags & IP_VS_CONN_F_INACTIVE))
 			atomic_inc(&dest->activeconns);
 		else
 			atomic_inc(&dest->inactconns);
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 12/25] ipvs: ignore IP_VS_CONN_F_NOOUTPUT in backup server
From: pablo @ 2012-05-08 18:38 UTC (permalink / raw)
  To: netdev; +Cc: davem, openbsc
In-Reply-To: <1336502303-1722-1-git-send-email-pablo@netfilter.org>

From: Julian Anastasov <ja@ssi.bg>

	As IP_VS_CONN_F_NOOUTPUT is derived from the
forwarding method we should get it from conn_flags just
like we do it for IP_VS_CONN_F_FWD_MASK bits when binding
to real server.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_conn.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 4a09b78..f562e63 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -567,7 +567,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 		if (!(cp->flags & IP_VS_CONN_F_TEMPLATE))
 			conn_flags &= ~IP_VS_CONN_F_INACTIVE;
 		/* connections inherit forwarding method from dest */
-		cp->flags &= ~IP_VS_CONN_F_FWD_MASK;
+		cp->flags &= ~(IP_VS_CONN_F_FWD_MASK | IP_VS_CONN_F_NOOUTPUT);
 	}
 	cp->flags |= conn_flags;
 	cp->dest = dest;
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 11/25] ipvs: use GFP_KERNEL allocation where possible
From: pablo @ 2012-05-08 18:38 UTC (permalink / raw)
  To: netdev; +Cc: davem, openbsc
In-Reply-To: <1336502303-1722-1-git-send-email-pablo@netfilter.org>

From: Sasha Levin <levinsasha928@gmail.com>

Use GFP_KERNEL instead of GFP_ATOMIC when registering an ipvs protocol.

This is safe since it will always run from a process context.

Signed-off-by: Sasha Levin <levinsasha928@gmail.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_proto.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index ca16476..e91c898 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -68,7 +68,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
 	struct ip_vs_proto_data *pd =
-			kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);
+			kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL);
 
 	if (!pd)
 		return -ENOMEM;
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 09/25] ipvs: LBLCR scheduler does not need GFP_ATOMIC allocation on init
From: pablo @ 2012-05-08 18:38 UTC (permalink / raw)
  To: netdev; +Cc: davem, openbsc
In-Reply-To: <1336502303-1722-1-git-send-email-pablo@netfilter.org>

From: Julian Anastasov <ja@ssi.bg>

	Schedulers are initialized and bound to services only
on commands.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_lblcr.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 9dcd39a..570e31e 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -511,7 +511,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
 	/*
 	 *    Allocate the ip_vs_lblcr_table for this service
 	 */
-	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
+	tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
 	if (tbl == NULL)
 		return -ENOMEM;
 
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 06/25] ipvs: LBLC scheduler does not need GFP_ATOMIC allocation on init
From: pablo @ 2012-05-08 18:38 UTC (permalink / raw)
  To: netdev; +Cc: davem, openbsc
In-Reply-To: <1336502303-1722-1-git-send-email-pablo@netfilter.org>

From: Julian Anastasov <ja@ssi.bg>

	Schedulers are initialized and bound to services only
on commands.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 net/netfilter/ipvs/ip_vs_lblc.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 9b0de9a..df646cc 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -342,7 +342,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
 	/*
 	 *    Allocate the ip_vs_lblc_table for this service
 	 */
-	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
+	tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);
 	if (tbl == NULL)
 		return -ENOMEM;
 
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 00/25] [v3] netfilter updates for net-next (upcoming 3.5)
From: pablo @ 2012-05-08 18:37 UTC (permalink / raw)
  To: netdev; +Cc: davem, openbsc

From: Pablo Neira Ayuso <pablo@netfilter.org>

Hi David,

Third version, now based on your fresh current net-next tree.

The following patchset contains the Netfilter updates for net-next.
Most notably:

* The new /proc/sys/net/netfilter/nf_conntrack_helper entry that
  allows disabling the automatic conntrack helper assignment, from
  Eric Leblond. This patch also adds a warning to inform the user
  that this behaviour will be removed at some point. The automatic
  conntrack helper assignment may allow attackers to open holes in
  the firewall to access the protected network segments (with
  incorrect configurations). More information on this issue at:

  https://home.regit.org/netfilter-en/secure-use-of-helpers/

  In the near future, all conntrack helpers will be explicitly
  attached via the CT target, as we discussed at length during
  the last netfilter workshop.

* One new sysctl to translate the input device to vlan device name
  from Florian Westphal. He required this to get the REDIRECT target
  working with another sysctl vlan-on-top-of-bridge.

* Major improvements in the ip_vs_sync code from Julian Anastasov.
  They aim to improve scalability and to address possible message
  loss due to socket overrun under high rate of synchronization
  messages.

* Several minor memory allocation flags fixes from IPVS
  contributors.

* Eric Leblond's patch spotted one problem that becomes noticeable
  if a) automatic helper assignment is disabled, and b) if NAT is
  in use, and c) the CT target is used to attach a non-standard
  conntrack helper port. This fix comes from myself.

* One small update to allow updating the expectation timeout from
  Kelvie Wong.

* Finally, removal of ip[6]_queue support, from myself, since they
  were marked as obsolete a long time ago. Now we have
  nfnetlink_queue, which is way more flexible.

You can pull these changes from:

git://1984.lsi.us.es/net-next master

If time allows, I'd like to send a second batch. There are several patches
still on netfilter-devel that are very close to getting into shape.

Thanks!

Eric Dumazet (1):
  netfilter: nf_conntrack: use this_cpu_inc()

Eric Leblond (1):
  netfilter: nf_ct_helper: allow to disable automatic helper assignment

Florian Westphal (1):
  netfilter: bridge: optionally set indev to vlan

H Hartley Sweeten (2):
  ipvs: ip_vs_ftp: local functions should not be exposed globally
  ipvs: ip_vs_proto: local functions should not be exposed globally

Hans Schillstrom (1):
  net: export sysctl_[r|w]mem_max symbols needed by ip_vs_sync

Julian Anastasov (14):
  ipvs: timeout tables do not need GFP_ATOMIC allocation
  ipvs: LBLC scheduler does not need GFP_ATOMIC allocation on init
  ipvs: DH scheduler does not need GFP_ATOMIC allocation
  ipvs: WRR scheduler does not need GFP_ATOMIC allocation
  ipvs: LBLCR scheduler does not need GFP_ATOMIC allocation on init
  ipvs: SH scheduler does not need GFP_ATOMIC allocation
  ipvs: ignore IP_VS_CONN_F_NOOUTPUT in backup server
  ipvs: remove check for IP_VS_CONN_F_SYNC from ip_vs_bind_dest
  ipvs: fix ip_vs_try_bind_dest to rebind app and transmitter
  ipvs: always update some of the flags bits in backup
  ipvs: wakeup master thread
  ipvs: reduce sync rate with time thresholds
  ipvs: add support for sync threads
  ipvs: optimize the use of flags in ip_vs_bind_dest

Kelvie Wong (1):
  netfilter: nf_ct_expect: partially implement ctnetlink_change_expect

Pablo Neira Ayuso (2):
  netfilter: nf_conntrack: fix explicit helper attachment and NAT
  netfilter: remove ip_queue support

Sasha Levin (1):
  ipvs: use GFP_KERNEL allocation where possible

Tony Zelenoff (1):
  netfilter: nf_ct_ecache: refactor notifier registration

 Documentation/ABI/removed/ip_queue            |    9 +
 Documentation/networking/ip-sysctl.txt        |   13 +-
 include/linux/ip_vs.h                         |    5 +
 include/linux/netfilter/nf_conntrack_common.h |    4 +
 include/linux/netfilter_ipv4/Kbuild           |    1 -
 include/linux/netfilter_ipv4/ip_queue.h       |   72 ---
 include/linux/netlink.h                       |    2 +-
 include/net/ip_vs.h                           |   87 +++-
 include/net/netfilter/nf_conntrack.h          |   10 +-
 include/net/netfilter/nf_conntrack_helper.h   |    4 +-
 include/net/netns/conntrack.h                 |    3 +
 net/bridge/br_netfilter.c                     |   26 +-
 net/core/sock.c                               |    2 +
 net/ipv4/netfilter/Makefile                   |    3 -
 net/ipv4/netfilter/ip_queue.c                 |  639 ------------------------
 net/ipv6/netfilter/Kconfig                    |   22 -
 net/ipv6/netfilter/Makefile                   |    1 -
 net/ipv6/netfilter/ip6_queue.c                |  641 ------------------------
 net/netfilter/ipvs/ip_vs_conn.c               |   69 ++-
 net/netfilter/ipvs/ip_vs_core.c               |   30 +-
 net/netfilter/ipvs/ip_vs_ctl.c                |   70 ++-
 net/netfilter/ipvs/ip_vs_dh.c                 |    2 +-
 net/netfilter/ipvs/ip_vs_ftp.c                |    2 +-
 net/netfilter/ipvs/ip_vs_lblc.c               |    2 +-
 net/netfilter/ipvs/ip_vs_lblcr.c              |    2 +-
 net/netfilter/ipvs/ip_vs_proto.c              |    6 +-
 net/netfilter/ipvs/ip_vs_sh.c                 |    2 +-
 net/netfilter/ipvs/ip_vs_sync.c               |  662 +++++++++++++++++--------
 net/netfilter/ipvs/ip_vs_wrr.c                |    2 +-
 net/netfilter/nf_conntrack_core.c             |   15 +-
 net/netfilter/nf_conntrack_ecache.c           |   10 +-
 net/netfilter/nf_conntrack_helper.c           |  120 ++++-
 net/netfilter/nf_conntrack_netlink.c          |   10 +-
 security/selinux/nlmsgtab.c                   |   13 -
 34 files changed, 853 insertions(+), 1708 deletions(-)
 create mode 100644 Documentation/ABI/removed/ip_queue
 delete mode 100644 include/linux/netfilter_ipv4/ip_queue.h
 delete mode 100644 net/ipv4/netfilter/ip_queue.c
 delete mode 100644 net/ipv6/netfilter/ip6_queue.c

-- 
1.7.9.5

^ permalink raw reply

* Re: [PATCH] pch_gbe: Adding read memory barriers
From: Erwan Velu @ 2012-05-08 18:35 UTC (permalink / raw)
  To: netdev, linux-kernel; +Cc: tshimizu818
In-Reply-To: <4FA822C4.60809@gmail.com>

Le 07/05/2012 21:30, Erwan Velu a écrit :
> From bb1e271db0fa1a29df19bede69faf8004389132d Mon Sep 17 00:00:00 2001
> From: Erwan Velu <erwan.velu@zodiacaerospace.com>
> Date: Mon, 7 May 2012 19:15:29 +0000
> Subject: [PATCH 1/1] pch_gbe: Adding read memory barriers


Hey Fellows,

Can my patch be considered acceptable, or shall I rework
something in it?

Cheers,
Erwan

^ permalink raw reply

* Re: batostr() function
From: Luis R. Rodriguez @ 2012-05-08 18:26 UTC (permalink / raw)
  To: Johannes Berg
  Cc: Joe Perches, Andrei Emeltchenko, David Herrmann,
	linux-bluetooth-u79uwXL29TY76Z2rM5mHXA, netdev
In-Reply-To: <1336499313.4320.4.camel-8upI4CBIZJIJvtFkdXX2HixXY32XiHfO@public.gmane.org>

On Tue, May 8, 2012 at 10:48 AM, Johannes Berg
<johannes-cdvu00un1VgdHxzADdlk8Q@public.gmane.org> wrote:
> On Tue, 2012-05-08 at 10:18 -0700, Joe Perches wrote:
>> On Tue, 2012-05-08 at 17:30 +0300, Andrei Emeltchenko wrote:
>> > On Tue, May 08, 2012 at 04:25:08PM +0200, Johannes Berg wrote:
>> > > On Tue, 2012-05-08 at 15:30 +0200, David Herrmann wrote:
>> > > > Hi Johannes
>> > > >
>> > > > On Mon, May 7, 2012 at 1:49 PM, Johannes Berg <johannes-cdvu00un1VgdHxzADdlk8Q@public.gmane.org> wrote:
>> > > > > Really? 2 static buffers that are used alternately based on a static
>> > > > > variable? How can that possibly be thread-safe? That may work in very
>> > > > > restricted scenarios, but ...
>> > > >
>> > > > Looking at "git blame" it seems the whole function is still from
>> > > > linux-2.4. Looks like no-one ever noticed. I've sent a patchset fixing
>> > > > it, thanks.
>> > >
>> > > I was thinking you could use %pM, but it seems BT addresses are stored
>> > > the wrong way around for some reason ...
>> >
>> > This looks like better idea then allocating buffers, we can use swap to
>> > take care about "wrong order".
>>
>> https://lkml.org/lkml/2010/12/3/358
>
> Pretty much what I had in mind, thanks. Luis, you'll notice that this
> will be a pain to backport in compat. :-)

Mumble grumble. Oh well! :) I'm starting to enjoy the curve balls.

  Luis

^ permalink raw reply

* Re: [PATCH] net: update the usage of CHECKSUM_UNNECESSARY
From: Ben Hutchings @ 2012-05-08 18:20 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	jeffrey.t.kirsher-ral2JQCrhuEAvxtiuMwx3w,
	devel-s9riP+hp16TNLxjTenLetw
In-Reply-To: <20120508174831.GA27406-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

On Tue, 2012-05-08 at 20:48 +0300, Michael S. Tsirkin wrote:
> On Mon, Mar 19, 2012 at 02:12:41PM -0700, Yi Zou wrote:
> > As suggested by Ben, this adds the clarification on the usage of
> > CHECKSUM_UNNECESSARY on the outgoing patch. Also add the usage
> > description of NETIF_F_FCOE_CRC and CHECKSUM_UNNECESSARY
> > for the kernel FCoE protocol driver.
> > 
> > This is a follow-up to the following:
> > http://patchwork.ozlabs.org/patch/147315/
> > 
> > Signed-off-by: Yi Zou <yi.zou-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> > Cc: Ben Hutchings <bhutchings-s/n/eUQHGBpZroRs9YW3xA@public.gmane.org>
> > Cc: Jeff Kirsher <jeffrey.t.kirsher-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> > Cc: www.Open-FCoE.org <devel-s9riP+hp16TNLxjTenLetw@public.gmane.org>
> > ---
> > 
> >  include/linux/skbuff.h |    7 +++++++
> >  1 files changed, 7 insertions(+), 0 deletions(-)
> > 
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index 8dc8257..a2b9953 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -94,6 +94,13 @@
> >   *			  about CHECKSUM_UNNECESSARY. 8)
> >   *	NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead.
> >   *
> > + *	UNNECESSARY: device will do per protocol specific csum. Protocol drivers
> > + *	that do not want net to perform the checksum calculation should use
> > + *	this flag in their outgoing skbs.
> > + *	NETIF_F_FCOE_CRC  this indicates the device can do FCoE FC CRC
> > + *			  offload. Correspondingly, the FCoE protocol driver
> > + *			  stack should use CHECKSUM_UNNECESSARY.
> > + *
> >   *	Any questions? No questions, good. 		--ANK
> >   */
> >  
> 
> So just to make sure I understand, you never get
> UNNECESSARY packets on tx unless you declared NETIF_F_FCOE_CRC?
> 
> Maybe the comment says this somehow but could not figure it out.

That's what should happen now.  In future CHECKSUM_UNNECESSARY could be
used on output by other protocols which don't use TCP/IP-style
checksums, but always dependent on the output device supporting the
relevant offload feature.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: [PATCH] net: update the usage of CHECKSUM_UNNECESSARY
From: Michael S. Tsirkin @ 2012-05-08 17:48 UTC (permalink / raw)
  To: Yi Zou; +Cc: netdev, devel, bhutchings, jeffrey.t.kirsher
In-Reply-To: <20120319211241.11291.53271.stgit@localhost6.localdomain6>

On Mon, Mar 19, 2012 at 02:12:41PM -0700, Yi Zou wrote:
> As suggested by Ben, this adds the clarification on the usage of
> CHECKSUM_UNNECESSARY on the outgoing patch. Also add the usage
> description of NETIF_F_FCOE_CRC and CHECKSUM_UNNECESSARY
> for the kernel FCoE protocol driver.
> 
> This is a follow-up to the following:
> http://patchwork.ozlabs.org/patch/147315/
> 
> Signed-off-by: Yi Zou <yi.zou@intel.com>
> Cc: Ben Hutchings <bhutchings@solarflare.com>
> Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> Cc: www.Open-FCoE.org <devel@open-fcoe.org>
> ---
> 
>  include/linux/skbuff.h |    7 +++++++
>  1 files changed, 7 insertions(+), 0 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 8dc8257..a2b9953 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -94,6 +94,13 @@
>   *			  about CHECKSUM_UNNECESSARY. 8)
>   *	NETIF_F_IPV6_CSUM about as dumb as the last one but does IPv6 instead.
>   *
> + *	UNNECESSARY: device will do per protocol specific csum. Protocol drivers
> + *	that do not want net to perform the checksum calculation should use
> + *	this flag in their outgoing skbs.
> + *	NETIF_F_FCOE_CRC  this indicates the device can do FCoE FC CRC
> + *			  offload. Correspondingly, the FCoE protocol driver
> + *			  stack should use CHECKSUM_UNNECESSARY.
> + *
>   *	Any questions? No questions, good. 		--ANK
>   */
>  

So just to make sure I understand, you never get
UNNECESSARY packets on tx unless you declared NETIF_F_FCOE_CRC?

Maybe the comment says this somehow but could not figure it out.



> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: batostr() function
From: Johannes Berg @ 2012-05-08 17:48 UTC (permalink / raw)
  To: Joe Perches
  Cc: Andrei Emeltchenko, David Herrmann, linux-bluetooth, netdev,
	Luis R. Rodriguez
In-Reply-To: <1336497532.29640.24.camel@joe2Laptop>

On Tue, 2012-05-08 at 10:18 -0700, Joe Perches wrote:
> On Tue, 2012-05-08 at 17:30 +0300, Andrei Emeltchenko wrote:
> > On Tue, May 08, 2012 at 04:25:08PM +0200, Johannes Berg wrote:
> > > On Tue, 2012-05-08 at 15:30 +0200, David Herrmann wrote:
> > > > Hi Johannes
> > > > 
> > > > On Mon, May 7, 2012 at 1:49 PM, Johannes Berg <johannes@sipsolutions.net> wrote:
> > > > > Really? 2 static buffers that are used alternately based on a static
> > > > > variable? How can that possibly be thread-safe? That may work in very
> > > > > restricted scenarios, but ...
> > > > 
> > > > Looking at "git blame" it seems the whole function is still from
> > > > linux-2.4. Looks like no-one ever noticed. I've sent a patchset fixing
> > > > it, thanks.
> > > 
> > > I was thinking you could use %pM, but it seems BT addresses are stored
> > > the wrong way around for some reason ...
> > 
> > This looks like better idea then allocating buffers, we can use swap to
> > take care about "wrong order".
> 
> https://lkml.org/lkml/2010/12/3/358

Pretty much what I had in mind, thanks. Luis, you'll notice that this
will be a pain to backport in compat. :-)

johannes

^ permalink raw reply

* Re: batostr() function
From: Joe Perches @ 2012-05-08 17:18 UTC (permalink / raw)
  To: Andrei Emeltchenko
  Cc: Johannes Berg, David Herrmann,
	linux-bluetooth-u79uwXL29TY76Z2rM5mHXA, netdev
In-Reply-To: <20120508143011.GD29352@aemeltch-MOBL1>

On Tue, 2012-05-08 at 17:30 +0300, Andrei Emeltchenko wrote:
> On Tue, May 08, 2012 at 04:25:08PM +0200, Johannes Berg wrote:
> > On Tue, 2012-05-08 at 15:30 +0200, David Herrmann wrote:
> > > Hi Johannes
> > > 
> > > On Mon, May 7, 2012 at 1:49 PM, Johannes Berg <johannes-cdvu00un1VgdHxzADdlk8Q@public.gmane.org> wrote:
> > > > Really? 2 static buffers that are used alternately based on a static
> > > > variable? How can that possibly be thread-safe? That may work in very
> > > > restricted scenarios, but ...
> > > 
> > > Looking at "git blame" it seems the whole function is still from
> > > linux-2.4. Looks like no-one ever noticed. I've sent a patchset fixing
> > > it, thanks.
> > 
> > I was thinking you could use %pM, but it seems BT addresses are stored
> > the wrong way around for some reason ...
> 
> This looks like better idea then allocating buffers, we can use swap to
> take care about "wrong order".

https://lkml.org/lkml/2010/12/3/358

^ permalink raw reply

* Re: [PATCH 00/25] [v2] netfilter updates for net-next (upcoming 3.5)
From: David Miller @ 2012-05-08 17:12 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel, netdev
In-Reply-To: <20120508171049.GA16205@1984>

From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 8 May 2012 19:10:49 +0200

> I can rebase all my patches on top of fresh tree, fix all conflicts
> myself and send you another patchset that will apply to your current
> tree.
> 
> Please, let me know how to proceed.

Yes, please build a tree that compiles properly :-)

The error is pretty trivial to reproduce, just "allmodconfig" like I
do.

^ permalink raw reply

* Re: [PATCH 00/25] [v2] netfilter updates for net-next (upcoming 3.5)
From: Pablo Neira Ayuso @ 2012-05-08 17:10 UTC (permalink / raw)
  To: David Miller; +Cc: netfilter-devel, netdev
In-Reply-To: <20120508.124926.2241646617733136639.davem@davemloft.net>

On Tue, May 08, 2012 at 12:49:26PM -0400, David Miller wrote:
> From: pablo@netfilter.org
> Date: Tue,  8 May 2012 09:49:29 +0200
> 
> > Second version including requested updates.
> 
> There were lots of conflicts, due to my merge of net into net-next.
> 
> Those were easy enough, but the result doesn't build.
> 
> net/netfilter/nf_conntrack_helper.c: In function ‘nf_conntrack_helper_init_sysctl’:
> net/netfilter/nf_conntrack_helper.c:65:2: error: implicit declaration of function ‘register_net_sysctl_table’ [-Werror=implicit-function-declaration]
> net/netfilter/nf_conntrack_helper.c:66:4: error: ‘nf_net_netfilter_sysctl_path’ undeclared (first use in this function)
> net/netfilter/nf_conntrack_helper.c:66:4: note: each undeclared identifier is reported only once for each function it appears in

Strange, this compiles here. Probably you have to add the following
include to net/netfilter/nf_conntrack_helper.c:

#include <net/net_namespace.h>

I can rebase all my patches on top of fresh tree, fix all conflicts
myself and send you another patchset that will apply to your current
tree.

Please, let me know how to proceed.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] etherdev.h: Convert int is_<foo>_ether_addr to bool
From: David Miller @ 2012-05-08 17:07 UTC (permalink / raw)
  To: joe; +Cc: johannes, eric.dumazet, netdev
In-Reply-To: <1336495480.29640.19.camel@joe2Laptop>

From: Joe Perches <joe@perches.com>
Date: Tue, 08 May 2012 09:44:40 -0700

> Make the return value explicitly true or false.
> 
> Signed-off-by: Joe Perches <joe@perches.com>

Looks good, applied, thanks Joe.

^ permalink raw reply

* Re: [PATCH] ipv4: Fixed checkpatch warnings and errors
From: David Miller @ 2012-05-08 17:01 UTC (permalink / raw)
  To: greearb
  Cc: cristian.chilipirea, kuznet, jmorris, yoshfuji, kaber, netdev,
	linux-kernel, daniel.baluta
In-Reply-To: <4FA9382A.9090709@candelatech.com>

From: Ben Greear <greearb@candelatech.com>
Date: Tue, 08 May 2012 08:13:46 -0700

> On 05/08/2012 03:58 AM, Cristian Chilipirea wrote:
>> Fixed warnings and errors in af_inet.c.
> 
>>   #ifdef CONFIG_COMPAT
>> -static int inet_compat_ioctl(struct socket *sock, unsigned int cmd,
>> -unsigned long arg)
>> +static int inet_compat_ioctl(struct socket *sock, unsigned int cmd,
>> +		unsigned long arg)
>>   {
> 
> That is so much worse than what was there before.
> 
> Lets stop useless checkpatch crap on existing code
> unless it's really ugly.

Agreed, this patch is terrible.

^ permalink raw reply

* Re: [PATCH net-next 3/4] netxen: added miniDIMM support in driver.
From: David Miller @ 2012-05-08 16:53 UTC (permalink / raw)
  To: rajesh.borundia; +Cc: netdev, ameen.rahman, sony.chacko, sucheta.chakraborty
In-Reply-To: <1336489288-22347-4-git-send-email-rajesh.borundia@qlogic.com>

From: Rajesh Borundia <rajesh.borundia@qlogic.com>
Date: Tue, 8 May 2012 08:01:27 -0700

> +static struct bin_attribute bin_attr_dimm = {
> +	.attr = {.name = "dimm", .mode = (S_IRUGO | S_IWUSR)},

How about a space next to the braces?

^ permalink raw reply

* Re: [PATCH net-next 2/4] netxen_nic: Allow only useful and recommended firmware dump capture mask values
From: David Miller @ 2012-05-08 16:52 UTC (permalink / raw)
  To: rajesh.borundia; +Cc: netdev, ameen.rahman, sony.chacko, manish.chopra
In-Reply-To: <1336489288-22347-3-git-send-email-rajesh.borundia@qlogic.com>

From: Rajesh Borundia <rajesh.borundia@qlogic.com>
Date: Tue, 8 May 2012 08:01:26 -0700

> +/* Fw dump levels */
> +static const u32 FW_DUMP_LEVELS[] = {
> +	0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff };

Format this more reasonably, this looks terrible.

^ permalink raw reply

* Re: [PATCH 00/25] [v2] netfilter updates for net-next (upcoming 3.5)
From: David Miller @ 2012-05-08 16:49 UTC (permalink / raw)
  To: pablo; +Cc: netfilter-devel, netdev
In-Reply-To: <1336463394-3119-1-git-send-email-pablo@netfilter.org>

From: pablo@netfilter.org
Date: Tue,  8 May 2012 09:49:29 +0200

> Second version including requested updates.

There were lots of conflicts, due to my merge of net into net-next.

Those were easy enough, but the result doesn't build.

net/netfilter/nf_conntrack_helper.c: In function ‘nf_conntrack_helper_init_sysctl’:
net/netfilter/nf_conntrack_helper.c:65:2: error: implicit declaration of function ‘register_net_sysctl_table’ [-Werror=implicit-function-declaration]
net/netfilter/nf_conntrack_helper.c:66:4: error: ‘nf_net_netfilter_sysctl_path’ undeclared (first use in this function)
net/netfilter/nf_conntrack_helper.c:66:4: note: each undeclared identifier is reported only once for each function it appears in

^ permalink raw reply

* [PATCH] etherdev.h: Convert int is_<foo>_ether_addr to bool
From: Joe Perches @ 2012-05-08 16:44 UTC (permalink / raw)
  To: David Miller; +Cc: johannes, eric.dumazet, netdev
In-Reply-To: <20120508.033120.1272130362698029549.davem@davemloft.net>

Make the return value explicitly true or false.

Signed-off-by: Joe Perches <joe@perches.com>

---

I grepped through the uses, there are a couple of
tests that store to an int that are unaffected.

There are also a couple of senseless tests of
	is_broadcast_ether_addr() || is_multicast_ether_addr()
in staging that could be improved.

 include/linux/etherdevice.h |   12 ++++++------
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 8a18358..46a95ef 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -59,7 +59,7 @@ extern struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
  *
  * Return true if the address is all zeroes.
  */
-static inline int is_zero_ether_addr(const u8 *addr)
+static inline bool is_zero_ether_addr(const u8 *addr)
 {
 	return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]);
 }
@@ -71,7 +71,7 @@ static inline int is_zero_ether_addr(const u8 *addr)
  * Return true if the address is a multicast address.
  * By definition the broadcast address is also a multicast address.
  */
-static inline int is_multicast_ether_addr(const u8 *addr)
+static inline bool is_multicast_ether_addr(const u8 *addr)
 {
 	return 0x01 & addr[0];
 }
@@ -82,7 +82,7 @@ static inline int is_multicast_ether_addr(const u8 *addr)
  *
  * Return true if the address is a local address.
  */
-static inline int is_local_ether_addr(const u8 *addr)
+static inline bool is_local_ether_addr(const u8 *addr)
 {
 	return 0x02 & addr[0];
 }
@@ -93,7 +93,7 @@ static inline int is_local_ether_addr(const u8 *addr)
  *
  * Return true if the address is the broadcast address.
  */
-static inline int is_broadcast_ether_addr(const u8 *addr)
+static inline bool is_broadcast_ether_addr(const u8 *addr)
 {
 	return (addr[0] & addr[1] & addr[2] & addr[3] & addr[4] & addr[5]) == 0xff;
 }
@@ -104,7 +104,7 @@ static inline int is_broadcast_ether_addr(const u8 *addr)
  *
  * Return true if the address is a unicast address.
  */
-static inline int is_unicast_ether_addr(const u8 *addr)
+static inline bool is_unicast_ether_addr(const u8 *addr)
 {
 	return !is_multicast_ether_addr(addr);
 }
@@ -118,7 +118,7 @@ static inline int is_unicast_ether_addr(const u8 *addr)
  *
  * Return true if the address is valid.
  */
-static inline int is_valid_ether_addr(const u8 *addr)
+static inline bool is_valid_ether_addr(const u8 *addr)
 {
 	/* FF:FF:FF:FF:FF:FF is a multicast address so we don't need to
 	 * explicitly check for it here. */

^ permalink raw reply related

* Re: [PATCH] net: compare_ether_addr[_64bits]() has no ordering
From: Joe Perches @ 2012-05-08 16:44 UTC (permalink / raw)
  To: David Miller; +Cc: johannes, eric.dumazet, netdev
In-Reply-To: <20120508.033120.1272130362698029549.davem@davemloft.net>

On Tue, 2012-05-08 at 03:31 -0400, David Miller wrote:
> From: Joe Perches <joe@perches.com>
> Date: Mon, 07 May 2012 23:35:36 -0700
> 
> > On Tue, 2012-05-08 at 02:26 -0400, David Miller wrote:
> >> From: Johannes Berg <johannes@sipsolutions.net>
> >> Date: Tue, 08 May 2012 07:25:44 +0200
> >> 
> >> > I suppose I could fix those first and then later change the type, but I
> >> > think having a "compare_ether_addr" function that returns *false* when
> >> > they *match* would be rather confusing. I'd rather have
> >> > "equal_ether_addr()" that returns *true* when they match.
> >> > 
> >> > I guess we could introduce equal_ether_addr() though and slowly convert,
> >> > keeping compare_ether_addr() as a sort of wrapper around it.
> >> 
> >> Indeed, this is one way to proceed.
> > 
> > perhaps is_equal_ether_addr or is_same_ether_addr instead?
> 
> Hmmm, my first choice would have been "eth_addr_equal()"

Perhaps ether_addr_equal for some API naming semi-consistency.

$ grep "\bint.*is_.*ether_addr" include/linux/etherdevice.h
static inline int is_zero_ether_addr(const u8 *addr)
static inline int is_multicast_ether_addr(const u8 *addr)
static inline int is_local_ether_addr(const u8 *addr)
static inline int is_broadcast_ether_addr(const u8 *addr)
static inline int is_unicast_ether_addr(const u8 *addr)
static inline int is_valid_ether_addr(const u8 *addr)

Perhaps all of these should be bool too
(patch in a separate email)

^ permalink raw reply

* Re: SLUB warning in netfilter code on shutdown
From: Christoph Lameter @ 2012-05-08 15:33 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Pablo Neira Ayuso, Patrick McHardy, netfilter-devel, netfilter,
	coreteam, netdev, Pekka Enberg
In-Reply-To: <20120508112236.GA15404@otc-wbsnb-06>

On Tue, 8 May 2012, Kirill A. Shutemov wrote:

> On Tue, May 08, 2012 at 02:19:24PM +0300, Kirill A. Shutemov wrote:
> > Hi,
> >
> > Screenshot attached.
>
> It's v3.4-rc6 on x86_64. Config attached.

Cannot see the bug report, but I guess this is a message related to objects
remaining on kmem_cache_destroy? We have other reports on that one as
well. The IPI reduction patch introduces a bug in has_cpu_slab().

Modify the return statement to

return c->page || c->partial;

^ permalink raw reply

* [PATCH net-next 3/4] netxen: added miniDIMM support in driver.
From: Rajesh Borundia @ 2012-05-08 15:01 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ameen Rahman, Sony Chacko, Sucheta Chakraborty
In-Reply-To: <1336489288-22347-1-git-send-email-rajesh.borundia@qlogic.com>

From: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>

The driver queries DIMM information from firmware and sets the
"presence" field of the structure accordingly.
The "presence" field, when set to 0xff, denotes an invalid flag; when
set to 0x0, it denotes that DIMM memory is not present.

Signed-off-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Signed-off-by: Rajesh Borundia <rajesh.borundia@qlogic.com>
---
 drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |    7 +
 .../net/ethernet/qlogic/netxen/netxen_nic_hdr.h    |   25 ++++
 .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |  131 ++++++++++++++++++++
 3 files changed, 163 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
index b8614af..a43a26c 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
@@ -1818,6 +1818,13 @@ struct netxen_brdinfo {
 	char short_name[NETXEN_MAX_SHORT_NAME];
 };
 
+struct netxen_dimm_cfg {
+	u8 presence;
+	u8 mem_type;
+	u8 dimm_type;
+	u32 size;
+};
+
 static const struct netxen_brdinfo netxen_boards[] = {
 	{NETXEN_BRDTYPE_P2_SB31_10G_CX4, 1, "XGb CX4"},
 	{NETXEN_BRDTYPE_P2_SB31_10G_HMEZ, 1, "XGb HMEZ"},
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hdr.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hdr.h
index b1a897c..a41106b 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hdr.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hdr.h
@@ -955,6 +955,31 @@ enum {
 #define NX_CRB_DEV_REF_COUNT		(NETXEN_CAM_RAM(0x138))
 #define NX_CRB_DEV_STATE		(NETXEN_CAM_RAM(0x140))
 
+/* MiniDIMM related macros */
+#define NETXEN_DIMM_CAPABILITY		(NETXEN_CAM_RAM(0x258))
+#define NETXEN_DIMM_PRESENT			0x1
+#define NETXEN_DIMM_MEMTYPE_DDR2_SDRAM	0x2
+#define NETXEN_DIMM_SIZE			0x4
+#define NETXEN_DIMM_MEMTYPE(VAL)		((VAL >> 3) & 0xf)
+#define	NETXEN_DIMM_NUMROWS(VAL)		((VAL >> 7) & 0xf)
+#define	NETXEN_DIMM_NUMCOLS(VAL)		((VAL >> 11) & 0xf)
+#define	NETXEN_DIMM_NUMRANKS(VAL)		((VAL >> 15) & 0x3)
+#define NETXEN_DIMM_DATAWIDTH(VAL)		((VAL >> 18) & 0x3)
+#define NETXEN_DIMM_NUMBANKS(VAL)		((VAL >> 21) & 0xf)
+#define NETXEN_DIMM_TYPE(VAL)		((VAL >> 25) & 0x3f)
+#define NETXEN_DIMM_VALID_FLAG		0x80000000
+
+#define NETXEN_DIMM_MEM_DDR2_SDRAM	0x8
+
+#define NETXEN_DIMM_STD_MEM_SIZE	512
+
+#define NETXEN_DIMM_TYPE_RDIMM	0x1
+#define NETXEN_DIMM_TYPE_UDIMM	0x2
+#define NETXEN_DIMM_TYPE_SO_DIMM	0x4
+#define NETXEN_DIMM_TYPE_Micro_DIMM	0x8
+#define NETXEN_DIMM_TYPE_Mini_RDIMM	0x10
+#define NETXEN_DIMM_TYPE_Mini_UDIMM	0x20
+
 /* Device State */
 #define NX_DEV_COLD		1
 #define NX_DEV_INITALIZING	2
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 65a718f..05f3ffe 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -2926,6 +2926,134 @@ static struct bin_attribute bin_attr_mem = {
 	.write = netxen_sysfs_write_mem,
 };
 
+static ssize_t
+netxen_sysfs_read_dimm(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
+		char *buf, loff_t offset, size_t size)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct netxen_adapter *adapter = dev_get_drvdata(dev);
+	struct net_device *netdev = adapter->netdev;
+	struct netxen_dimm_cfg dimm;
+	u8 dw, rows, cols, banks, ranks;
+	u32 val;
+
+	if (size != sizeof(struct netxen_dimm_cfg)) {
+		netdev_err(netdev, "Invalid size\n");
+		return -1;
+	}
+
+	memset(&dimm, 0, sizeof(struct netxen_dimm_cfg));
+	val = NXRD32(adapter, NETXEN_DIMM_CAPABILITY);
+
+	/* Checks if DIMM info is valid. */
+	if (val & NETXEN_DIMM_VALID_FLAG) {
+		netdev_err(netdev, "Invalid DIMM flag\n");
+		dimm.presence = 0xff;
+		goto out;
+	}
+
+	rows = NETXEN_DIMM_NUMROWS(val);
+	cols = NETXEN_DIMM_NUMCOLS(val);
+	ranks = NETXEN_DIMM_NUMRANKS(val);
+	banks = NETXEN_DIMM_NUMBANKS(val);
+	dw = NETXEN_DIMM_DATAWIDTH(val);
+
+	dimm.presence = (val & NETXEN_DIMM_PRESENT);
+
+	/* Checks if DIMM info is present. */
+	if (!dimm.presence) {
+		netdev_err(netdev, "DIMM not present\n");
+		goto out;
+	}
+
+	dimm.dimm_type = NETXEN_DIMM_TYPE(val);
+
+	switch (dimm.dimm_type) {
+	case NETXEN_DIMM_TYPE_RDIMM:
+	case NETXEN_DIMM_TYPE_UDIMM:
+	case NETXEN_DIMM_TYPE_SO_DIMM:
+	case NETXEN_DIMM_TYPE_Micro_DIMM:
+	case NETXEN_DIMM_TYPE_Mini_RDIMM:
+	case NETXEN_DIMM_TYPE_Mini_UDIMM:
+		break;
+	default:
+		netdev_err(netdev, "Invalid DIMM type %x\n", dimm.dimm_type);
+		goto out;
+	}
+
+	if (val & NETXEN_DIMM_MEMTYPE_DDR2_SDRAM)
+		dimm.mem_type = NETXEN_DIMM_MEM_DDR2_SDRAM;
+	else
+		dimm.mem_type = NETXEN_DIMM_MEMTYPE(val);
+
+	if (val & NETXEN_DIMM_SIZE) {
+		dimm.size = NETXEN_DIMM_STD_MEM_SIZE;
+		goto out;
+	}
+
+	if (!rows) {
+		netdev_err(netdev, "Invalid no of rows %x\n", rows);
+		goto out;
+	}
+
+	if (!cols) {
+		netdev_err(netdev, "Invalid no of columns %x\n", cols);
+		goto out;
+	}
+
+	if (!banks) {
+		netdev_err(netdev, "Invalid no of banks %x\n", banks);
+		goto out;
+	}
+
+	ranks += 1;
+
+	switch (dw) {
+	case 0x0:
+		dw = 32;
+		break;
+	case 0x1:
+		dw = 33;
+		break;
+	case 0x2:
+		dw = 36;
+		break;
+	case 0x3:
+		dw = 64;
+		break;
+	case 0x4:
+		dw = 72;
+		break;
+	case 0x5:
+		dw = 80;
+		break;
+	case 0x6:
+		dw = 128;
+		break;
+	case 0x7:
+		dw = 144;
+		break;
+	default:
+		netdev_err(netdev, "Invalid data-width %x\n", dw);
+		goto out;
+	}
+
+	dimm.size = ((1 << rows) * (1 << cols) * dw * banks * ranks) / 8;
+	/* Size returned in MB. */
+	dimm.size = (dimm.size) / 0x100000;
+out:
+	memcpy(buf, &dimm, sizeof(struct netxen_dimm_cfg));
+	return sizeof(struct netxen_dimm_cfg);
+
+}
+
+static struct bin_attribute bin_attr_dimm = {
+	.attr = {.name = "dimm", .mode = (S_IRUGO | S_IWUSR)},
+	.size = 0,
+	.read = netxen_sysfs_read_dimm,
+};
+
 
 static void
 netxen_create_sysfs_entries(struct netxen_adapter *adapter)
@@ -2963,6 +3091,8 @@ netxen_create_diag_entries(struct netxen_adapter *adapter)
 		dev_info(dev, "failed to create crb sysfs entry\n");
 	if (device_create_bin_file(dev, &bin_attr_mem))
 		dev_info(dev, "failed to create mem sysfs entry\n");
+	if (device_create_bin_file(dev, &bin_attr_dimm))
+		dev_info(dev, "failed to create dimm sysfs entry\n");
 }
 
 
@@ -2975,6 +3105,7 @@ netxen_remove_diag_entries(struct netxen_adapter *adapter)
 	device_remove_file(dev, &dev_attr_diag_mode);
 	device_remove_bin_file(dev, &bin_attr_crb);
 	device_remove_bin_file(dev, &bin_attr_mem);
+	device_remove_bin_file(dev, &bin_attr_dimm);
 }
 
 #ifdef CONFIG_INET
-- 
1.7.3.3

^ permalink raw reply related

* [PATCH net-next 4/4] netxen_nic: Fix estimation of recv MSS in case of LRO
From: Rajesh Borundia @ 2012-05-08 15:01 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Ameen Rahman, Sony Chacko
In-Reply-To: <1336489288-22347-1-git-send-email-rajesh.borundia@qlogic.com>

o The Linux stack estimates MSS from skb->len or skb_shinfo(skb)->gso_size.
In the case of LRO, skb->len is the aggregate length of several packets, hence
the MSS obtained using skb->len would be incorrect. Incorrect estimation of the
recv MSS leads to delayed acks in some traffic patterns (those that send two or
three packets, wait for an ack, and only then send the remaining packets). This
leads to a drop in performance. Hence we need to set gso_size to the MSS
obtained from firmware.

o This was fixed recently in firmware, hence the MSS is obtained based on
capability. The driver sets gso_size only if the fw is capable of sending the MSS.

Signed-off-by: Rajesh Borundia <rajesh.borundia@qlogic.com>
---
 drivers/net/ethernet/qlogic/netxen/netxen_nic.h    |   10 ++++++++--
 .../net/ethernet/qlogic/netxen/netxen_nic_ctx.c    |    3 +++
 .../net/ethernet/qlogic/netxen/netxen_nic_hdr.h    |    1 +
 .../net/ethernet/qlogic/netxen/netxen_nic_init.c   |    4 +++-
 .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |    9 ++++++++-
 5 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
index a43a26c..36ae0ae 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h
@@ -53,8 +53,8 @@
 
 #define _NETXEN_NIC_LINUX_MAJOR 4
 #define _NETXEN_NIC_LINUX_MINOR 0
-#define _NETXEN_NIC_LINUX_SUBVERSION 78
-#define NETXEN_NIC_LINUX_VERSIONID  "4.0.78"
+#define _NETXEN_NIC_LINUX_SUBVERSION 79
+#define NETXEN_NIC_LINUX_VERSIONID  "4.0.79"
 
 #define NETXEN_VERSION_CODE(a, b, c)	(((a) << 24) + ((b) << 16) + (c))
 #define _major(v)	(((v) >> 24) & 0xff)
@@ -419,6 +419,8 @@ struct rcv_desc {
 	(((sts_data) >> 52) & 0x1)
 #define netxen_get_lro_sts_seq_number(sts_data)		\
 	((sts_data) & 0x0FFFFFFFF)
+#define netxen_get_lro_sts_mss(sts_data1)		\
+	((sts_data1 >> 32) & 0x0FFFF)
 
 
 struct status_desc {
@@ -794,6 +796,7 @@ struct netxen_cmd_args {
 #define NX_CAP0_JUMBO_CONTIGUOUS	NX_CAP_BIT(0, 7)
 #define NX_CAP0_LRO_CONTIGUOUS		NX_CAP_BIT(0, 8)
 #define NX_CAP0_HW_LRO			NX_CAP_BIT(0, 10)
+#define NX_CAP0_HW_LRO_MSS		NX_CAP_BIT(0, 21)
 
 /*
  * Context state
@@ -1073,6 +1076,8 @@ typedef struct {
 #define NX_FW_CAPABILITY_FVLANTX		(1 << 9)
 #define NX_FW_CAPABILITY_HW_LRO			(1 << 10)
 #define NX_FW_CAPABILITY_GBE_LINK_CFG		(1 << 11)
+#define NX_FW_CAPABILITY_MORE_CAPS		(1 << 31)
+#define NX_FW_CAPABILITY_2_LRO_MAX_TCP_SEG	(1 << 2)
 
 /* module types */
 #define LINKEVENT_MODULE_NOT_PRESENT			1
@@ -1155,6 +1160,7 @@ typedef struct {
 #define NETXEN_NIC_BRIDGE_ENABLED       0X10
 #define NETXEN_NIC_DIAG_ENABLED		0x20
 #define NETXEN_FW_RESET_OWNER           0x40
+#define NETXEN_FW_MSS_CAP	        0x80
 #define NETXEN_IS_MSI_FAMILY(adapter) \
 	((adapter)->flags & (NETXEN_NIC_MSI_ENABLED | NETXEN_NIC_MSIX_ENABLED))
 
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
index c86ea12..7f556a8 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
@@ -328,6 +328,9 @@ nx_fw_cmd_create_rx_ctx(struct netxen_adapter *adapter)
 	cap = (NX_CAP0_LEGACY_CONTEXT | NX_CAP0_LEGACY_MN);
 	cap |= (NX_CAP0_JUMBO_CONTIGUOUS | NX_CAP0_LRO_CONTIGUOUS);
 
+	if (adapter->flags & NETXEN_FW_MSS_CAP)
+		cap |= NX_CAP0_HW_LRO_MSS;
+
 	prq->capabilities[0] = cpu_to_le32(cap);
 	prq->host_int_crb_mode =
 		cpu_to_le32(NX_HOST_INT_CRB_MODE_SHARED);
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hdr.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hdr.h
index a41106b..28e0769 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hdr.h
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hdr.h
@@ -776,6 +776,7 @@ enum {
 #define CRB_SW_INT_MASK_3		(NETXEN_NIC_REG(0x1e8))
 
 #define CRB_FW_CAPABILITIES_1		(NETXEN_CAM_RAM(0x128))
+#define CRB_FW_CAPABILITIES_2		(NETXEN_CAM_RAM(0x12c))
 #define CRB_MAC_BLOCK_START		(NETXEN_CAM_RAM(0x1c0))
 
 /*
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
index 718b274..0d725dc 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
@@ -1131,7 +1131,6 @@ netxen_validate_firmware(struct netxen_adapter *adapter)
 		 _build(file_fw_ver));
 		return -EINVAL;
 	}
-
 	val = nx_get_bios_version(adapter);
 	netxen_rom_fast_read(adapter, NX_BIOS_VERSION_OFFSET, (int *)&bios);
 	if ((__force u32)val != bios) {
@@ -1661,6 +1660,9 @@ netxen_process_lro(struct netxen_adapter *adapter,
 
 	length = skb->len;
 
+	if (adapter->flags & NETXEN_FW_MSS_CAP)
+		skb_shinfo(skb)->gso_size  =  netxen_get_lro_sts_mss(sts_data1);
+
 	netif_receive_skb(skb);
 
 	adapter->stats.lro_pkts++;
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 05f3ffe..b025940 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -1184,6 +1184,7 @@ netxen_nic_attach(struct netxen_adapter *adapter)
 	int err, ring;
 	struct nx_host_rds_ring *rds_ring;
 	struct nx_host_tx_ring *tx_ring;
+	u32 capab2;
 
 	if (adapter->is_up == NETXEN_ADAPTER_UP_MAGIC)
 		return 0;
@@ -1192,6 +1193,13 @@ netxen_nic_attach(struct netxen_adapter *adapter)
 	if (err)
 		return err;
 
+	adapter->flags &= ~NETXEN_FW_MSS_CAP;
+	if (adapter->capabilities & NX_FW_CAPABILITY_MORE_CAPS) {
+		capab2 = NXRD32(adapter, CRB_FW_CAPABILITIES_2);
+		if (capab2 & NX_FW_CAPABILITY_2_LRO_MAX_TCP_SEG)
+			adapter->flags |= NETXEN_FW_MSS_CAP;
+	}
+
 	err = netxen_napi_add(adapter, netdev);
 	if (err)
 		return err;
@@ -1810,7 +1818,6 @@ netxen_tso_check(struct net_device *netdev,
 		flags = FLAGS_VLAN_TAGGED;
 
 	} else if (vlan_tx_tag_present(skb)) {
-
 		flags = FLAGS_VLAN_OOB;
 		vid = vlan_tx_tag_get(skb);
 		netxen_set_tx_vlan_tci(first_desc, vid);
-- 
1.7.3.3

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox