Netdev List
 help / color / mirror / Atom feed
* [PATCH v3] netfilter: xtables target SYNPROXY
From: Changli Gao @ 2010-07-02  4:19 UTC (permalink / raw)
  To: Patrick McHardy
  Cc: David S. Miller, Alexey Kuznetsov, Jan Engelhardt,
	Jozsef Kadlecsik, Pekka Savola (ipv6), James Morris,
	Hideaki YOSHIFUJI, netfilter-devel, netdev, Changli Gao

v3:
fix a bug that prevented it from working with a bridge.

netfilter: xtables target SYNPROXY.

This patch implements an xtables target SYNPROXY. As the connection to the
TCP server won't be established until the ACK from the client is received, it
can protect the TCP server from the SYN-flood attacks.

It works in the raw table of the PREROUTING chain, before the conntracking
system. Syncookies are used, so no new state is introduced into the
conntracking system. In fact, until the first (client-side) connection is
established, the conntracking system doesn't see any packets. So when there is
a SYN-flood attack, the conntracking system won't be kept busy finding and
deleting un-assured conntracks.

As the SYN packet of the second (proxy-to-server) connection request is sent
locally, any DNAT rules in the PREROUTING chain should be moved to the OUTPUT
chain.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/netfilter/nf_conntrack.h        |   10 
 include/net/netfilter/nf_conntrack_core.h   |   21 
 include/net/netfilter/nf_conntrack_extend.h |    2 
 include/net/tcp.h                           |    7 
 net/ipv4/syncookies.c                       |   22 
 net/ipv4/tcp_ipv4.c                         |    9 
 net/netfilter/Kconfig                       |   17 
 net/netfilter/Makefile                      |    1 
 net/netfilter/nf_conntrack_core.c           |   45 +
 net/netfilter/xt_SYNPROXY.c                 |  679 ++++++++++++++++++++++++++++
 10 files changed, 794 insertions(+), 19 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index e624dae..5e6d8e4 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -311,5 +311,15 @@ do {							\
 #define MODULE_ALIAS_NFCT_HELPER(helper) \
         MODULE_ALIAS("nfct-helper-" helper)
 
+#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
+extern unsigned int (*syn_proxy_pre_hook)(struct sk_buff *skb,
+					  struct nf_conn *ct,
+					  enum ip_conntrack_info ctinfo);
+
+extern unsigned int (*syn_proxy_post_hook)(struct sk_buff *skb,
+					   struct nf_conn *ct,
+					   enum ip_conntrack_info ctinfo);
+#endif
 #endif /* __KERNEL__ */
 #endif /* _NF_CONNTRACK_H */
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index aced085..637b404 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -54,6 +54,23 @@ nf_conntrack_find_get(struct net *net, u16 zone,
 
 extern int __nf_conntrack_confirm(struct sk_buff *skb);
 
+static inline unsigned int syn_proxy_post_call(struct sk_buff *skb,
+					       struct nf_conn *ct,
+					       enum ip_conntrack_info ctinfo)
+{
+	unsigned int ret = NF_ACCEPT;
+#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
+	unsigned int (*syn_proxy)(struct sk_buff *, struct nf_conn *,
+				  enum ip_conntrack_info);
+	syn_proxy = rcu_dereference(syn_proxy_post_hook);
+	if (syn_proxy)
+		ret = syn_proxy(skb, ct, ctinfo);
+#endif
+
+	return ret;
+}
+
 /* Confirm a connection: returns NF_DROP if packet must be dropped. */
 static inline int nf_conntrack_confirm(struct sk_buff *skb)
 {
@@ -63,8 +80,10 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
 	if (ct && !nf_ct_is_untracked(ct)) {
 		if (!nf_ct_is_confirmed(ct))
 			ret = __nf_conntrack_confirm(skb);
-		if (likely(ret == NF_ACCEPT))
+		if (likely(ret == NF_ACCEPT)) {
 			nf_ct_deliver_cached_events(ct);
+			ret = syn_proxy_post_call(skb, ct, skb->nfctinfo);
+		}
 	}
 	return ret;
 }
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index 32d15bd..b2ae7e9 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -11,6 +11,7 @@ enum nf_ct_ext_id {
 	NF_CT_EXT_ACCT,
 	NF_CT_EXT_ECACHE,
 	NF_CT_EXT_ZONE,
+	NF_CT_EXT_SYNPROXY,
 	NF_CT_EXT_NUM,
 };
 
@@ -19,6 +20,7 @@ enum nf_ct_ext_id {
 #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
 #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
 #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone
+#define NF_CT_EXT_SYNPROXY_TYPE struct syn_proxy_state
 
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c2f96c2..06f28d3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -460,8 +460,11 @@ extern int			tcp_disconnect(struct sock *sk, int flags);
 extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
 extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, 
 				    struct ip_options *opt);
-extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, 
-				     __u16 *mss);
+extern __u32 __cookie_v4_init_sequence(__be32 saddr, __be32 daddr,
+				       __be16 sport, __be16 dport, __u32 seq,
+				       __u16 *mssp);
+extern int cookie_v4_check_sequence(const struct iphdr *iph,
+				    const struct tcphdr *th, __u32 cookie);
 
 extern __u32 cookie_init_timestamp(struct request_sock *req);
 extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 650cace..3adcba3 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -159,26 +159,21 @@ static __u16 const msstab[] = {
  * Generate a syncookie.  mssp points to the mss, which is returned
  * rounded down to the value encoded in the cookie.
  */
-__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+__u32 __cookie_v4_init_sequence(__be32 saddr, __be32 daddr, __be16 sport,
+				__be16 dport, __u32 seq, __u16 *mssp)
 {
-	const struct iphdr *iph = ip_hdr(skb);
-	const struct tcphdr *th = tcp_hdr(skb);
 	int mssind;
 	const __u16 mss = *mssp;
 
-	tcp_synq_overflow(sk);
-
 	for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
 		if (mss >= msstab[mssind])
 			break;
 	*mssp = msstab[mssind];
 
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
-
-	return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
-				     th->source, th->dest, ntohl(th->seq),
+	return secure_tcp_syn_cookie(saddr, daddr, sport, dport, seq,
 				     jiffies / (HZ * 60), mssind);
 }
+EXPORT_SYMBOL(__cookie_v4_init_sequence);
 
 /*
  * This (misnamed) value is the age of syncookie which is permitted.
@@ -191,10 +186,9 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
  * Check if a ack sequence number is a valid syncookie.
  * Return the decoded mss if it is, or 0 if not.
  */
-static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+int cookie_v4_check_sequence(const struct iphdr *iph, const struct tcphdr *th,
+			     __u32 cookie)
 {
-	const struct iphdr *iph = ip_hdr(skb);
-	const struct tcphdr *th = tcp_hdr(skb);
 	__u32 seq = ntohl(th->seq) - 1;
 	__u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
 					    th->source, th->dest, seq,
@@ -203,6 +197,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 
 	return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
 }
+EXPORT_SYMBOL(cookie_v4_check_sequence);
 
 static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 					   struct request_sock *req,
@@ -282,7 +277,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 		goto out;
 
 	if (tcp_synq_no_recent_overflow(sk) ||
-	    (mss = cookie_check(skb, cookie)) == 0) {
+	    (mss = cookie_v4_check_sequence(ip_hdr(skb), tcp_hdr(skb),
+					    cookie)) == 0) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
 		goto out;
 	}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8fa32f5..3b094c7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1332,7 +1332,14 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (want_cookie) {
-		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
+		struct tcphdr *th;
+
+		tcp_synq_overflow(sk);
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
+		th = tcp_hdr(skb);
+		isn = __cookie_v4_init_sequence(saddr, daddr, th->source,
+						th->dest, ntohl(th->seq),
+						&req->mss);
 		req->cookie_ts = tmp_opt.tstamp_ok;
 	} else if (!isn) {
 		struct inet_peer *peer = NULL;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 413ed24..fd8ad8c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -560,6 +560,23 @@ config NETFILTER_XT_TARGET_SECMARK
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_SYNPROXY
+	tristate '"SYNPROXY" target support (EXPERIMENTAL)'
+	depends on EXPERIMENTAL
+	depends on SYN_COOKIES
+	depends on IP_NF_RAW
+	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
+	help
+	  The SYNPROXY target allows a raw rule to specify that some TCP
+	  connections are relayed, to protect TCP servers from SYN-flood
+	  DoS attacks. SYN cookies are used to save the initial state, so no
+	  conntrack is needed until the client-side connection is established.
+	  This frees the connection tracking system from creating and deleting
+	  conntracks during a SYN-flood DoS attack.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_TCPMSS
 	tristate '"TCPMSS" target support'
 	depends on (IPV6 || IPV6=n)
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index e28420a..4e32834 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) += xt_SYNPROXY.o
 
 # matches
 obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 16b41b4..dd85d6f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -800,6 +800,26 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
 	return ct;
 }
 
+static inline unsigned int syn_proxy_pre_call(int protonum, struct sk_buff *skb,
+					      struct nf_conn *ct,
+					      enum ip_conntrack_info ctinfo)
+{
+	unsigned int ret = NF_ACCEPT;
+#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
+	unsigned int (*syn_proxy)(struct sk_buff *, struct nf_conn *,
+				  enum ip_conntrack_info);
+
+	if (protonum == IPPROTO_TCP) {
+		syn_proxy = rcu_dereference(syn_proxy_pre_hook);
+		if (syn_proxy)
+			ret = syn_proxy(skb, ct, ctinfo);
+	}
+#endif
+
+	return ret;
+}
+
 unsigned int
 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 		struct sk_buff *skb)
@@ -855,8 +875,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 			       l3proto, l4proto, &set_reply, &ctinfo);
 	if (!ct) {
 		/* Not valid part of a connection */
-		NF_CT_STAT_INC_ATOMIC(net, invalid);
-		ret = NF_ACCEPT;
+		ret = syn_proxy_pre_call(protonum, skb, NULL, ctinfo);
+		if (ret == NF_ACCEPT)
+			NF_CT_STAT_INC_ATOMIC(net, invalid);
 		goto out;
 	}
 
@@ -869,6 +890,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
 
 	NF_CT_ASSERT(skb->nfct);
 
+	ret = syn_proxy_pre_call(protonum, skb, ct, ctinfo);
+	if (ret != NF_ACCEPT)
+		goto out;
 	ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
 	if (ret <= 0) {
 		/* Invalid: inverse of the return code tells
@@ -1476,6 +1500,17 @@ s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
 			u32 seq);
 EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
 
+#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
+unsigned int (*syn_proxy_pre_hook)(struct sk_buff *skb, struct nf_conn *ct,
+				   enum ip_conntrack_info ctinfo);
+EXPORT_SYMBOL(syn_proxy_pre_hook);
+
+unsigned int (*syn_proxy_post_hook)(struct sk_buff *skb, struct nf_conn *ct,
+				    enum ip_conntrack_info ctinfo);
+EXPORT_SYMBOL(syn_proxy_post_hook);
+#endif
+
 int nf_conntrack_init(struct net *net)
 {
 	int ret;
@@ -1496,6 +1531,12 @@ int nf_conntrack_init(struct net *net)
 
 		/* Howto get NAT offsets */
 		rcu_assign_pointer(nf_ct_nat_offset, NULL);
+
+#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
+    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
+		rcu_assign_pointer(syn_proxy_pre_hook, NULL);
+		rcu_assign_pointer(syn_proxy_post_hook, NULL);
+#endif
 	}
 	return 0;
 
diff --git a/net/netfilter/xt_SYNPROXY.c b/net/netfilter/xt_SYNPROXY.c
new file mode 100644
index 0000000..1a55f33
--- /dev/null
+++ b/net/netfilter/xt_SYNPROXY.c
@@ -0,0 +1,679 @@
+/* (C) 2010- Changli Gao <xiaosuo@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * It is based on ipt_REJECT.c
+ */
+#define pr_fmt(fmt) "SYNPROXY: " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/unaligned/access_ok.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/route.h>
+#include <net/dst.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Changli Gao <xiaosuo@gmail.com>");
+MODULE_DESCRIPTION("Xtables: \"SYNPROXY\" target for IPv4");
+MODULE_ALIAS("ipt_SYNPROXY");
+
+enum {
+	TCP_SEND_FLAG_NOTRACE	= 0x1,
+	TCP_SEND_FLAG_SYNCOOKIE	= 0x2,
+	TCP_SEND_FLAG_ACK2SYN	= 0x4,
+};
+
+struct syn_proxy_state {
+	u16	seq_inited;
+	__be16	window;
+	u32	seq_diff;
+};
+
+static int get_mtu(const struct dst_entry *dst)
+{
+	int mtu;
+
+	mtu = dst_mtu(dst);
+	if (mtu)
+		return mtu;
+
+	return dst->dev ? dst->dev->mtu : 0;
+}
+
+static int get_advmss(const struct dst_entry *dst)
+{
+	int advmss;
+
+	advmss = dst_metric(dst, RTAX_ADVMSS);
+	if (advmss)
+		return advmss;
+	advmss = get_mtu(dst);
+	if (advmss)
+		return advmss - (sizeof(struct iphdr) + sizeof(struct tcphdr));
+
+	return TCP_MSS_DEFAULT;
+}
+
+static int syn_proxy_route(struct sk_buff *skb, struct net *net, u16 *pmss)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	struct rtable *rt;
+	struct flowi fl = {};
+	unsigned int type;
+	int flags = 0;
+	int err;
+	u16 mss;
+
+	type = inet_addr_type(net, iph->saddr);
+	if (type != RTN_LOCAL) {
+		type = inet_addr_type(net, iph->daddr);
+		if (type == RTN_LOCAL)
+			flags |= FLOWI_FLAG_ANYSRC;
+	}
+
+	if (type == RTN_LOCAL) {
+		fl.nl_u.ip4_u.daddr = iph->daddr;
+		fl.nl_u.ip4_u.saddr = iph->saddr;
+		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+		fl.flags = flags;
+		err = ip_route_output_key(net, &rt, &fl);
+		if (err)
+			goto out;
+
+		skb_dst_set(skb, &rt->dst);
+	} else {
+		/* non-local src, find valid iif to satisfy
+		 * rp-filter when calling ip_route_input. */
+		fl.nl_u.ip4_u.daddr = iph->saddr;
+		err = ip_route_output_key(net, &rt, &fl);
+		if (err)
+			goto out;
+
+		err = ip_route_input(skb, iph->daddr, iph->saddr,
+				     RT_TOS(iph->tos), rt->dst.dev);
+		if (err) {
+			dst_release(&rt->dst);
+			goto out;
+		}
+		if (pmss) {
+			mss = get_advmss(&rt->dst);
+			if (*pmss > mss)
+				*pmss = mss;
+		}
+		dst_release(&rt->dst);
+	}
+
+	err = skb_dst(skb)->error;
+	if (!err && pmss) {
+		mss = get_advmss(skb_dst(skb));
+		if (*pmss > mss)
+			*pmss = mss;
+	}
+
+out:
+	return err;
+}
+
+static int tcp_send(__be32 src, __be32 dst, __be16 sport, __be16 dport,
+		    u32 seq, u32 ack_seq, __be16 window, u16 mss, u8 tcp_flags,
+		    u8 tos, struct net_device *dev, int flags,
+		    struct sk_buff *oskb)
+{
+	struct sk_buff *skb;
+	struct iphdr *iph;
+	struct tcphdr *th;
+	int err, len;
+
+	len = sizeof(*th);
+	if (mss)
+		len += TCPOLEN_MSS;
+
+	skb = NULL;
+	/* caller must give me a large enough oskb */
+	if (oskb) {
+		unsigned char *odata = oskb->data;
+
+		if (skb_recycle_check(oskb, 0)) {
+			oskb->data = odata;
+			skb_reset_tail_pointer(oskb);
+			skb = oskb;
+			pr_debug("recycle skb\n");
+		}
+	}
+	if (!skb) {
+		skb = alloc_skb(LL_MAX_HEADER + sizeof(*iph) + len, GFP_ATOMIC);
+		if (!skb) {
+			err = -ENOMEM;
+			goto out;
+		}
+		skb_reserve(skb, LL_MAX_HEADER);
+	}
+
+	skb_reset_network_header(skb);
+	if (!(flags & TCP_SEND_FLAG_ACK2SYN) || skb != oskb) {
+		iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
+		iph->version	= 4;
+		iph->ihl	= sizeof(*iph) / 4;
+		iph->tos	= tos;
+		/* tot_len is set in ip_local_out() */
+		iph->id		= 0;
+		iph->frag_off	= htons(IP_DF);
+		iph->protocol	= IPPROTO_TCP;
+		iph->saddr	= src;
+		iph->daddr	= dst;
+		th = (struct tcphdr *)skb_put(skb, len);
+		th->source	= sport;
+		th->dest	= dport;
+	} else {
+		iph = (struct iphdr *)skb->data;
+		iph->id		= 0;
+		iph->frag_off	= htons(IP_DF);
+		skb_put(skb, iph->ihl * 4 + len);
+		th = (struct tcphdr *)(skb->data + iph->ihl * 4);
+	}
+
+	th->seq		= htonl(seq);
+	th->ack_seq	= htonl(ack_seq);
+	tcp_flag_byte(th) = tcp_flags;
+	th->doff	= len / 4;
+	th->window	= window;
+	th->urg_ptr	= 0;
+
+	skb->protocol = htons(ETH_P_IP);
+	if ((flags & TCP_SEND_FLAG_SYNCOOKIE) && mss)
+		err = syn_proxy_route(skb, dev_net(dev), &mss);
+	else
+		err = syn_proxy_route(skb, dev_net(dev), NULL);
+	if (err)
+		goto err_out;
+
+	if ((flags & TCP_SEND_FLAG_SYNCOOKIE)) {
+		if (mss) {
+			th->seq = htonl(__cookie_v4_init_sequence(dst, src,
+								  dport, sport,
+								  ack_seq - 1,
+								  &mss));
+		} else {
+			mss = TCP_MSS_DEFAULT;
+			th->seq = htonl(__cookie_v4_init_sequence(dst, src,
+								  dport, sport,
+								  ack_seq - 1,
+								  &mss));
+			mss = 0;
+		}
+	}
+
+	if (mss)
+		* (__force __be32 *)(th + 1) = htonl((TCPOPT_MSS << 24) |
+						     (TCPOLEN_MSS << 16) |
+						     mss);
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	th->check = ~tcp_v4_check(len, src, dst, 0);
+	skb->csum_start = (unsigned char *)th - skb->head;
+	skb->csum_offset = offsetof(struct tcphdr, check);
+
+	if (!(flags & TCP_SEND_FLAG_ACK2SYN) || skb != oskb)
+		iph->ttl	= dst_metric(skb_dst(skb), RTAX_HOPLIMIT);
+
+	if (skb->len > get_mtu(skb_dst(skb))) {
+		if (printk_ratelimit())
+			pr_warning("%s has smaller mtu: %d\n",
+				   skb_dst(skb)->dev->name,
+				   get_mtu(skb_dst(skb)));
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	if ((flags & TCP_SEND_FLAG_NOTRACE)) {
+		skb->nfct = &nf_ct_untracked_get()->ct_general;
+		skb->nfctinfo = IP_CT_NEW;
+		nf_conntrack_get(skb->nfct);
+	}
+
+	pr_debug("ip_local_out: %pI4n:%hu -> %pI4n:%hu (seq=%u, "
+		 "ack_seq=%u mss=%hu flags=%hhx)\n", &src, ntohs(th->source),
+		 &dst, ntohs(th->dest), ntohl(th->seq), ack_seq, mss,
+		 tcp_flags);
+
+	err = ip_local_out(skb);
+	if (err > 0)
+		err = net_xmit_errno(err);
+
+	pr_debug("ip_local_out: return with %d\n", err);
+out:
+	if (oskb && oskb != skb)
+		kfree_skb(oskb);
+
+	return err;
+
+err_out:
+	kfree_skb(skb);
+	goto out;
+}
+
+static int get_mss(u8 *data, int len)
+{
+	u8 olen;
+
+	while (len >= TCPOLEN_MSS) {
+		switch (data[0]) {
+		case TCPOPT_EOL:
+			return 0;
+		case TCPOPT_NOP:
+			data++;
+			len--;
+			break;
+		case TCPOPT_MSS:
+			if (data[1] != TCPOLEN_MSS)
+				return -EINVAL;
+			return get_unaligned_be16(data + 2);
+		default:
+			olen = data[1];
+			if (olen < 2 || olen > len)
+				return -EINVAL;
+			data += olen;
+			len -= olen;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static DEFINE_PER_CPU(struct syn_proxy_state, syn_proxy_state);
+
+/* syn_proxy_pre isn't under the protection of nf_conntrack_proto_tcp.c */
+static unsigned int syn_proxy_pre(struct sk_buff *skb, struct nf_conn *ct,
+				  enum ip_conntrack_info ctinfo)
+{
+	struct syn_proxy_state *state;
+	struct iphdr *iph;
+	struct tcphdr *th, _th;
+
+	/* only support IPv4 now */
+	iph = ip_hdr(skb);
+	if (iph->version != 4)
+		return NF_ACCEPT;
+
+	th = skb_header_pointer(skb, iph->ihl * 4, sizeof(_th), &_th);
+	if (th == NULL)
+		return NF_DROP;
+
+	if (!ct || !nf_ct_is_confirmed(ct)) {
+		int ret;
+
+		if (!th->syn && th->ack) {
+			u16 mss;
+			struct sk_buff *rec_skb;
+
+			mss = cookie_v4_check_sequence(iph, th,
+						       ntohl(th->ack_seq) - 1);
+			if (!mss)
+				return NF_ACCEPT;
+
+			pr_debug("%pI4n:%hu -> %pI4n:%hu(mss=%hu)\n",
+				 &iph->saddr, ntohs(th->source),
+				 &iph->daddr, ntohs(th->dest), mss);
+
+			if (skb_tailroom(skb) < TCPOLEN_MSS &&
+			    skb->len < iph->ihl * 4 + sizeof(*th) + TCPOLEN_MSS)
+				rec_skb = NULL;
+			else
+				rec_skb = skb;
+
+			local_bh_disable();
+			state = &__get_cpu_var(syn_proxy_state);
+			state->seq_inited = 1;
+			state->window = th->window;
+			state->seq_diff = ntohl(th->ack_seq) - 1;
+			if (rec_skb)
+				tcp_send(iph->saddr, iph->daddr, 0, 0,
+					 ntohl(th->seq) - 1, 0, th->window,
+					 mss, TCPHDR_SYN, 0, skb->dev,
+					 TCP_SEND_FLAG_ACK2SYN, rec_skb);
+			else
+				tcp_send(iph->saddr, iph->daddr, th->source,
+					 th->dest, ntohl(th->seq) - 1, 0,
+					 th->window, mss, TCPHDR_SYN,
+					 iph->tos, skb->dev, 0, NULL);
+			state->seq_inited = 0;
+			local_bh_enable();
+
+			if (!rec_skb)
+				kfree_skb(skb);
+
+			return NF_STOLEN;
+		}
+
+		if (!ct || !th->syn || th->ack)
+			return NF_ACCEPT;
+
+		ret = NF_ACCEPT;
+		local_bh_disable();
+		state = &__get_cpu_var(syn_proxy_state);
+		if (state->seq_inited) {
+			struct syn_proxy_state *nstate;
+
+			nstate = nf_ct_ext_add(ct, NF_CT_EXT_SYNPROXY,
+					       GFP_ATOMIC);
+			if (nstate != NULL) {
+				nstate->seq_inited = 0;
+				nstate->window = state->window;
+				nstate->seq_diff = state->seq_diff;
+				pr_debug("seq_diff: %u\n", nstate->seq_diff);
+			} else {
+				ret = NF_DROP;
+			}
+		}
+		local_bh_enable();
+
+		return ret;
+	}
+
+	state = nf_ct_ext_find(ct, NF_CT_EXT_SYNPROXY);
+	if (!state)
+		return NF_ACCEPT;
+
+	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+		__be32 newack;
+
+		/* don't need to mangle duplicate SYN packets */
+		if (th->syn && !th->ack)
+			return NF_ACCEPT;
+		if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*th)))
+			return NF_DROP;
+		th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
+		newack = htonl(ntohl(th->ack_seq) - state->seq_diff);
+		inet_proto_csum_replace4(&th->check, skb, th->ack_seq, newack,
+					 0);
+		pr_debug("alter ack seq: %u -> %u\n",
+			 ntohl(th->ack_seq), ntohl(newack));
+		th->ack_seq = newack;
+	} else {
+		/* Simultaneous open ? Oh, no. The connection between
+		 * client and us is established. */
+		if (th->syn && !th->ack)
+			return NF_DROP;
+	}
+
+	return NF_ACCEPT;
+}
+
+static unsigned int syn_proxy_mangle_pkt(struct sk_buff *skb, struct iphdr *iph,
+					 struct tcphdr *th, u32 seq_diff)
+{
+	__be32 new;
+	int olen;
+
+	if (skb->len < (iph->ihl + th->doff) * 4)
+		return NF_DROP;
+	if (!skb_make_writable(skb, (iph->ihl + th->doff) * 4))
+		return NF_DROP;
+	iph = (struct iphdr *)(skb->data);
+	th = (struct tcphdr *)(skb->data + iph->ihl * 4);
+
+	new = tcp_flag_word(th) & (~TCP_FLAG_SYN);
+	inet_proto_csum_replace4(&th->check, skb, tcp_flag_word(th), new, 0);
+	tcp_flag_word(th) = new;
+
+	new = htonl(ntohl(th->seq) + seq_diff);
+	inet_proto_csum_replace4(&th->check, skb, th->seq, new, 0);
+	pr_debug("alter seq: %u -> %u\n", ntohl(th->seq), ntohl(new));
+	th->seq = new;
+
+	olen = th->doff - sizeof(*th) / 4;
+	if (olen) {
+		__be32 *opt;
+
+		opt = (__force __be32 *)(th + 1);
+#define TCPOPT_EOL_WORD ((TCPOPT_EOL << 24) + (TCPOPT_EOL << 16) + \
+			 (TCPOPT_EOL << 8) + TCPOPT_EOL)
+		inet_proto_csum_replace4(&th->check, skb, *opt, TCPOPT_EOL_WORD,
+					 0);
+		*opt = TCPOPT_EOL_WORD;
+	}
+
+	return NF_ACCEPT;
+}
+
+static unsigned int syn_proxy_post(struct sk_buff *skb, struct nf_conn *ct,
+				   enum ip_conntrack_info ctinfo)
+{
+	struct syn_proxy_state *state;
+	struct iphdr *iph;
+	struct tcphdr *th;
+
+	/* untraced packets don't have NF_CT_EXT_SYNPROXY ext, as they don't
+	 * enter syn_proxy_pre() */
+	state = nf_ct_ext_find(ct, NF_CT_EXT_SYNPROXY);
+	if (state == NULL)
+		return NF_ACCEPT;
+
+	iph = ip_hdr(skb);
+	if (!skb_make_writable(skb, iph->ihl * 4 + sizeof(*th)))
+		return NF_DROP;
+	th = (struct tcphdr *)(skb->data + iph->ihl * 4);
+	if (!state->seq_inited) {
+		if (th->syn) {
+			/* It must be from original direction, as the ones
+			 * from the other side are dropped in function
+			 * syn_proxy_pre() */
+			if (!th->ack)
+				return NF_ACCEPT;
+
+			pr_debug("SYN-ACK %pI4n:%hu -> %pI4n:%hu "
+				 "(seq=%u ack_seq=%u)\n",
+				 &iph->saddr, ntohs(th->source), &iph->daddr,
+				 ntohs(th->dest), ntohl(th->seq),
+				 ntohl(th->ack_seq));
+
+			/* SYN-ACK from reply direction with the protection
+			 * of conntrack */
+			spin_lock_bh(&ct->lock);
+			if (!state->seq_inited) {
+				state->seq_inited = 1;
+				pr_debug("update seq_diff %u -> %u\n",
+					 state->seq_diff,
+					 state->seq_diff - ntohl(th->seq));
+				state->seq_diff -= ntohl(th->seq);
+			}
+			spin_unlock_bh(&ct->lock);
+			tcp_send(iph->daddr, iph->saddr, th->dest, th->source,
+				 ntohl(th->ack_seq),
+				 ntohl(th->seq) + 1 + state->seq_diff,
+				 state->window, 0, TCPHDR_ACK, iph->tos,
+				 skb->dev, 0, NULL);
+
+			return syn_proxy_mangle_pkt(skb, iph, th,
+						    state->seq_diff + 1);
+		} else {
+			__be32 newseq;
+
+			if (!th->rst)
+				return NF_ACCEPT;
+			newseq = htonl(state->seq_diff + 1);
+			inet_proto_csum_replace4(&th->check, skb, th->seq,
+						 newseq, 0);
+			pr_debug("alter RST seq: %u -> %u\n",
+				 ntohl(th->seq), ntohl(newseq));
+			th->seq = newseq;
+
+			return NF_ACCEPT;
+		}
+	}
+
+	/* ct should be in ESTABLISHED state, but if the ack packets from
+	 * us are lost. */
+	if (th->syn) {
+		if (!th->ack)
+			return NF_ACCEPT;
+
+		tcp_send(iph->daddr, iph->saddr, th->dest, th->source,
+			 ntohl(th->ack_seq),
+			 ntohl(th->seq) + 1 + state->seq_diff,
+			 state->window, 0, TCPHDR_ACK, iph->tos,
+			 skb->dev, 0, NULL);
+
+		return syn_proxy_mangle_pkt(skb, iph, th, state->seq_diff + 1);
+	}
+
+	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
+		__be32 newseq;
+
+		newseq = htonl(ntohl(th->seq) + state->seq_diff);
+		inet_proto_csum_replace4(&th->check, skb, th->seq, newseq, 0);
+		pr_debug("alter seq: %u -> %u\n", ntohl(th->seq),
+			 ntohl(newseq));
+		th->seq = newseq;
+	}
+
+	return NF_ACCEPT;
+}
+
+static unsigned int tcp_process(struct sk_buff *skb)
+{
+	const struct iphdr *iph;
+	const struct tcphdr *th;
+	int err;
+	u16 mss;
+
+	iph = ip_hdr(skb);
+	if (iph->frag_off & htons(IP_OFFSET))
+		goto out;
+	if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(*th)))
+		goto out;
+	th = (const struct tcphdr *)(skb->data + iph->ihl * 4);
+	if ((tcp_flag_byte(th) &
+	     (TCPHDR_FIN | TCPHDR_RST | TCPHDR_ACK | TCPHDR_SYN)) != TCPHDR_SYN)
+		goto out;
+
+	if (nf_ip_checksum(skb, NF_INET_PRE_ROUTING, iph->ihl * 4, IPPROTO_TCP))
+		goto out;
+	mss = 0;
+	if (th->doff > sizeof(*th) / 4) {
+		if (!pskb_may_pull(skb, (iph->ihl + th->doff) * 4))
+			goto out;
+		err = get_mss((u8 *)(th + 1), th->doff * 4 - sizeof(*th));
+		if (err < 0)
+			goto out;
+		if (err != 0)
+			mss = err;
+	} else if (th->doff != sizeof(*th) / 4)
+		goto out;
+
+	tcp_send(iph->daddr, iph->saddr, th->dest, th->source, 0,
+		 ntohl(th->seq) + 1, 0, mss, TCPHDR_SYN | TCPHDR_ACK,
+		 iph->tos, skb->dev,
+		 TCP_SEND_FLAG_NOTRACE | TCP_SEND_FLAG_SYNCOOKIE, skb);
+
+	return NF_STOLEN;
+
+out:
+	return NF_DROP;
+}
+
+static unsigned int synproxy_tg(struct sk_buff *skb,
+				const struct xt_action_param *par)
+{
+	struct nf_conn *ct;
+	enum ip_conntrack_info ctinfo;
+	int ret;
+
+	/* received from lo */
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct)
+		return IPT_CONTINUE;
+
+	local_bh_disable();
+	if (!__get_cpu_var(syn_proxy_state).seq_inited)
+		ret = tcp_process(skb);
+	else
+		ret = IPT_CONTINUE;
+	local_bh_enable();
+
+	return ret;
+}
+
+static int synproxy_tg_check(const struct xt_tgchk_param *par)
+{
+	int ret;
+
+	ret = nf_ct_l3proto_try_module_get(par->family);
+	if (ret < 0)
+		pr_info("cannot load conntrack support for proto=%u\n",
+			par->family);
+
+	return ret;
+}
+
+static void synproxy_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	nf_ct_l3proto_module_put(par->family);
+}
+
+static struct xt_target synproxy_tg_reg __read_mostly = {
+	.name		= "SYNPROXY",
+	.family		= NFPROTO_IPV4,
+	.target		= synproxy_tg,
+	.table		= "raw",
+	.hooks		= 1 << NF_INET_PRE_ROUTING,
+	.proto		= IPPROTO_TCP,
+	.checkentry	= synproxy_tg_check,
+	.destroy	= synproxy_tg_destroy,
+	.me		= THIS_MODULE,
+};
+
+static struct nf_ct_ext_type syn_proxy_state_ext __read_mostly = {
+	.len	= sizeof(struct syn_proxy_state),
+	.align	= __alignof__(struct syn_proxy_state),
+	.id	= NF_CT_EXT_SYNPROXY,
+};
+
+static int __init synproxy_tg_init(void)
+{
+	int err;
+
+	rcu_assign_pointer(syn_proxy_pre_hook, syn_proxy_pre);
+	rcu_assign_pointer(syn_proxy_post_hook, syn_proxy_post);
+	err = nf_ct_extend_register(&syn_proxy_state_ext);
+	if (err)
+		goto err_out;
+	err = xt_register_target(&synproxy_tg_reg);
+	if (err)
+		goto err_out2;
+
+	return err;
+
+err_out2:
+	nf_ct_extend_unregister(&syn_proxy_state_ext);
+err_out:
+	rcu_assign_pointer(syn_proxy_post_hook, NULL);
+	rcu_assign_pointer(syn_proxy_pre_hook, NULL);
+	rcu_barrier();
+
+	return err;
+}
+
+static void __exit synproxy_tg_exit(void)
+{
+	xt_unregister_target(&synproxy_tg_reg);
+	nf_ct_extend_unregister(&syn_proxy_state_ext);
+	rcu_assign_pointer(syn_proxy_post_hook, NULL);
+	rcu_assign_pointer(syn_proxy_pre_hook, NULL);
+	rcu_barrier();
+}
+
+module_init(synproxy_tg_init);
+module_exit(synproxy_tg_exit);

^ permalink raw reply related

* Re: Fwd: Possible bug in net/ipv4/route.c?
From: Eric Dumazet @ 2010-07-02  5:38 UTC (permalink / raw)
  To: YOSHIFUJI Hideaki; +Cc: netdev@vger.kernel.org, linux-kernel
In-Reply-To: <4C2D53D3.6050106@linux-ipv6.org>

Le vendredi 02 juillet 2010 à 11:49 +0900, YOSHIFUJI Hideaki a écrit :
> Switch to netdev.
> 

thanks ;)

> --yoshfuji
> 
> -------- Original Message --------
> Subject: Possible bug in net/ipv4/route.c?
> Date: Thu, 1 Jul 2010 16:00:29 -0700
> From: Sol Kavy <skavy@ubicom.com>
> To: <linux-kernel@vger.kernel.org>
> CC: Greg Ren <gren@ubicom.com>, Guojun Jin <gjin@ubicom.com>,        Murat Sezgin <msezgin@ubicom.com>, Sener Ilgen <silgen@ubicom.com>
> 
> Found Linux: 2.6.28
> Arch: Ubicom32 <not yet pushed>
> Project: uCLinux based Router
> Test: Bit torrent Stress Test
> 
> Note: The top of Linus git net/ipv4/route.c appears to have the same issue.
> 

Please use < 72 char lines

> The following is a patch for clearing out IP options area in an input

>  skb during link failure processing.  Without this patch, the

>  icmp_send() can result in a call to ip_options_echo() where the 

> common buffer area of the skb is incorrectly interpreted.  Depending on the previous use of the skb->cb[], the interpreted option length values can cause stack corruption by copying more than 40 bytes to the output options.
> 
> In our case, a driver is using the skb->cb[] area to hold driver
> specific data.   The driver is not zeroing out the area after use.  I
> can see three basic solutions:
> 
> 1) Drivers are not allowed to use the skb->cb[] area at all.  Ubicom
> should modify the driver to use a different approach.
> 
> 2) The layer using skb->cb[] should clear this area after use and
> before handing the skb to another layer.  Ubicom should modify the
> driver to clear the skb->cb[] area before sending it up the line.
> 

This is the right option. If you use one word in cb[], only your driver
knows how to clear it efficiently.

> 3) Any layer that "uses" the skb->cb[] area must clear the area before
> use.  In which case, the proposed patch would fix the problem for the
> ipv4_link_failure().  I believe that this is the correct fix because I
> see ip_rcv() clears the skb->cb[] before using it.
> 

No: ip_rcv() clears skb->cb when leaving ip_rcv(), not when entering.

skb allocation clears the whole cb[], and each layer is responsible for
clearing the part it may have dirtied.

> Can someone confirm that this is the appropriate fix?  If this is
> documented somewhere, please direct me to the documentation.
> 
> Please send email to sol@ubicom.com in addition to posting your
> response.
> 
> Thanks,
> 
> Sol Kavy/Murat Sezgin
> Ubicom, Inc.
> 
> Patch:  
> 
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 125ee64..d13805f 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -1606,6 +1606,14 @@ static void ipv4_link_failure(struct sk_buff *skb)
> {
>         struct rtable *rt;
> 
> +       /*
> +         * Since link failure can be called with skbs from many layers (see arp)
> +         * the cb area of the skb must be cleared before use. Because the cb area 
> +         * can be formatted according to the caller layer's cb area format and it may cause
> +         * corruptions when it is handled in a different network layer.
> +         */
> +       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
>         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
>         rt = skb->rtable;
> 
> The packet is enqueued by:
> do_IRQ()->do_softirq()->__do_softirq()->net_rx_action()->ubi32_eth_napi_poll()->ubi32_eth_receive()->__vlan_hwaccel_rx()->netif_receive_skb()->br_handle_frame()->nf_hook_slow()->br_nf_pre_routing_finish()->br_nf_pre_routing_finish_bridge()->neigh_resolve_output()->__neigh_event_send().
> 
> The packet is then dequeued by: 
> do_IRQ() -> irq_exit() -> do_softirq() -> run_timer_softirq() -> neigh_timer_handler() -> arp_error_report() -> ipv4_link_failure() -> icmp_send() -> ip_options_echo().
> 
> Because the Ubicom Ethernet driver overwrites the common buffer area, the enqueued packet contains garbage when cast as an IP options data structure.  This results in ip_options_echo() misreading the option length information and overwriting memory.  By clearing the skb->cb[] before processing the icmp_send() against the packet, we ensure that ip_options_echo() does not corrupt memory.
> 
> 
> 



^ permalink raw reply

* Re: [PATCH 1/1] ehea: Allocate stats buffer with GFP_KERNEL
From: David Miller @ 2010-07-02  5:48 UTC (permalink / raw)
  To: brking; +Cc: ossthema, osstklei, raisch, netdev
In-Reply-To: <201006302159.o5ULx8ow025348@d01av03.pok.ibm.com>

From: Brian King <brking@linux.vnet.ibm.com>
Date: Wed, 30 Jun 2010 16:59:12 -0500

> 
> Since ehea_get_stats calls ehea_h_query_ehea_port, which
> can sleep, we can also sleep when allocating a page in
> this function. This fixes some memory allocation failure
> warnings seen under low memory conditions.
> 
> Signed-off-by: Brian King <brking@linux.vnet.ibm.com>

Applied to net-next-2.6

^ permalink raw reply

* Re: [PATCH] ll_temac: add error checking to DMA init path
From: David Miller @ 2010-07-02  5:48 UTC (permalink / raw)
  To: dkirjanov; +Cc: john.linn, brian.hill, netdev
In-Reply-To: <20100701093905.GA29132@hera.kernel.org>

From: Denis Kirjanov <dkirjanov@hera.kernel.org>
Date: Thu, 1 Jul 2010 09:39:05 +0000

> Add error checking to DMA descriptor rings initialization code.
> 
> Signed-off-by: Denis Kirjanov <dkirjanov@kernel.org>

Applied to net-next-2.6

^ permalink raw reply

* Re: [PATCH net-next-2.6] be2net: changes to properly provide phy details
From: David Miller @ 2010-07-02  5:48 UTC (permalink / raw)
  To: ajitk; +Cc: netdev
In-Reply-To: <20100701135049.GA4518@serverengines.com>

From: Ajit Khaparde <ajitk@serverengines.com>
Date: Thu, 1 Jul 2010 19:21:00 +0530

> be2net driver is currently not showing correct phy details in certain cases.
> This patch fixes it.
> 
> Signed-off-by: Ajit Khaparde <ajitk@serverengines.com>

Applied to net-next-2.6

^ permalink raw reply

* Re: [net-next-2.6 PATCH] x86: Drop CONFIG_MCORE2 check around setting of NET_IP_ALIGN
From: David Miller @ 2010-07-02  5:49 UTC (permalink / raw)
  To: jeffrey.t.kirsher
  Cc: netdev, gospo, bphilips, ak, tglx, mingo, hpa, x86,
	alexander.h.duyck
In-Reply-To: <20100701232742.15934.49030.stgit@localhost.localdomain>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 01 Jul 2010 16:28:27 -0700

> From: Alexander Duyck <alexander.h.duyck@intel.com>
> 
> This patch removes the CONFIG_MCORE2 check from around NET_IP_ALIGN.  It is
> based on a suggestion from Andi Kleen.  The assumption is that there are
> not any x86 cores where unaligned access is really slow, and this change
> would allow for a performance improvement to still exist on configurations
> that are not necessarily optimized for Core 2.
> 
> Cc: Andi Kleen <ak@linux.intel.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@redhat.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: x86@kernel.org
> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

Applied, with HPA's ack.

^ permalink raw reply

* Re: [net-next-2.6 PATCH 1/5] igb: fix PHY config access on 82580
From: David Miller @ 2010-07-02  5:49 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, gospo, bphilips, nicholasx.d.nunley
In-Reply-To: <20100701233733.16171.4629.stgit@localhost.localdomain>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 01 Jul 2010 16:37:54 -0700

> From: Nick Nunley <nicholasx.d.nunley@intel.com>
> 
> 82580 NICs can have up to 4 functions. This fixes phy accesses
> to use the correct locks for functions 2 and 3.
> 
> Signed-off-by: Nicholas Nunley <nicholasx.d.nunley@intel.com>
> Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

Applied.

^ permalink raw reply

* Re: [net-next-2.6 PATCH 2/5] igb: Use only a single Tx queue in SR-IOV mode
From: David Miller @ 2010-07-02  5:50 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, gospo, bphilips, stable, gregory.v.rose
In-Reply-To: <20100701233814.16171.78454.stgit@localhost.localdomain>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 01 Jul 2010 16:38:16 -0700

> From: Greg Rose <gregory.v.rose@intel.com>
> 
> The 82576 expects the second rx queue in any pool to receive L2 switch
> loop back packets sent from the second tx queue in another pool.  The
> 82576 VF driver does not enable the second rx queue so if the PF driver
> sends packets destined to a VF from its second tx queue then the VF
> driver will never see them.  In SR-IOV mode limit the number of tx queues
> used by the PF driver to one. This patch fixes a bug reported in which
> the PF cannot communicate with the VF and should be considered for 2.6.34
> stable.
> 
> CC: stable@kernel.org
> Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
> Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

Applied.

^ permalink raw reply

* Re: [net-next-2.6 PATCH 3/5] igb: Fix Tx hangs seen when loading igb with max_vfs > 7.
From: David Miller @ 2010-07-02  5:50 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, gospo, bphilips, emil.s.tantilov
In-Reply-To: <20100701233838.16171.90269.stgit@localhost.localdomain>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 01 Jul 2010 16:38:40 -0700

> From: Emil Tantilov <emil.s.tantilov@intel.com>
> 
> Check the value of max_vfs at the time of assignment of vfs_allocated_count.
> 
> The previous check in igb_probe_vfs was too late as by that time the rx/tx
> rings were initialized with the wrong offset.
> 
> Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
> Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

Applied.

^ permalink raw reply

* Re: [net-next-2.6 PATCH 4/5] igb: correct link test not being run when link is down
From: David Miller @ 2010-07-02  5:50 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, gospo, bphilips, alexander.h.duyck
In-Reply-To: <20100701233859.16171.45966.stgit@localhost.localdomain>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 01 Jul 2010 16:39:01 -0700

> From: Alexander Duyck <alexander.h.duyck@intel.com>
> 
> The igb online link test was always reporting pass because instead of
> checking for if_running it was checking for netif_carrier_ok.
> 
> This change corrects the test so that it is run if the interface is running
> instead of checking for netif carrier ok.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
> Tested-by: Emil Tantilov <emil.s.tantilov@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

Applied.

^ permalink raw reply

* Re: [net-next-2.6 PATCH 5/5] igb: Add comment
From: David Miller @ 2010-07-02  5:50 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, gospo, bphilips, gregory.v.rose
In-Reply-To: <20100701233921.16171.58140.stgit@localhost.localdomain>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 01 Jul 2010 16:39:23 -0700

> From: Greg Rose <gregory.v.rose@intel.com>
> 
> Add explanatory comment to avoid confusion when a pointer is set
> to the second word of an array instead of the customary cast of a
> pointer to the beginning of the array.
> 
> Signed-off-by: Greg Rose <gregory.v.rose@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

Applied.

^ permalink raw reply

* Re: [net-next-2.6 PATCH] ixgbe: use NETIF_F_LRO
From: David Miller @ 2010-07-02  5:50 UTC (permalink / raw)
  To: jeffrey.t.kirsher; +Cc: netdev, gospo, bphilips, sgruszka, donald.c.skidmore
In-Reply-To: <20100701235811.16456.66115.stgit@localhost.localdomain>

From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 01 Jul 2010 16:58:25 -0700

> From: Stanislaw Gruszka <sgruszka@redhat.com>
> 
> Both ETH_FLAG_LRO and NETIF_F_LRO have the same value, but NETIF_F_LRO
> is intended to use with netdev->features.
> 
> Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
> Acked-by: Don Skidmore <donald.c.skidmore@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

Applied.

^ permalink raw reply

* [net-next-2.6 PATCH] igb: drop support for UDP hashing w/ RSS
From: Jeff Kirsher @ 2010-07-02  6:01 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, bphilips, Alexander Duyck, Jeff Kirsher

From: Alexander Duyck <alexander.h.duyck@intel.com>

This change removes UDP from the supported protocols for RSS hashing.  The
reason for removing this protocol is because IP fragmentation was causing a
network flow to be broken into two streams, one for fragmented, and one for
non-fragmented and this in turn was causing out-of-order issues.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/igb/igb_main.c |   18 ++++++++++--------
 1 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index 9cb04e2..9465617 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -2717,14 +2717,16 @@ static void igb_setup_mrqc(struct igb_adapter *adapter)
 	}
 	igb_vmm_control(adapter);
 
-	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
-		 E1000_MRQC_RSS_FIELD_IPV4_TCP);
-	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
-		 E1000_MRQC_RSS_FIELD_IPV6_TCP);
-	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
-		 E1000_MRQC_RSS_FIELD_IPV6_UDP);
-	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
-		 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
+	/*
+	 * Generate RSS hash based on TCP port numbers and/or
+	 * IPv4/v6 src and dst addresses since UDP cannot be
+	 * hashed reliably due to IP fragmentation
+	 */
+	mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
+		E1000_MRQC_RSS_FIELD_IPV4_TCP |
+		E1000_MRQC_RSS_FIELD_IPV6 |
+		E1000_MRQC_RSS_FIELD_IPV6_TCP |
+		E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
 
 	wr32(E1000_MRQC, mrqc);
 }


^ permalink raw reply related

* [PATCH] netfilter: postpone the checksum calculation
From: Changli Gao @ 2010-07-02  6:02 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: David S. Miller, netfilter-devel, netdev, Changli Gao

netfilter: postpone the checksum calculation.

Postpone the checksum calculation so that, if the output NIC supports
checksum offloading, we can utilize it. Even if the output NIC doesn't
support checksum offloading, a later mangling of this packet no longer
needs to update the checksum, as the checksum calculation occurs later.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 net/ipv4/netfilter/ipt_REJECT.c |   10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index f5f4a88..3d0e064 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -95,10 +95,11 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	}
 
 	tcph->rst	= 1;
-	tcph->check	= tcp_v4_check(sizeof(struct tcphdr),
-				       niph->saddr, niph->daddr,
-				       csum_partial(tcph,
-						    sizeof(struct tcphdr), 0));
+	tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
+				    niph->daddr, 0);
+	nskb->ip_summed = CHECKSUM_PARTIAL;
+	nskb->csum_start = (unsigned char *)tcph - nskb->head;
+	nskb->csum_offset = offsetof(struct tcphdr, check);
 
 	addr_type = RTN_UNSPEC;
 	if (hook != NF_INET_FORWARD
@@ -115,7 +116,6 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 		goto free_nskb;
 
 	niph->ttl	= dst_metric(skb_dst(nskb), RTAX_HOPLIMIT);
-	nskb->ip_summed = CHECKSUM_NONE;
 
 	/* "Never happens" */
 	if (nskb->len > dst_mtu(skb_dst(nskb)))

^ permalink raw reply related

* [net-next-2.6 PATCH] ixgbe: use netif_<level> instead of netdev_<level>
From: Jeff Kirsher @ 2010-07-02  6:05 UTC (permalink / raw)
  To: davem; +Cc: netdev, gospo, bphilips, Joe Perches, Emil Tantilov, Jeff Kirsher

From: Emil Tantilov <emil.s.tantilov@intel.com>

This patch restores the ability to set msglvl through ethtool.
The issue was introduced by:
commit 849c45423c0c108e08d67644728cc9b0ed225fa1

CC: Joe Perches <joe@perches.com>

Reported-by: Joe Perches <joe@perches.com>
Signed-off-by: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---

 drivers/net/ixgbe/ixgbe_82599.c   |    2 -
 drivers/net/ixgbe/ixgbe_common.h  |   19 ++++-----
 drivers/net/ixgbe/ixgbe_dcb_nl.c  |    2 -
 drivers/net/ixgbe/ixgbe_ethtool.c |   40 ++++++++++---------
 drivers/net/ixgbe/ixgbe_fcoe.c    |   26 ++++++------
 drivers/net/ixgbe/ixgbe_main.c    |   79 +++++++++++++++++++------------------
 drivers/net/ixgbe/ixgbe_sriov.c   |    7 ++-
 7 files changed, 89 insertions(+), 86 deletions(-)

diff --git a/drivers/net/ixgbe/ixgbe_82599.c b/drivers/net/ixgbe/ixgbe_82599.c
index 0ee175a..3e06a61 100644
--- a/drivers/net/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ixgbe/ixgbe_82599.c
@@ -715,7 +715,7 @@ static s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw,
 
 out:
 	if (link_up && (link_speed == IXGBE_LINK_SPEED_1GB_FULL))
-		e_info("Smartspeed has downgraded the link speed from "
+		e_info(hw, "Smartspeed has downgraded the link speed from "
 		       "the maximum advertised\n");
 	return status;
 }
diff --git a/drivers/net/ixgbe/ixgbe_common.h b/drivers/net/ixgbe/ixgbe_common.h
index d5d3aae..5cf15aa 100644
--- a/drivers/net/ixgbe/ixgbe_common.h
+++ b/drivers/net/ixgbe/ixgbe_common.h
@@ -108,16 +108,6 @@ s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index);
 extern struct net_device *ixgbe_get_hw_dev(struct ixgbe_hw *hw);
 #define hw_dbg(hw, format, arg...) \
 	netdev_dbg(ixgbe_get_hw_dev(hw), format, ##arg)
-#define e_err(format, arg...) \
-	netdev_err(adapter->netdev, format, ## arg)
-#define e_info(format, arg...) \
-	netdev_info(adapter->netdev, format, ## arg)
-#define e_warn(format, arg...) \
-	netdev_warn(adapter->netdev, format, ## arg)
-#define e_notice(format, arg...) \
-	netdev_notice(adapter->netdev, format, ## arg)
-#define e_crit(format, arg...) \
-	netdev_crit(adapter->netdev, format, ## arg)
 #define e_dev_info(format, arg...) \
 	dev_info(&adapter->pdev->dev, format, ## arg)
 #define e_dev_warn(format, arg...) \
@@ -126,5 +116,12 @@ extern struct net_device *ixgbe_get_hw_dev(struct ixgbe_hw *hw);
 	dev_err(&adapter->pdev->dev, format, ## arg)
 #define e_dev_notice(format, arg...) \
 	dev_notice(&adapter->pdev->dev, format, ## arg)
-
+#define e_info(msglvl, format, arg...) \
+	netif_info(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_err(msglvl, format, arg...) \
+	netif_err(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_warn(msglvl, format, arg...) \
+	netif_warn(adapter, msglvl, adapter->netdev, format, ## arg)
+#define e_crit(msglvl, format, arg...) \
+	netif_crit(adapter, msglvl, adapter->netdev, format, ## arg)
 #endif /* IXGBE_COMMON */
diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c
index 6576235..b53b465 100644
--- a/drivers/net/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c
@@ -121,7 +121,7 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
 			goto out;
 
 		if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) {
-			e_err("Enable failed, needs MSI-X\n");
+			e_err(drv, "Enable failed, needs MSI-X\n");
 			err = 1;
 			goto out;
 		}
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 5275e9c..b35ef36 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -301,7 +301,7 @@ static int ixgbe_set_settings(struct net_device *netdev,
 		hw->mac.autotry_restart = true;
 		err = hw->mac.ops.setup_link(hw, advertised, true, true);
 		if (err) {
-			e_info("setup link failed with code %d\n", err);
+			e_info(probe, "setup link failed with code %d\n", err);
 			hw->mac.ops.setup_link(hw, old, true, true);
 		}
 	} else {
@@ -1194,8 +1194,8 @@ static struct ixgbe_reg_test reg_test_82598[] = {
 		writel((_test[pat] & W), (adapter->hw.hw_addr + R));          \
 		val = readl(adapter->hw.hw_addr + R);                         \
 		if (val != (_test[pat] & W & M)) {                            \
-			e_err("pattern test reg %04X failed: got "	\
-			      "0x%08X expected 0x%08X\n",		\
+			e_err(drv, "pattern test reg %04X failed: got "   \
+			      "0x%08X expected 0x%08X\n",		      \
 			      R, val, (_test[pat] & W & M));                \
 			*data = R;                                            \
 			writel(before, adapter->hw.hw_addr + R);              \
@@ -1212,8 +1212,8 @@ static struct ixgbe_reg_test reg_test_82598[] = {
 	writel((W & M), (adapter->hw.hw_addr + R));                           \
 	val = readl(adapter->hw.hw_addr + R);                                 \
 	if ((W & M) != (val & M)) {                                           \
-		e_err("set/check reg %04X test failed: got 0x%08X "	\
-		      "expected 0x%08X\n", R, (val & M), (W & M));	\
+		e_err(drv, "set/check reg %04X test failed: got 0x%08X "  \
+		      "expected 0x%08X\n", R, (val & M), (W & M));        \
 		*data = R;                                                    \
 		writel(before, (adapter->hw.hw_addr + R));                    \
 		return 1;                                                     \
@@ -1246,8 +1246,8 @@ static int ixgbe_reg_test(struct ixgbe_adapter *adapter, u64 *data)
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_STATUS, toggle);
 	after = IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS) & toggle;
 	if (value != after) {
-		e_err("failed STATUS register test got: 0x%08X expected: "
-		      "0x%08X\n", after, value);
+		e_err(drv, "failed STATUS register test got: 0x%08X "
+		      "expected: 0x%08X\n", after, value);
 		*data = 1;
 		return 1;
 	}
@@ -1347,8 +1347,8 @@ static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data)
 		*data = 1;
 		return -1;
 	}
-	e_info("testing %s interrupt\n", shared_int ?
-		   "shared" : "unshared");
+	e_info(hw, "testing %s interrupt\n", shared_int ?
+	       "shared" : "unshared");
 
 	/* Disable all the interrupts */
 	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF);
@@ -1853,7 +1853,7 @@ static void ixgbe_diag_test(struct net_device *netdev,
 	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
 		/* Offline tests */
 
-		e_info("offline testing starting\n");
+		e_info(hw, "offline testing starting\n");
 
 		/* Link test performed before hardware reset so autoneg doesn't
 		 * interfere with test result */
@@ -1886,17 +1886,17 @@ static void ixgbe_diag_test(struct net_device *netdev,
 		else
 			ixgbe_reset(adapter);
 
-		e_info("register testing starting\n");
+		e_info(hw, "register testing starting\n");
 		if (ixgbe_reg_test(adapter, &data[0]))
 			eth_test->flags |= ETH_TEST_FL_FAILED;
 
 		ixgbe_reset(adapter);
-		e_info("eeprom testing starting\n");
+		e_info(hw, "eeprom testing starting\n");
 		if (ixgbe_eeprom_test(adapter, &data[1]))
 			eth_test->flags |= ETH_TEST_FL_FAILED;
 
 		ixgbe_reset(adapter);
-		e_info("interrupt testing starting\n");
+		e_info(hw, "interrupt testing starting\n");
 		if (ixgbe_intr_test(adapter, &data[2]))
 			eth_test->flags |= ETH_TEST_FL_FAILED;
 
@@ -1904,13 +1904,14 @@ static void ixgbe_diag_test(struct net_device *netdev,
 		 * loopback diagnostic. */
 		if (adapter->flags & (IXGBE_FLAG_SRIOV_ENABLED |
 				      IXGBE_FLAG_VMDQ_ENABLED)) {
-			e_info("Skip MAC loopback diagnostic in VT mode\n");
+			e_info(hw, "Skip MAC loopback diagnostic in VT "
+			       "mode\n");
 			data[3] = 0;
 			goto skip_loopback;
 		}
 
 		ixgbe_reset(adapter);
-		e_info("loopback testing starting\n");
+		e_info(hw, "loopback testing starting\n");
 		if (ixgbe_loopback_test(adapter, &data[3]))
 			eth_test->flags |= ETH_TEST_FL_FAILED;
 
@@ -1921,7 +1922,7 @@ skip_loopback:
 		if (if_running)
 			dev_open(netdev);
 	} else {
-		e_info("online testing starting\n");
+		e_info(hw, "online testing starting\n");
 		/* Online tests */
 		if (ixgbe_link_test(adapter, &data[4]))
 			eth_test->flags |= ETH_TEST_FL_FAILED;
@@ -2139,7 +2140,8 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
 			adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
 			if (netdev->features & NETIF_F_LRO) {
 				netdev->features &= ~NETIF_F_LRO;
-				e_info("rx-usecs set to 0, disabling RSC\n");
+				e_info(probe, "rx-usecs set to 0, "
+				       "disabling RSC\n");
 			}
 			need_reset = true;
 		}
@@ -2239,8 +2241,8 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 		} else if (!adapter->rx_itr_setting) {
 			netdev->features &= ~NETIF_F_LRO;
 			if (data & ETH_FLAG_LRO)
-				e_info("rx-usecs set to 0, "
-					"LRO/RSC cannot be enabled.\n");
+				e_info(probe, "rx-usecs set to 0, "
+				       "LRO/RSC cannot be enabled.\n");
 		}
 	}
 
diff --git a/drivers/net/ixgbe/ixgbe_fcoe.c b/drivers/net/ixgbe/ixgbe_fcoe.c
index 84e1194..f6ef4cd 100644
--- a/drivers/net/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ixgbe/ixgbe_fcoe.c
@@ -164,20 +164,20 @@ int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
 
 	adapter = netdev_priv(netdev);
 	if (xid >= IXGBE_FCOE_DDP_MAX) {
-		e_warn("xid=0x%x out-of-range\n", xid);
+		e_warn(drv, "xid=0x%x out-of-range\n", xid);
 		return 0;
 	}
 
 	fcoe = &adapter->fcoe;
 	if (!fcoe->pool) {
-		e_warn("xid=0x%x no ddp pool for fcoe\n", xid);
+		e_warn(drv, "xid=0x%x no ddp pool for fcoe\n", xid);
 		return 0;
 	}
 
 	ddp = &fcoe->ddp[xid];
 	if (ddp->sgl) {
-		e_err("xid 0x%x w/ non-null sgl=%p nents=%d\n",
-			  xid, ddp->sgl, ddp->sgc);
+		e_err(drv, "xid 0x%x w/ non-null sgl=%p nents=%d\n",
+		      xid, ddp->sgl, ddp->sgc);
 		return 0;
 	}
 	ixgbe_fcoe_clear_ddp(ddp);
@@ -185,14 +185,14 @@ int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
 	/* setup dma from scsi command sgl */
 	dmacount = pci_map_sg(adapter->pdev, sgl, sgc, DMA_FROM_DEVICE);
 	if (dmacount == 0) {
-		e_err("xid 0x%x DMA map error\n", xid);
+		e_err(drv, "xid 0x%x DMA map error\n", xid);
 		return 0;
 	}
 
 	/* alloc the udl from our ddp pool */
 	ddp->udl = pci_pool_alloc(fcoe->pool, GFP_KERNEL, &ddp->udp);
 	if (!ddp->udl) {
-		e_err("failed allocated ddp context\n");
+		e_err(drv, "failed allocated ddp context\n");
 		goto out_noddp_unmap;
 	}
 	ddp->sgl = sgl;
@@ -205,7 +205,7 @@ int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid,
 		while (len) {
 			/* max number of buffers allowed in one DDP context */
 			if (j >= IXGBE_BUFFCNT_MAX) {
-				e_err("xid=%x:%d,%d,%d:addr=%llx "
+				e_err(drv, "xid=%x:%d,%d,%d:addr=%llx "
 				      "not enough descriptors\n",
 				      xid, i, j, dmacount, (u64)addr);
 				goto out_noddp_free;
@@ -385,7 +385,7 @@ int ixgbe_fso(struct ixgbe_adapter *adapter,
 	struct fc_frame_header *fh;
 
 	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_type != SKB_GSO_FCOE)) {
-		e_err("Wrong gso type %d:expecting SKB_GSO_FCOE\n",
+		e_err(drv, "Wrong gso type %d:expecting SKB_GSO_FCOE\n",
 		      skb_shinfo(skb)->gso_type);
 		return -EINVAL;
 	}
@@ -412,7 +412,7 @@ int ixgbe_fso(struct ixgbe_adapter *adapter,
 		fcoe_sof_eof |= IXGBE_ADVTXD_FCOEF_SOF;
 		break;
 	default:
-		e_warn("unknown sof = 0x%x\n", sof);
+		e_warn(drv, "unknown sof = 0x%x\n", sof);
 		return -EINVAL;
 	}
 
@@ -439,7 +439,7 @@ int ixgbe_fso(struct ixgbe_adapter *adapter,
 		fcoe_sof_eof |= IXGBE_ADVTXD_FCOEF_EOF_A;
 		break;
 	default:
-		e_warn("unknown eof = 0x%x\n", eof);
+		e_warn(drv, "unknown eof = 0x%x\n", eof);
 		return -EINVAL;
 	}
 
@@ -515,7 +515,7 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter)
 					     adapter->pdev, IXGBE_FCPTR_MAX,
 					     IXGBE_FCPTR_ALIGN, PAGE_SIZE);
 		if (!fcoe->pool)
-			e_err("failed to allocated FCoE DDP pool\n");
+			e_err(drv, "failed to allocated FCoE DDP pool\n");
 
 		spin_lock_init(&fcoe->lock);
 	}
@@ -611,7 +611,7 @@ int ixgbe_fcoe_enable(struct net_device *netdev)
 	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)
 		goto out_enable;
 
-	e_info("Enabling FCoE offload features.\n");
+	e_info(drv, "Enabling FCoE offload features.\n");
 	if (netif_running(netdev))
 		netdev->netdev_ops->ndo_stop(netdev);
 
@@ -657,7 +657,7 @@ int ixgbe_fcoe_disable(struct net_device *netdev)
 	if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
 		goto out_disable;
 
-	e_info("Disabling FCoE offload features.\n");
+	e_info(drv, "Disabling FCoE offload features.\n");
 	if (netif_running(netdev))
 		netdev->netdev_ops->ndo_stop(netdev);
 
diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index dd46345..8c7617b 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -696,7 +696,7 @@ static inline bool ixgbe_check_tx_hang(struct ixgbe_adapter *adapter,
 		/* detected Tx unit hang */
 		union ixgbe_adv_tx_desc *tx_desc;
 		tx_desc = IXGBE_TX_DESC_ADV(*tx_ring, eop);
-		e_err("Detected Tx Unit Hang\n"
+		e_err(drv, "Detected Tx Unit Hang\n"
 		      "  Tx Queue             <%d>\n"
 		      "  TDH, TDT             <%x>, <%x>\n"
 		      "  next_to_use          <%x>\n"
@@ -812,8 +812,8 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 	if (adapter->detect_tx_hung) {
 		if (ixgbe_check_tx_hang(adapter, tx_ring, i)) {
 			/* schedule immediate reset if we believe we hung */
-			e_info("tx hang %d detected, resetting adapter\n",
-			       adapter->tx_timeout_count + 1);
+			e_info(probe, "tx hang %d detected, resetting "
+			       "adapter\n", adapter->tx_timeout_count + 1);
 			ixgbe_tx_timeout(adapter->netdev);
 		}
 	}
@@ -1652,8 +1652,8 @@ static void ixgbe_check_overtemp_task(struct work_struct *work)
 				return;
 			break;
 		}
-		e_crit("Network adapter has been stopped because it "
-		       "has over heated. Restart the computer. If the problem "
+		e_crit(drv, "Network adapter has been stopped because it has "
+		       "over heated. Restart the computer. If the problem "
 		       "persists, power off the system and replace the "
 		       "adapter\n");
 		/* write to clear the interrupt */
@@ -1667,7 +1667,7 @@ static void ixgbe_check_fan_failure(struct ixgbe_adapter *adapter, u32 eicr)
 
 	if ((adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) &&
 	    (eicr & IXGBE_EICR_GPI_SDP1)) {
-		e_crit("Fan has stopped, replace the adapter\n");
+		e_crit(probe, "Fan has stopped, replace the adapter\n");
 		/* write to clear the interrupt */
 		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
 	}
@@ -2153,7 +2153,7 @@ static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter)
 		                  handler, 0, adapter->name[vector],
 		                  adapter->q_vector[vector]);
 		if (err) {
-			e_err("request_irq failed for MSIX interrupt: "
+			e_err(probe, "request_irq failed for MSIX interrupt "
 			      "Error: %d\n", err);
 			goto free_queue_irqs;
 		}
@@ -2163,7 +2163,7 @@ static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter)
 	err = request_irq(adapter->msix_entries[vector].vector,
 	                  ixgbe_msix_lsc, 0, adapter->name[vector], netdev);
 	if (err) {
-		e_err("request_irq for msix_lsc failed: %d\n", err);
+		e_err(probe, "request_irq for msix_lsc failed: %d\n", err);
 		goto free_queue_irqs;
 	}
 
@@ -2349,7 +2349,7 @@ static int ixgbe_request_irq(struct ixgbe_adapter *adapter)
 	}
 
 	if (err)
-		e_err("request_irq failed, Error %d\n", err);
+		e_err(probe, "request_irq failed, Error %d\n", err);
 
 	return err;
 }
@@ -2420,7 +2420,7 @@ static void ixgbe_configure_msi_and_legacy(struct ixgbe_adapter *adapter)
 	map_vector_to_rxq(adapter, 0, 0);
 	map_vector_to_txq(adapter, 0, 0);
 
-	e_info("Legacy interrupt IVAR setup done\n");
+	e_info(hw, "Legacy interrupt IVAR setup done\n");
 }
 
 /**
@@ -3316,7 +3316,7 @@ static inline void ixgbe_rx_desc_queue_enable(struct ixgbe_adapter *adapter,
 			msleep(1);
 	}
 	if (k >= IXGBE_MAX_RX_DESC_POLL) {
-		e_err("RXDCTL.ENABLE on Rx queue %d not set within "
+		e_err(drv, "RXDCTL.ENABLE on Rx queue %d not set within "
 		      "the polling period\n", rxr);
 	}
 	ixgbe_release_rx_desc(&adapter->hw, adapter->rx_ring[rxr],
@@ -3446,7 +3446,7 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter)
 			} while (--wait_loop &&
 			         !(txdctl & IXGBE_TXDCTL_ENABLE));
 			if (!wait_loop)
-				e_err("Could not enable Tx Queue %d\n", j);
+				e_err(drv, "Could not enable Tx Queue %d\n", j);
 		}
 	}
 
@@ -3494,7 +3494,7 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter)
 	if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) {
 		u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
 		if (esdp & IXGBE_ESDP_SDP1)
-			e_crit("Fan has stopped, replace the adapter\n");
+			e_crit(drv, "Fan has stopped, replace the adapter\n");
 	}
 
 	/*
@@ -3523,7 +3523,7 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter)
 	} else {
 		err = ixgbe_non_sfp_link_config(hw);
 		if (err)
-			e_err("link_config FAILED %d\n", err);
+			e_err(probe, "link_config FAILED %d\n", err);
 	}
 
 	for (i = 0; i < adapter->num_tx_queues; i++)
@@ -3977,12 +3977,12 @@ static inline bool ixgbe_set_fcoe_queues(struct ixgbe_adapter *adapter)
 		adapter->num_tx_queues = 1;
 #ifdef CONFIG_IXGBE_DCB
 		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
-			e_info("FCoE enabled with DCB\n");
+			e_info(probe, "FCoE enabled with DCB\n");
 			ixgbe_set_dcb_queues(adapter);
 		}
 #endif
 		if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) {
-			e_info("FCoE enabled with RSS\n");
+			e_info(probe, "FCoE enabled with RSS\n");
 			if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) ||
 			    (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
 				ixgbe_set_fdir_queues(adapter);
@@ -4633,8 +4633,8 @@ int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
 	}
 
 	e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
-	       (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
-	       adapter->num_rx_queues, adapter->num_tx_queues);
+		   (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
+		   adapter->num_rx_queues, adapter->num_tx_queues);
 
 	set_bit(__IXGBE_DOWN, &adapter->state);
 
@@ -4711,7 +4711,7 @@ static void ixgbe_sfp_task(struct work_struct *work)
 				  "supported module.\n");
 			unregister_netdev(adapter->netdev);
 		} else {
-			e_info("detected SFP+: %d\n", hw->phy.sfp_type);
+			e_info(probe, "detected SFP+: %d\n", hw->phy.sfp_type);
 		}
 		/* don't need this routine any more */
 		clear_bit(__IXGBE_SFP_MODULE_NOT_FOUND, &adapter->state);
@@ -4891,7 +4891,7 @@ int ixgbe_setup_tx_resources(struct ixgbe_adapter *adapter,
 err:
 	vfree(tx_ring->tx_buffer_info);
 	tx_ring->tx_buffer_info = NULL;
-	e_err("Unable to allocate memory for the Tx descriptor ring\n");
+	e_err(probe, "Unable to allocate memory for the Tx descriptor ring\n");
 	return -ENOMEM;
 }
 
@@ -4913,7 +4913,7 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
 		err = ixgbe_setup_tx_resources(adapter, adapter->tx_ring[i]);
 		if (!err)
 			continue;
-		e_err("Allocation for Tx Queue %u failed\n", i);
+		e_err(probe, "Allocation for Tx Queue %u failed\n", i);
 		break;
 	}
 
@@ -4938,7 +4938,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
 	if (!rx_ring->rx_buffer_info)
 		rx_ring->rx_buffer_info = vmalloc(size);
 	if (!rx_ring->rx_buffer_info) {
-		e_err("vmalloc allocation failed for the Rx desc ring\n");
+		e_err(probe, "vmalloc allocation failed for the Rx "
+		      "descriptor ring\n");
 		goto alloc_failed;
 	}
 	memset(rx_ring->rx_buffer_info, 0, size);
@@ -4951,7 +4952,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
 					   &rx_ring->dma, GFP_KERNEL);
 
 	if (!rx_ring->desc) {
-		e_err("Memory allocation failed for the Rx desc ring\n");
+		e_err(probe, "Memory allocation failed for the Rx "
+		      "descriptor ring\n");
 		vfree(rx_ring->rx_buffer_info);
 		goto alloc_failed;
 	}
@@ -4984,7 +4986,7 @@ static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
 		err = ixgbe_setup_rx_resources(adapter, adapter->rx_ring[i]);
 		if (!err)
 			continue;
-		e_err("Allocation for Rx Queue %u failed\n", i);
+		e_err(probe, "Allocation for Rx Queue %u failed\n", i);
 		break;
 	}
 
@@ -5083,7 +5085,7 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
 	if ((new_mtu < 68) || (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE))
 		return -EINVAL;
 
-	e_info("changing MTU from %d to %d\n", netdev->mtu, new_mtu);
+	e_info(probe, "changing MTU from %d to %d\n", netdev->mtu, new_mtu);
 	/* must set new MTU before calling down or up */
 	netdev->mtu = new_mtu;
 
@@ -5598,7 +5600,7 @@ static void ixgbe_fdir_reinit_task(struct work_struct *work)
 			set_bit(__IXGBE_FDIR_INIT_DONE,
 			        &(adapter->tx_ring[i]->reinit_state));
 	} else {
-		e_err("failed to finish FDIR re-initialization, "
+		e_err(probe, "failed to finish FDIR re-initialization, "
 		      "ignored adding FDIR ATR filters\n");
 	}
 	/* Done FDIR Re-initialization, enable transmits */
@@ -5670,7 +5672,7 @@ static void ixgbe_watchdog_task(struct work_struct *work)
 				flow_tx = !!(rmcs & IXGBE_RMCS_TFCE_802_3X);
 			}
 
-			e_info("NIC Link is Up %s, Flow Control: %s\n",
+			e_info(drv, "NIC Link is Up %s, Flow Control: %s\n",
 			       (link_speed == IXGBE_LINK_SPEED_10GB_FULL ?
 			       "10 Gbps" :
 			       (link_speed == IXGBE_LINK_SPEED_1GB_FULL ?
@@ -5688,7 +5690,7 @@ static void ixgbe_watchdog_task(struct work_struct *work)
 		adapter->link_up = false;
 		adapter->link_speed = 0;
 		if (netif_carrier_ok(netdev)) {
-			e_info("NIC Link is Down\n");
+			e_info(drv, "NIC Link is Down\n");
 			netif_carrier_off(netdev);
 		}
 	}
@@ -5864,8 +5866,9 @@ static bool ixgbe_tx_csum(struct ixgbe_adapter *adapter,
 				break;
 			default:
 				if (unlikely(net_ratelimit())) {
-					e_warn("partial checksum but "
-					       "proto=%x!\n", skb->protocol);
+					e_warn(probe, "partial checksum "
+					       "but proto=%x!\n",
+					       skb->protocol);
 				}
 				break;
 			}
@@ -6475,7 +6478,7 @@ static void __devinit ixgbe_probe_vf(struct ixgbe_adapter *adapter,
 	adapter->flags |= IXGBE_FLAG_SRIOV_ENABLED;
 	err = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
 	if (err) {
-		e_err("Failed to enable PCI sriov: %d\n", err);
+		e_err(probe, "Failed to enable PCI sriov: %d\n", err);
 		goto err_novfs;
 	}
 	/* If call to enable VFs succeeded then allocate memory
@@ -6499,8 +6502,8 @@ static void __devinit ixgbe_probe_vf(struct ixgbe_adapter *adapter,
 	}
 
 	/* Oh oh */
-	e_err("Unable to allocate memory for VF Data Storage - SRIOV "
-	      "disabled\n");
+	e_err(probe, "Unable to allocate memory for VF Data Storage - "
+	      "SRIOV disabled\n");
 	pci_disable_sriov(adapter->pdev);
 
 err_novfs:
@@ -6670,7 +6673,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) {
 		u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP);
 		if (esdp & IXGBE_ESDP_SDP1)
-			e_crit("Fan has stopped, replace the adapter\n");
+			e_crit(probe, "Fan has stopped, replace the adapter\n");
 	}
 
 	/* reset_hw fills in the perm_addr as well */
@@ -6701,7 +6704,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 
 	ixgbe_probe_vf(adapter, ii);
 
-	netdev->features =    NETIF_F_SG |
+	netdev->features = NETIF_F_SG |
 	                   NETIF_F_IP_CSUM |
 	                   NETIF_F_HW_VLAN_TX |
 	                   NETIF_F_HW_VLAN_RX |
@@ -6854,7 +6857,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 	}
 #endif
 	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
-		e_info("IOV is enabled with %d VFs\n", adapter->num_vfs);
+		e_info(probe, "IOV is enabled with %d VFs\n", adapter->num_vfs);
 		for (i = 0; i < adapter->num_vfs; i++)
 			ixgbe_vf_configuration(pdev, (i | 0x10000000));
 	}
@@ -7002,7 +7005,7 @@ static pci_ers_result_t ixgbe_io_slot_reset(struct pci_dev *pdev)
 	int err;
 
 	if (pci_enable_device_mem(pdev)) {
-		e_err("Cannot re-enable PCI device after reset.\n");
+		e_err(probe, "Cannot re-enable PCI device after reset.\n");
 		result = PCI_ERS_RESULT_DISCONNECT;
 	} else {
 		pci_set_master(pdev);
@@ -7040,7 +7043,7 @@ static void ixgbe_io_resume(struct pci_dev *pdev)
 
 	if (netif_running(netdev)) {
 		if (ixgbe_up(adapter)) {
-			e_info("ixgbe_up failed after reset\n");
+			e_info(probe, "ixgbe_up failed after reset\n");
 			return;
 		}
 	}
diff --git a/drivers/net/ixgbe/ixgbe_sriov.c b/drivers/net/ixgbe/ixgbe_sriov.c
index 6e6dee0..49661a1 100644
--- a/drivers/net/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ixgbe/ixgbe_sriov.c
@@ -185,7 +185,8 @@ int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask)
 
 	if (enable) {
 		random_ether_addr(vf_mac_addr);
-		e_info("IOV: VF %d is enabled MAC %pM\n", vfn, vf_mac_addr);
+		e_info(probe, "IOV: VF %d is enabled MAC %pM\n",
+		       vfn, vf_mac_addr);
 		/*
 		 * Store away the VF "permananet" MAC address, it will ask
 		 * for it later.
@@ -244,7 +245,7 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf)
 	if (msgbuf[0] == IXGBE_VF_RESET) {
 		unsigned char *vf_mac = adapter->vfinfo[vf].vf_mac_addresses;
 		u8 *addr = (u8 *)(&msgbuf[1]);
-		e_info("VF Reset msg received from vf %d\n", vf);
+		e_info(probe, "VF Reset msg received from vf %d\n", vf);
 		adapter->vfinfo[vf].clear_to_send = false;
 		ixgbe_vf_reset_msg(adapter, vf);
 		adapter->vfinfo[vf].clear_to_send = true;
@@ -297,7 +298,7 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf)
 		retval = ixgbe_set_vf_vlan(adapter, add, vid, vf);
 		break;
 	default:
-		e_err("Unhandled Msg %8.8x\n", msgbuf[0]);
+		e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]);
 		retval = IXGBE_ERR_MBX;
 		break;
 	}


^ permalink raw reply related

* Re: [PATCH] igbvf: avoid name clash between PF and VF
From: Stefan Assmann @ 2010-07-02  6:37 UTC (permalink / raw)
  To: Casey Leedom
  Cc: e1000-devel, netdev, gregory.v.rose, jeffrey.t.kirsher,
	Andy Gospodarek
In-Reply-To: <201007011012.48236.leedom@chelsio.com>

On 01.07.2010 19:12, Casey Leedom wrote:
> | From: Stefan Assmann <sassmann@redhat.com>
> | Date: Wednesday, June 30, 2010 11:37 pm
> | 
> | You're correct, the problem shouldn't occur with cxgb4vf and therefore
> | this change shouldn't be necessary. However we might consider a
> | consistent naming scheme for VFs in all drivers. But I don't have a
> | strong opinion about this, either way would be fine by me.
> 
>   Sorry, I hadn't meant to imply any criticism of your naming proposal.  I was 
> just trying to clarify when/where such a scheme might be necessary.

Sure, that's the reason why we're discussing this here.

> 
>   On the naming proposal itself, it strikes me that the most common use of PCI-E 
> SR-IOV Virtual Functions will be to export them to KVM Virtual Machines via PCI 
> "Pass Through."  So there shouldn't be any naming conflict there, right?  Or is 
> it the same scenario you described before: that the VF NIC device might be found 
> before the normal "eth0", etc. within the Virtual Machine?

I haven't had a scenario where passing multiple VF NICs to the guest was
necessary. In theory it might happen there as well, if you have multiple
NICs (with persistent and random MACs) in the guest. But usually you
just have a single VF inside the guest and then you're fine.

The scenario that I'm targeting is on the host side mostly.

  Stefan
--
Stefan Assmann         | Red Hat GmbH
Software Engineer      | Otto-Hahn-Strasse 20, 85609 Dornach
                       | HR: Amtsgericht Muenchen HRB 153243
                       | GF: Brendan Lane, Charlie Peters,
sassmann at redhat.com |     Michael Cunningham, Charles Cachera

------------------------------------------------------------------------------
This SF.net email is sponsored by Sprint
What will you do first with EVO, the first 4G phone?
Visit sprint.com/first -- http://p.sf.net/sfu/sprint-com-first
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel® Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply

* [PATCH] netfilter: fix a dst leak in ipv6 REJECT
From: Eric Dumazet @ 2010-07-02  7:06 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Netfilter Development Mailinglist, netdev, stable

We should release dst if dst->error is set.

Bug introduced in 2.6.14 by commit e104411b82f5c
([XFRM]: Always release dst_entry on error in xfrm_lookup)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/ipv6/netfilter/ip6t_REJECT.c |    6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 47d2277..2933396 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -97,9 +97,11 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 	fl.fl_ip_dport = otcph.source;
 	security_skb_classify_flow(oldskb, &fl);
 	dst = ip6_route_output(net, NULL, &fl);
-	if (dst == NULL)
+	if (dst == NULL || dst->error) {
+		dst_release(dst);
 		return;
-	if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0))
+	}
+	if (xfrm_lookup(net, &dst, &fl, NULL, 0))
 		return;
 
 	hh_len = (dst->dev->hard_header_len + 15)&~15;



^ permalink raw reply related

* Re: [PATCH] ethtool: Fix list of hash options in manual page
From: Jeff Garzik @ 2010-07-02  7:21 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: Aníbal Monsalve Salazar, netdev
In-Reply-To: <1274836511.19775.3.camel@localhost>

On 05/25/2010 09:15 PM, Ben Hutchings wrote:
> 'p' is not a valid option.
> The 'm' option was missing a preceding 'B' for bold.
>
> Signed-off-by: Ben Hutchings<ben@decadent.org.uk>

applied



^ permalink raw reply

* Re: [PATCH ethtool 2/2] ethtool: Add support for control of RX flow hash indirection
From: Jeff Garzik @ 2010-07-02  7:25 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: Jeff Garzik, netdev, linux-net-drivers
In-Reply-To: <1277910792.2082.18.camel@achroite.uk.solarflarecom.com>

On 06/30/2010 11:13 AM, Ben Hutchings wrote:
> Many NICs use an indirection table to map an RX flow hash value to one
> of an arbitrary number of queues (not necessarily a power of 2).  It
> can be useful to remove some queues from this indirection table so
> that they are only used for flows that are specifically filtered
> there.  It may also be useful to weight the mapping to account for
> user processes with the same CPU-affinity as the RX interrupts.
>
> Signed-off-by: Ben Hutchings<bhutchings@solarflare.com>

applied 1-2, after making requested printf correction



^ permalink raw reply

* Re: [PATCH] bridge: add per bridge device controls for invoking iptables
From: Patrick McHardy @ 2010-07-02  7:34 UTC (permalink / raw)
  To: David Miller; +Cc: shemminger, netdev
In-Reply-To: <20100630.142717.123424464.davem@davemloft.net>

David Miller wrote:
> From: Stephen Hemminger <shemminger@vyatta.com>
> Date: Wed, 30 Jun 2010 14:24:40 -0700
>
>   
>> On Mon, 28 Jun 2010 14:47:00 +0200
>> kaber@trash.net wrote:
>>
>>     
>>> From: Patrick McHardy <kaber@trash.net>
>>>
>>> Support more fine grained control of bridge netfilter iptables invocation
>>> by adding separate brnf_call_*tables parameters for each device using the
>>> sysfs interface. Packets are passed to layer 3 netfilter when either the
>>> global parameter or the per bridge parameter is enabled.
>>>
>>> Signed-off-by: Patrick McHardy <kaber@trash.net>
>>>       
>> Looks like a good idea.
>>
>> Acked-by: Stephen Hemminger <shemminger@vyatta.com>
>>     
>
> Acked-by: David S. Miller <davem@davemloft.net>
>
> Patrick since this is mostly netfilter'ish, please toss it into one
> of your trees.
>   

Will do, thanks.

^ permalink raw reply

* Re: [PATCH] nf_conntrack_reasm: add fast path for in-order fragments
From: Patrick McHardy @ 2010-07-02  7:53 UTC (permalink / raw)
  To: Changli Gao
  Cc: David S. Miller, Alexey Kuznetsov, Pekka Savola (ipv6),
	James Morris, Hideaki YOSHIFUJI, Eric Dumazet, netfilter-devel,
	netdev, Mitchell Erblich
In-Reply-To: <1277956710-22313-1-git-send-email-xiaosuo@gmail.com>

Changli Gao wrote:
> nf_conntrack_reasm: add fast path for in-order fragments
>
> As the fragments are sent in order in most of OSes, such as Windows, Darwin and
> FreeBSD, it is likely the new fragments are at the end of the inet_frag_queue.
> In the fast path, we check if the skb at the end of the inet_frag_queue is the
> prev we expect.
>
>   

I'll apply this when merging with net-next the next time since this depends
on your "fragment: add fast path for in-order fragments" patch.

^ permalink raw reply

* Re: [PATCH] netfilter: fix a dst leak in ipv6 REJECT
From: Patrick McHardy @ 2010-07-02  8:07 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Netfilter Development Mailinglist, netdev, stable
In-Reply-To: <1278054411.2597.13.camel@edumazet-laptop>

Eric Dumazet wrote:
> We should release dst if dst->error is set.
>
> Bug introduced in 2.6.14 by commit e104411b82f5c
> ([XFRM]: Always release dst_entry on error in xfrm_lookup)
>   

Applied, thanks Eric.

^ permalink raw reply

* [PATCH net-next-2.6] ipv4: use skb_dst_copy() in ip_copy_metadata()
From: Eric Dumazet @ 2010-07-02  9:48 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Avoid touching dst refcount in ip_fragment().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/ipv4/ip_output.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7d1f4b4..d647852 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -411,7 +411,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->priority = from->priority;
 	to->protocol = from->protocol;
 	skb_dst_drop(to);
-	skb_dst_set(to, dst_clone(skb_dst(from)));
+	skb_dst_copy(to, from);
 	to->dev = from->dev;
 	to->mark = from->mark;
 



^ permalink raw reply related

* [PATCH 1/9] netfilter: nf_nat: support user-specified SNAT rules in LOCAL_IN
From: kaber @ 2010-07-02  9:52 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1278064342-19059-1-git-send-email-kaber@trash.net>

From: Patrick McHardy <kaber@trash.net>

2.6.34 introduced 'conntrack zones' to deal with cases where packets
from multiple identical networks are handled by conntrack/NAT. Packets
are looped through veth devices, during which they are NATed to private
addresses, after which they can continue normally through the stack
and possibly have NAT rules applied a second time.

This works well, but is needlessly complicated for cases where only
a single SNAT/DNAT mapping needs to be applied to these packets. In that
case, all that needs to be done is to assign each network to a separate
zone and perform NAT as usual. However this doesn't work for packets
destined for the machine performing NAT itself since it's currently not
possible to configure SNAT mappings for the LOCAL_IN chain.

This patch adds a new INPUT chain to the NAT table and changes the
targets performing SNAT to be usable in that chain.

Example usage with two identical networks (192.168.0.0/24) on eth0/eth1:

iptables -t raw -A PREROUTING -i eth0 -j CT --zone 1
iptables -t raw -A PREROUTING -i eth0 -j MARK --set-mark 1
iptables -t raw -A PREROUTING -i eth1 -j CT --zone 2
iptables -t raw -A PREROUTING -i eth1 -j MARK --set-mark 2

iptables -t nat -A INPUT       -m mark --mark 1 -j NETMAP --to 10.0.0.0/24
iptables -t nat -A POSTROUTING -m mark --mark 1 -j NETMAP --to 10.0.0.0/24
iptables -t nat -A INPUT       -m mark --mark 2 -j NETMAP --to 10.0.1.0/24
iptables -t nat -A POSTROUTING -m mark --mark 2 -j NETMAP --to 10.0.1.0/24

iptables -t raw -A PREROUTING -d 10.0.0.0/24 -j CT --zone 1
iptables -t raw -A OUTPUT     -d 10.0.0.0/24 -j CT --zone 1
iptables -t raw -A PREROUTING -d 10.0.1.0/24 -j CT --zone 2
iptables -t raw -A OUTPUT     -d 10.0.1.0/24 -j CT --zone 2

iptables -t nat -A PREROUTING -d 10.0.0.0/24 -j NETMAP --to 192.168.0.0/24
iptables -t nat -A OUTPUT     -d 10.0.0.0/24 -j NETMAP --to 192.168.0.0/24
iptables -t nat -A PREROUTING -d 10.0.1.0/24 -j NETMAP --to 192.168.0.0/24
iptables -t nat -A OUTPUT     -d 10.0.1.0/24 -j NETMAP --to 192.168.0.0/24

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_nat_rule.h    |    2 --
 net/ipv4/netfilter/ipt_NETMAP.c        |    6 ++++--
 net/ipv4/netfilter/nf_nat_rule.c       |   10 ++++++----
 net/ipv4/netfilter/nf_nat_standalone.c |    8 +-------
 4 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/include/net/netfilter/nf_nat_rule.h b/include/net/netfilter/nf_nat_rule.h
index e4a18ae..2890bdc 100644
--- a/include/net/netfilter/nf_nat_rule.h
+++ b/include/net/netfilter/nf_nat_rule.h
@@ -12,6 +12,4 @@ extern int nf_nat_rule_find(struct sk_buff *skb,
 			    const struct net_device *out,
 			    struct nf_conn *ct);
 
-extern unsigned int
-alloc_null_binding(struct nf_conn *ct, unsigned int hooknum);
 #endif /* _NF_NAT_RULE_H */
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index f43867d..6cdb298 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -48,7 +48,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_POST_ROUTING ||
-		     par->hooknum == NF_INET_LOCAL_OUT);
+		     par->hooknum == NF_INET_LOCAL_OUT ||
+		     par->hooknum == NF_INET_LOCAL_IN);
 	ct = nf_ct_get(skb, &ctinfo);
 
 	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
@@ -77,7 +78,8 @@ static struct xt_target netmap_tg_reg __read_mostly = {
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) |
 			  (1 << NF_INET_POST_ROUTING) |
-			  (1 << NF_INET_LOCAL_OUT),
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_LOCAL_IN),
 	.checkentry 	= netmap_tg_check,
 	.me 		= THIS_MODULE
 };
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 98ed782..ebbd319 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -28,7 +28,8 @@
 
 #define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
 			 (1 << NF_INET_POST_ROUTING) | \
-			 (1 << NF_INET_LOCAL_OUT))
+			 (1 << NF_INET_LOCAL_OUT) | \
+			 (1 << NF_INET_LOCAL_IN))
 
 static const struct xt_table nat_table = {
 	.name		= "nat",
@@ -45,7 +46,8 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
 	enum ip_conntrack_info ctinfo;
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
-	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
+	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
+		     par->hooknum == NF_INET_LOCAL_IN);
 
 	ct = nf_ct_get(skb, &ctinfo);
 
@@ -99,7 +101,7 @@ static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
 	return 0;
 }
 
-unsigned int
+static unsigned int
 alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 {
 	/* Force range to this IP; let proto decide mapping for
@@ -141,7 +143,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
 	.target		= ipt_snat_target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
-	.hooks		= 1 << NF_INET_POST_ROUTING,
+	.hooks		= (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
 	.checkentry	= ipt_snat_checkentry,
 	.family		= AF_INET,
 };
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 6723c68..95481fe 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -131,13 +131,7 @@ nf_nat_fn(unsigned int hooknum,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;
 
-			if (hooknum == NF_INET_LOCAL_IN)
-				/* LOCAL_IN hook doesn't have a chain!  */
-				ret = alloc_null_binding(ct, hooknum);
-			else
-				ret = nf_nat_rule_find(skb, hooknum, in, out,
-						       ct);
-
+			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
 			if (ret != NF_ACCEPT)
 				return ret;
 		} else
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH 2/9] IPVS: one-packet scheduling
From: kaber @ 2010-07-02  9:52 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1278064342-19059-1-git-send-email-kaber@trash.net>

From: Nick Chalk <nick@loadbalancer.org>

Allow one-packet scheduling for UDP connections. When the fwmark-based or
normal virtual service is marked with '-o' or '--ops' options all
connections are created only to schedule one packet. Useful to schedule UDP
packets from same client port to different real servers. Recommended with
RR or WRR schedulers (the connections are not visible with ipvsadm -L).

Signed-off-by: Nick Chalk <nick@loadbalancer.org>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/ip_vs.h           |    2 ++
 net/netfilter/ipvs/ip_vs_conn.c |   10 +++++++---
 net/netfilter/ipvs/ip_vs_core.c |   20 ++++++++++++++++----
 net/netfilter/ipvs/ip_vs_ctl.c  |   10 ++++++----
 4 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h
index dfc1703..9708de2 100644
--- a/include/linux/ip_vs.h
+++ b/include/linux/ip_vs.h
@@ -19,6 +19,7 @@
  */
 #define IP_VS_SVC_F_PERSISTENT	0x0001		/* persistent port */
 #define IP_VS_SVC_F_HASHED	0x0002		/* hashed entry */
+#define IP_VS_SVC_F_ONEPACKET	0x0004		/* one-packet scheduling */
 
 /*
  *      Destination Server Flags
@@ -85,6 +86,7 @@
 #define IP_VS_CONN_F_SEQ_MASK	0x0600		/* in/out sequence mask */
 #define IP_VS_CONN_F_NO_CPORT	0x0800		/* no client port set yet */
 #define IP_VS_CONN_F_TEMPLATE	0x1000		/* template, not connection */
+#define IP_VS_CONN_F_ONE_PACKET	0x2000		/* forward only one packet */
 
 #define IP_VS_SCHEDNAME_MAXLEN	16
 #define IP_VS_IFNAME_MAXLEN	16
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index d8f7e8e..717e623 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -158,6 +158,9 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 	unsigned hash;
 	int ret;
 
+	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+		return 0;
+
 	/* Hash by protocol, client address and port */
 	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
 
@@ -355,8 +358,9 @@ struct ip_vs_conn *ip_vs_conn_out_get
  */
 void ip_vs_conn_put(struct ip_vs_conn *cp)
 {
-	/* reset it expire in its timeout */
-	mod_timer(&cp->timer, jiffies+cp->timeout);
+	unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ?
+		0 : cp->timeout;
+	mod_timer(&cp->timer, jiffies+t);
 
 	__ip_vs_conn_put(cp);
 }
@@ -649,7 +653,7 @@ static void ip_vs_conn_expire(unsigned long data)
 	/*
 	 *	unhash it if it is hashed in the conn table
 	 */
-	if (!ip_vs_conn_unhash(cp))
+	if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET))
 		goto expire_later;
 
 	/*
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 1cd6e3f..50907d8 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -194,6 +194,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	struct ip_vs_dest *dest;
 	struct ip_vs_conn *ct;
 	__be16  dport;			/* destination port to forward */
+	__be16  flags;
 	union nf_inet_addr snet;	/* source network of the client,
 					   after masking */
 
@@ -340,6 +341,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		dport = ports[1];
 	}
 
+	flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
+		 && iph.protocol == IPPROTO_UDP)?
+		IP_VS_CONN_F_ONE_PACKET : 0;
+
 	/*
 	 *    Create a new connection according to the template
 	 */
@@ -347,7 +352,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 			    &iph.saddr, ports[0],
 			    &iph.daddr, ports[1],
 			    &dest->addr, dport,
-			    0,
+			    flags,
 			    dest);
 	if (cp == NULL) {
 		ip_vs_conn_put(ct);
@@ -377,7 +382,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	struct ip_vs_conn *cp = NULL;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_dest *dest;
-	__be16 _ports[2], *pptr;
+	__be16 _ports[2], *pptr, flags;
 
 	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -407,6 +412,10 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		return NULL;
 	}
 
+	flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
+		 && iph.protocol == IPPROTO_UDP)?
+		IP_VS_CONN_F_ONE_PACKET : 0;
+
 	/*
 	 *    Create a connection entry.
 	 */
@@ -414,7 +423,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 			    &iph.saddr, pptr[0],
 			    &iph.daddr, pptr[1],
 			    &dest->addr, dest->port ? dest->port : pptr[1],
-			    0,
+			    flags,
 			    dest);
 	if (cp == NULL)
 		return NULL;
@@ -464,6 +473,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
 		int ret, cs;
 		struct ip_vs_conn *cp;
+		__u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
+				iph.protocol == IPPROTO_UDP)?
+				IP_VS_CONN_F_ONE_PACKET : 0;
 		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };
 
 		ip_vs_service_put(svc);
@@ -474,7 +486,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 				    &iph.saddr, pptr[0],
 				    &iph.daddr, pptr[1],
 				    &daddr, 0,
-				    IP_VS_CONN_F_BYPASS,
+				    IP_VS_CONN_F_BYPASS | flags,
 				    NULL);
 		if (cp == NULL)
 			return NF_DROP;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 36dc1d8..0f0c079 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1864,14 +1864,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 					   svc->scheduler->name);
 			else
 #endif
-				seq_printf(seq, "%s  %08X:%04X %s ",
+				seq_printf(seq, "%s  %08X:%04X %s %s ",
 					   ip_vs_proto_name(svc->protocol),
 					   ntohl(svc->addr.ip),
 					   ntohs(svc->port),
-					   svc->scheduler->name);
+					   svc->scheduler->name,
+					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
 		} else {
-			seq_printf(seq, "FWM  %08X %s ",
-				   svc->fwmark, svc->scheduler->name);
+			seq_printf(seq, "FWM  %08X %s %s",
+				   svc->fwmark, svc->scheduler->name,
+				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
 		}
 
 		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
-- 
1.7.0.4


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox