Netdev List
 help / color / mirror / Atom feed
* [PATCH 14/26] netfilter: ipset: use nla_parse_nested()
From: kaber @ 2011-02-02 23:11 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1296688324-7830-1-git-send-email-kaber@trash.net>

From: Patrick McHardy <kaber@trash.net>

Replace calls of the form:

nla_parse(tb, ATTR_MAX, nla_data(attr), nla_len(attr), policy)

by:

nla_parse_nested(tb, ATTR_MAX, attr, policy)

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipset/ip_set_core.c |   42 ++++++++++++++----------------------
 1 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 8a73624..ae0f8b5 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -246,8 +246,7 @@ ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr)
 
 	if (unlikely(!flag_nested(nla)))
 		return -IPSET_ERR_PROTOCOL;
-	if (nla_parse(tb, IPSET_ATTR_IPADDR_MAX, nla_data(nla), nla_len(nla),
-		      ipaddr_policy))
+	if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
 		return -IPSET_ERR_PROTOCOL;
 	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
 		return -IPSET_ERR_PROTOCOL;
@@ -265,8 +264,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 	if (unlikely(!flag_nested(nla)))
 		return -IPSET_ERR_PROTOCOL;
 
-	if (nla_parse(tb, IPSET_ATTR_IPADDR_MAX, nla_data(nla), nla_len(nla),
-		      ipaddr_policy))
+	if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy))
 		return -IPSET_ERR_PROTOCOL;
 	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
 		return -IPSET_ERR_PROTOCOL;
@@ -666,10 +664,8 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	 * Without holding any locks, create private part.
 	 */
 	if (attr[IPSET_ATTR_DATA] &&
-	    nla_parse(tb, IPSET_ATTR_CREATE_MAX,
-	    	      nla_data(attr[IPSET_ATTR_DATA]),
-	    	      nla_len(attr[IPSET_ATTR_DATA]),
-	    	      set->type->create_policy)) {
+	    nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
+			     set->type->create_policy)) {
 	    	ret = -IPSET_ERR_PROTOCOL;
 	    	goto put_out;
 	}
@@ -1169,10 +1165,9 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
 
 	use_lineno = !!attr[IPSET_ATTR_LINENO];
 	if (attr[IPSET_ATTR_DATA]) {
-		if (nla_parse(tb, IPSET_ATTR_ADT_MAX,
-			      nla_data(attr[IPSET_ATTR_DATA]),
-			      nla_len(attr[IPSET_ATTR_DATA]),
-			      set->type->adt_policy))
+		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
+				     attr[IPSET_ATTR_DATA],
+				     set->type->adt_policy))
 			return -IPSET_ERR_PROTOCOL;
 		ret = call_ad(skb, set, tb, IPSET_ADD, flags, use_lineno);
 	} else {
@@ -1182,9 +1177,8 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
 			memset(tb, 0, sizeof(tb));
 			if (nla_type(nla) != IPSET_ATTR_DATA ||
 			    !flag_nested(nla) ||
-			    nla_parse(tb, IPSET_ATTR_ADT_MAX,
-			    	      nla_data(nla), nla_len(nla),
-			    	      set->type->adt_policy))
+			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
+					     set->type->adt_policy))
 				return -IPSET_ERR_PROTOCOL;
 			ret = call_ad(skb, set, tb, IPSET_ADD,
 				      flags, use_lineno);
@@ -1224,10 +1218,9 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
 
 	use_lineno = !!attr[IPSET_ATTR_LINENO];
 	if (attr[IPSET_ATTR_DATA]) {
-		if (nla_parse(tb, IPSET_ATTR_ADT_MAX,
-			      nla_data(attr[IPSET_ATTR_DATA]),
-			      nla_len(attr[IPSET_ATTR_DATA]),
-			      set->type->adt_policy))
+		if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
+				     attr[IPSET_ATTR_DATA],
+				     set->type->adt_policy))
 			return -IPSET_ERR_PROTOCOL;
 		ret = call_ad(skb, set, tb, IPSET_DEL, flags, use_lineno);
 	} else {
@@ -1237,9 +1230,8 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
 			memset(tb, 0, sizeof(*tb));
 			if (nla_type(nla) != IPSET_ATTR_DATA ||
 			    !flag_nested(nla) ||
-			    nla_parse(tb, IPSET_ATTR_ADT_MAX,
-			    	      nla_data(nla), nla_len(nla),
-			    	      set->type->adt_policy))
+			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
+					     set->type->adt_policy))
 				return -IPSET_ERR_PROTOCOL;
 			ret = call_ad(skb, set, tb, IPSET_DEL,
 				      flags, use_lineno);
@@ -1269,10 +1261,8 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
 	if (set == NULL)
 		return -ENOENT;
 
-	if (nla_parse(tb, IPSET_ATTR_ADT_MAX,
-		      nla_data(attr[IPSET_ATTR_DATA]),
-		      nla_len(attr[IPSET_ATTR_DATA]),
-		      set->type->adt_policy))
+	if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
+			     set->type->adt_policy))
 		return -IPSET_ERR_PROTOCOL;
 
 	read_lock_bh(&set->lock);
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 15/26] netfilter: ipset: remove unnecessary includes
From: kaber @ 2011-02-02 23:11 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1296688324-7830-1-git-send-email-kaber@trash.net>

From: Patrick McHardy <kaber@trash.net>

None of the set types need uaccess.h since this is handled centrally
in ip_set_core. Most set types additionally don't need bitops.h and
spinlock.h since they use neither. tcp.h is only needed by those
using before(), udp.h is not needed at all.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipset/ip_set_bitmap_ip.c      |    1 -
 net/netfilter/ipset/ip_set_bitmap_ipmac.c   |    3 ---
 net/netfilter/ipset/ip_set_bitmap_port.c    |    5 -----
 net/netfilter/ipset/ip_set_hash_ip.c        |    3 ---
 net/netfilter/ipset/ip_set_hash_ipport.c    |    3 ---
 net/netfilter/ipset/ip_set_hash_ipportip.c  |    3 ---
 net/netfilter/ipset/ip_set_hash_ipportnet.c |    3 ---
 net/netfilter/ipset/ip_set_hash_net.c       |    3 ---
 net/netfilter/ipset/ip_set_hash_netport.c   |    3 ---
 9 files changed, 0 insertions(+), 27 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 0474400..bca9699 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -13,7 +13,6 @@
 #include <linux/ip.h>
 #include <linux/skbuff.h>
 #include <linux/errno.h>
-#include <linux/uaccess.h>
 #include <linux/bitops.h>
 #include <linux/spinlock.h>
 #include <linux/netlink.h>
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index d826332..5e79017 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -15,9 +15,6 @@
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/errno.h>
-#include <linux/uaccess.h>
-#include <linux/bitops.h>
-#include <linux/spinlock.h>
 #include <linux/if_ether.h>
 #include <linux/netlink.h>
 #include <linux/jiffies.h>
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 92074bb..165f09b 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -9,13 +9,8 @@
 
 #include <linux/module.h>
 #include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
 #include <linux/skbuff.h>
 #include <linux/errno.h>
-#include <linux/uaccess.h>
-#include <linux/bitops.h>
-#include <linux/spinlock.h>
 #include <linux/netlink.h>
 #include <linux/jiffies.h>
 #include <linux/timer.h>
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index 53964bc..43bcce2 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -12,9 +12,6 @@
 #include <linux/ip.h>
 #include <linux/skbuff.h>
 #include <linux/errno.h>
-#include <linux/uaccess.h>
-#include <linux/bitops.h>
-#include <linux/spinlock.h>
 #include <linux/random.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index d9b1928..adbe787 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -12,9 +12,6 @@
 #include <linux/ip.h>
 #include <linux/skbuff.h>
 #include <linux/errno.h>
-#include <linux/uaccess.h>
-#include <linux/bitops.h>
-#include <linux/spinlock.h>
 #include <linux/random.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 80dae9d..22e23ab 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -12,9 +12,6 @@
 #include <linux/ip.h>
 #include <linux/skbuff.h>
 #include <linux/errno.h>
-#include <linux/uaccess.h>
-#include <linux/bitops.h>
-#include <linux/spinlock.h>
 #include <linux/random.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 8eacd8a..6033e8b 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -12,9 +12,6 @@
 #include <linux/ip.h>
 #include <linux/skbuff.h>
 #include <linux/errno.h>
-#include <asm/uaccess.h>
-#include <asm/bitops.h>
-#include <linux/spinlock.h>
 #include <linux/random.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index fb0e6a6..c4db202 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -12,9 +12,6 @@
 #include <linux/ip.h>
 #include <linux/skbuff.h>
 #include <linux/errno.h>
-#include <linux/uaccess.h>
-#include <linux/bitops.h>
-#include <linux/spinlock.h>
 #include <linux/random.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 342250f..34a1656 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -12,9 +12,6 @@
 #include <linux/ip.h>
 #include <linux/skbuff.h>
 #include <linux/errno.h>
-#include <linux/uaccess.h>
-#include <linux/bitops.h>
-#include <linux/spinlock.h>
 #include <linux/random.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 18/26] IPVS: remove duplicate initialisation or rs_table
From: kaber @ 2011-02-02 23:11 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1296688324-7830-1-git-send-email-kaber@trash.net>

From: Simon Horman <horms@verge.net.au>

Signed-off-by: Simon Horman <horms@verge.net.au>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Tested-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipvs/ip_vs_ctl.c |    3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 98df59a..d7c2fa8 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3515,9 +3515,6 @@ int __net_init __ip_vs_control_init(struct net *net)
 	}
 	spin_lock_init(&ipvs->tot_stats->lock);
 
-	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
-		INIT_LIST_HEAD(&ipvs->rs_table[idx]);
-
 	proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
 	proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
 	proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 17/26] IPVS: use z modifier for sizeof() argument
From: kaber @ 2011-02-02 23:11 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1296688324-7830-1-git-send-email-kaber@trash.net>

From: Simon Horman <horms@verge.net.au>

Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Tested-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipvs/ip_vs_core.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index d889f4f..4d06617 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1887,7 +1887,7 @@ static int __net_init __ip_vs_init(struct net *net)
 	ipvs->gen = atomic_read(&ipvs_netns_cnt);
 	atomic_inc(&ipvs_netns_cnt);
 	net->ipvs = ipvs;
-	printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n",
+	printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
 			 sizeof(struct netns_ipvs), ipvs->gen);
 	return 0;
 }
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 23/26] netfilter: ipset: add missing break statemtns in ip_set_get_ip_port()
From: kaber @ 2011-02-02 23:12 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1296688324-7830-1-git-send-email-kaber@trash.net>

From: Patrick McHardy <kaber@trash.net>

Don't fall through in the switch statement, otherwise IPv4 headers
are incorrectly parsed again as IPv6 and the return value will always
be 'false'.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipset/ip_set_getport.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 76737bb..4dd2785 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -118,8 +118,10 @@ ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
 	switch (pf) {
 	case AF_INET:
 		ret = ip_set_get_ip4_port(skb, src, port, &proto);
+		break;
 	case AF_INET6:
 		ret = ip_set_get_ip6_port(skb, src, port, &proto);
+		break;
 	default:
 		return false;
 	}
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 22/26] netfilter: ipset: install ipset related header files
From: kaber @ 2011-02-02 23:12 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1296688324-7830-1-git-send-email-kaber@trash.net>

From: Patrick McHardy <kaber@trash.net>

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild       |    3 +++
 include/linux/netfilter/ipset/Kbuild |    4 ++++
 include/linux/netfilter/xt_set.h     |    1 +
 3 files changed, 8 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netfilter/ipset/Kbuild

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 89c0d1e..ba19544 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -1,3 +1,5 @@
+header-y += ipset/
+
 header-y += nf_conntrack_common.h
 header-y += nf_conntrack_ftp.h
 header-y += nf_conntrack_sctp.h
@@ -55,6 +57,7 @@ header-y += xt_quota.h
 header-y += xt_rateest.h
 header-y += xt_realm.h
 header-y += xt_recent.h
+header-y += xt_set.h
 header-y += xt_sctp.h
 header-y += xt_socket.h
 header-y += xt_state.h
diff --git a/include/linux/netfilter/ipset/Kbuild b/include/linux/netfilter/ipset/Kbuild
new file mode 100644
index 0000000..601fe71
--- /dev/null
+++ b/include/linux/netfilter/ipset/Kbuild
@@ -0,0 +1,4 @@
+header-y += ip_set.h
+header-y += ip_set_bitmap.h
+header-y += ip_set_hash.h
+header-y += ip_set_list.h
diff --git a/include/linux/netfilter/xt_set.h b/include/linux/netfilter/xt_set.h
index 69b2bd1..081f1de 100644
--- a/include/linux/netfilter/xt_set.h
+++ b/include/linux/netfilter/xt_set.h
@@ -1,6 +1,7 @@
 #ifndef _XT_SET_H
 #define _XT_SET_H
 
+#include <linux/types.h>
 #include <linux/netfilter/ipset/ip_set.h>
 
 /* Revision 0 interface: backward compatible with netfilter/iptables */
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 26/26] netfilter: xtables: add device group match
From: kaber @ 2011-02-02 23:12 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1296688324-7830-1-git-send-email-kaber@trash.net>

From: Patrick McHardy <kaber@trash.net>

Add a new 'devgroup' match to match on the device group of the
incoming and outgoing network device of a packet.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild        |    1 +
 include/linux/netfilter/xt_devgroup.h |   21 ++++++++
 net/netfilter/Kconfig                 |    9 ++++
 net/netfilter/Makefile                |    1 +
 net/netfilter/xt_devgroup.c           |   82 +++++++++++++++++++++++++++++++++
 5 files changed, 114 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netfilter/xt_devgroup.h
 create mode 100644 net/netfilter/xt_devgroup.c

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index ba19544..15e83bf 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -37,6 +37,7 @@ header-y += xt_connmark.h
 header-y += xt_conntrack.h
 header-y += xt_cpu.h
 header-y += xt_dccp.h
+header-y += xt_devgroup.h
 header-y += xt_dscp.h
 header-y += xt_esp.h
 header-y += xt_hashlimit.h
diff --git a/include/linux/netfilter/xt_devgroup.h b/include/linux/netfilter/xt_devgroup.h
new file mode 100644
index 0000000..1babde0
--- /dev/null
+++ b/include/linux/netfilter/xt_devgroup.h
@@ -0,0 +1,21 @@
+#ifndef _XT_DEVGROUP_H
+#define _XT_DEVGROUP_H
+
+#include <linux/types.h>
+
+enum xt_devgroup_flags {
+	XT_DEVGROUP_MATCH_SRC	= 0x1,
+	XT_DEVGROUP_INVERT_SRC	= 0x2,
+	XT_DEVGROUP_MATCH_DST	= 0x4,
+	XT_DEVGROUP_INVERT_DST	= 0x8,
+};
+
+struct xt_devgroup_info {
+	__u32	flags;
+	__u32	src_group;
+	__u32	src_mask;
+	__u32	dst_group;
+	__u32	dst_mask;
+};
+
+#endif /* _XT_DEVGROUP_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 06fa9e4..82a6e0d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -738,6 +738,15 @@ config NETFILTER_XT_MATCH_DCCP
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.txt>.  If unsure, say `N'.
 
+config NETFILTER_XT_MATCH_DEVGROUP
+	tristate '"devgroup" match support'
+	depends on NETFILTER_ADVANCED
+	help
+	  This options adds a `devgroup' match, which allows to match on the
+	  device group a network device is assigned to.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_DSCP
 	tristate '"dscp" and "tos" match support'
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 1148643..d57a890 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_DEVGROUP) += xt_devgroup.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
diff --git a/net/netfilter/xt_devgroup.c b/net/netfilter/xt_devgroup.c
new file mode 100644
index 0000000..d9202cd
--- /dev/null
+++ b/net/netfilter/xt_devgroup.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+
+#include <linux/netfilter/xt_devgroup.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: Device group match");
+MODULE_ALIAS("ipt_devgroup");
+MODULE_ALIAS("ip6t_devgroup");
+
+static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_devgroup_info *info = par->matchinfo;
+
+	if (info->flags & XT_DEVGROUP_MATCH_SRC &&
+	    (((info->src_group ^ par->in->group) & info->src_mask ? 1 : 0) ^
+	     ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0)))
+		return false;
+
+	if (info->flags & XT_DEVGROUP_MATCH_DST &&
+	    (((info->dst_group ^ par->out->group) & info->dst_mask ? 1 : 0) ^
+	     ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0)))
+		return false;
+
+	return true;
+}
+
+static int devgroup_mt_checkentry(const struct xt_mtchk_param *par)
+{
+	const struct xt_devgroup_info *info = par->matchinfo;
+
+	if (info->flags & ~(XT_DEVGROUP_MATCH_SRC | XT_DEVGROUP_INVERT_SRC |
+			    XT_DEVGROUP_MATCH_DST | XT_DEVGROUP_INVERT_DST))
+		return -EINVAL;
+
+	if (info->flags & XT_DEVGROUP_MATCH_SRC &&
+	    par->hook_mask & ~((1 << NF_INET_PRE_ROUTING) |
+			       (1 << NF_INET_LOCAL_IN) |
+			       (1 << NF_INET_FORWARD)))
+		return -EINVAL;
+
+	if (info->flags & XT_DEVGROUP_MATCH_DST &&
+	    par->hook_mask & ~((1 << NF_INET_FORWARD) |
+			       (1 << NF_INET_LOCAL_OUT) |
+			       (1 << NF_INET_POST_ROUTING)))
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct xt_match devgroup_mt_reg __read_mostly = {
+	.name		= "devgroup",
+	.match		= devgroup_mt,
+	.checkentry	= devgroup_mt_checkentry,
+	.matchsize	= sizeof(struct xt_devgroup_info),
+	.family		= NFPROTO_UNSPEC,
+	.me		= THIS_MODULE
+};
+
+static int __init devgroup_mt_init(void)
+{
+	return xt_register_match(&devgroup_mt_reg);
+}
+
+static void __exit devgroup_mt_exit(void)
+{
+	xt_unregister_match(&devgroup_mt_reg);
+}
+
+module_init(devgroup_mt_init);
+module_exit(devgroup_mt_exit);
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 25/26] netfilter: ipset: send error message manually
From: kaber @ 2011-02-02 23:12 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1296688324-7830-1-git-send-email-kaber@trash.net>

From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>

When a message carries multiple commands and one of them triggers
an error, we have to report to the userspace which one was that.
The line number of the command plays this role and there's an attribute
reserved in the header part of the message to be filled out with the error
line number. In order not to modify the original message received from
the userspace, we construct a new, complete netlink error message and
modifies the attribute there, then send it.
Netlink is notified not to send its ACK/error message.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipset/ip_set_core.c |   33 ++++++++++++++++++++++++++-------
 1 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index ae0f8b5..8b1a54c 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1098,7 +1098,7 @@ static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = {
 };
 
 static int
-call_ad(struct sk_buff *skb, struct ip_set *set,
+call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
 	struct nlattr *tb[], enum ipset_adt adt,
 	u32 flags, bool use_lineno)
 {
@@ -1118,12 +1118,25 @@ call_ad(struct sk_buff *skb, struct ip_set *set,
 		return 0;
 	if (lineno && use_lineno) {
 		/* Error in restore/batch mode: send back lineno */
-		struct nlmsghdr *nlh = nlmsg_hdr(skb);
+		struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb);
+		struct sk_buff *skb2;
+		struct nlmsgerr *errmsg;
+		size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
 		int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
 		struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
-		struct nlattr *cmdattr = (void *)nlh + min_len;
+		struct nlattr *cmdattr;
 		u32 *errline;
 
+		skb2 = nlmsg_new(payload, GFP_KERNEL);
+		if (skb2 == NULL)
+			return -ENOMEM;
+		rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid,
+				  nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
+		errmsg = nlmsg_data(rep);
+		errmsg->error = ret;
+		memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
+		cmdattr = (void *)&errmsg->msg + min_len;
+
 		nla_parse(cda, IPSET_ATTR_CMD_MAX,
 			  cmdattr, nlh->nlmsg_len - min_len,
 			  ip_set_adt_policy);
@@ -1131,6 +1144,10 @@ call_ad(struct sk_buff *skb, struct ip_set *set,
 		errline = nla_data(cda[IPSET_ATTR_LINENO]);
 
 		*errline = lineno;
+
+		netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+		/* Signal netlink not to send its ACK/errmsg.  */
+		return -EINTR;
 	}
 
 	return ret;
@@ -1169,7 +1186,8 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
 				     attr[IPSET_ATTR_DATA],
 				     set->type->adt_policy))
 			return -IPSET_ERR_PROTOCOL;
-		ret = call_ad(skb, set, tb, IPSET_ADD, flags, use_lineno);
+		ret = call_ad(ctnl, skb, set, tb, IPSET_ADD, flags,
+			      use_lineno);
 	} else {
 		int nla_rem;
 
@@ -1180,7 +1198,7 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
 			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
 					     set->type->adt_policy))
 				return -IPSET_ERR_PROTOCOL;
-			ret = call_ad(skb, set, tb, IPSET_ADD,
+			ret = call_ad(ctnl, skb, set, tb, IPSET_ADD,
 				      flags, use_lineno);
 			if (ret < 0)
 				return ret;
@@ -1222,7 +1240,8 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
 				     attr[IPSET_ATTR_DATA],
 				     set->type->adt_policy))
 			return -IPSET_ERR_PROTOCOL;
-		ret = call_ad(skb, set, tb, IPSET_DEL, flags, use_lineno);
+		ret = call_ad(ctnl, skb, set, tb, IPSET_DEL, flags,
+			      use_lineno);
 	} else {
 		int nla_rem;
 
@@ -1233,7 +1252,7 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
 			    nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
 					     set->type->adt_policy))
 				return -IPSET_ERR_PROTOCOL;
-			ret = call_ad(skb, set, tb, IPSET_DEL,
+			ret = call_ad(ctnl, skb, set, tb, IPSET_DEL,
 				      flags, use_lineno);
 			if (ret < 0)
 				return ret;
-- 
1.7.2.3


^ permalink raw reply related

* Re: [PATCH 00/26] netfilter: netfilter update
From: David Miller @ 2011-02-02 23:24 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel, netdev
In-Reply-To: <1296688324-7830-1-git-send-email-kaber@trash.net>

From: kaber@trash.net
Date: Thu,  3 Feb 2011 00:11:38 +0100

> following is another netfilter update for net-next-2.6, containing:
> 
> - IP set from Jozsef, with some bugfixes from Jozsef and me on top

W00t!

> - IPVS fixes from Simon
> 
> - a new xtables 'devgroup' match to match on the network device group,
>   from myself
> 
> Please pull from:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6.git master

Pulled, thanks a lot!

^ permalink raw reply

* RE: [PATCH] bna: use device model DMA API
From: Debashis Dutt @ 2011-02-02 23:04 UTC (permalink / raw)
  To: David Miller, ivecera@redhat.com; +Cc: netdev@vger.kernel.org, Rasesh Mody
In-Reply-To: <20110202.150308.246520172.davem@davemloft.net>



-----Original Message-----
From: David Miller [mailto:davem@davemloft.net] 
Sent: Wednesday, February 02, 2011 3:03 PM
To: ivecera@redhat.com
Cc: netdev@vger.kernel.org; Rasesh Mody; Debashis Dutt
Subject: Re: [PATCH] bna: use device model DMA API

From: Ivan Vecera <ivecera@redhat.com>
Date: Wed,  2 Feb 2011 15:37:02 +0100

> Use DMA API as PCI equivalents will be deprecated.
> 
> Signed-off-by: Ivan Vecera <ivecera@redhat.com>

Looks good to me, can we get some review from the BNA maintainers
please?

Ivan, David, 

I just finished reviewing the patches. Looks good to me too.

Thanks
--Debashis

^ permalink raw reply

* Re: [PATCH] bna: use device model DMA API
From: David Miller @ 2011-02-02 23:41 UTC (permalink / raw)
  To: debdut; +Cc: ivecera, netdev, rmody, ddutt
In-Reply-To: <AANLkTinG_DcSPc7ReyxXE6T5fhoQkKmmtMLv4j9BAxMx@mail.gmail.com>

From: Debashis Dutt <debdut@gmail.com>
Date: Wed, 2 Feb 2011 15:12:00 -0800

> On Wed, Feb 2, 2011 at 3:03 PM, David Miller <davem@davemloft.net> wrote:
>>
>> From: Ivan Vecera <ivecera@redhat.com>
>> Date: Wed,  2 Feb 2011 15:37:02 +0100
>>
>> > Use DMA API as PCI equivalents will be deprecated.
>> >
>> > Signed-off-by: Ivan Vecera <ivecera@redhat.com>
>>
>> Looks good to me, can we get some review from the BNA maintainers
>> please?
>> --
> 
> For some reason, ACK from my Brocade mail Id does not show up in the
> netdev mailing list.
> 
> The patch looks good to me too.

Thanks for reviewing, applied.

^ permalink raw reply

* [PATCH 3/4] netfilter: arpt_mangle: fix return values of checkentry
From: kaber @ 2011-02-02 23:45 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1296690330-9887-1-git-send-email-kaber@trash.net>

From: Pablo Neira Ayuso <pablo@netfilter.org>

In 135367b "netfilter: xtables: change xt_target.checkentry return type",
the type returned by checkentry was changed from boolean to int, but the
return values where not adjusted.

arptables: Input/output error

This broke arptables with the mangle target since it returns true
under success, which is interpreted by xtables as >0, thus
returning EIO.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/arpt_mangle.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index b8ddcc4..a5e52a9 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -60,12 +60,12 @@ static int checkentry(const struct xt_tgchk_param *par)
 
 	if (mangle->flags & ~ARPT_MANGLE_MASK ||
 	    !(mangle->flags & ARPT_MANGLE_MASK))
-		return false;
+		return -EINVAL;
 
 	if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT &&
 	   mangle->target != XT_CONTINUE)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_target arpt_mangle_reg __read_mostly = {
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 4/4] netfilter: ecache: always set events bits, filter them later
From: kaber @ 2011-02-02 23:45 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1296690330-9887-1-git-send-email-kaber@trash.net>

From: Pablo Neira Ayuso <pablo@netfilter.org>

For the following rule:

iptables -I PREROUTING -t raw -j CT --ctevents assured

The event delivered looks like the following:

 [UPDATE] tcp      6 src=192.168.0.2 dst=192.168.1.2 sport=37041 dport=80 src=192.168.1.2 dst=192.168.1.100 sport=80 dport=37041 [ASSURED]

Note that the TCP protocol state is not included. For that reason
the CT event filtering is not very useful for conntrackd.

To resolve this issue, instead of conditionally setting the CT events
bits based on the ctmask, we always set them and perform the filtering
in the late stage, just before the delivery.

Thus, the event delivered looks like the following:

 [UPDATE] tcp      6 432000 ESTABLISHED src=192.168.0.2 dst=192.168.1.2 sport=37041 dport=80 src=192.168.1.2 dst=192.168.1.100 sport=80 dport=37041 [ASSURED]

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_ecache.h |    3 ---
 net/netfilter/nf_conntrack_ecache.c         |    3 +++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 96ba5f7..349cefe 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -77,9 +77,6 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 	if (e == NULL)
 		return;
 
-	if (!(e->ctmask & (1 << event)))
-		return;
-
 	set_bit(event, &e->cache);
 }
 
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 5702de3..63a1b91 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -63,6 +63,9 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
 		 * this does not harm and it happens very rarely. */
 		unsigned long missed = e->missed;
 
+		if (!((events | missed) & e->ctmask))
+			goto out_unlock;
+
 		ret = notify->fcn(events | missed, &item);
 		if (unlikely(ret < 0 || missed)) {
 			spin_lock_bh(&ct->lock);
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 0/4] netfilter: netfilter fixes
From: kaber @ 2011-02-02 23:45 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev

Hi Dave,

following are a couple of netfilter fixes for 2.6.38, containing:

- a missing conntrack reference count increment during ctnetlink table
  dumping, leading to a crash, from Pablo

- a fix for mismatches in the IPv6 iprange match, from Thomas Jacob

- a fix to make arp_tables mangling work again after some cleanups,
  from Pablo

- a fix for missing information in ctnetlink when events are filtered
  using the CT target, from Pablo

Please pull from:

git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-2.6.git master

Thanks!

^ permalink raw reply

* [PATCH 2/4] netfilter: xt_iprange: Incorrect xt_iprange boundary check for IPv6
From: kaber @ 2011-02-02 23:45 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1296690330-9887-1-git-send-email-kaber@trash.net>

From: Thomas Jacob <jacob@internet24.de>

iprange_ipv6_sub was substracting 2 unsigned ints and then casting
the result to int to find out whether they are lt, eq or gt each
other, this doesn't work if the full 32 bits of each part
can be used in IPv6 addresses. Patch should remedy that without
significant performance penalties. Also number of ntohl
calls can be reduced this way (Jozsef Kadlecsik).

Signed-off-by: Thomas Jacob <jacob@internet24.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_iprange.c |   16 +++++++---------
 1 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 88f7c35..73c33a4 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -53,15 +53,13 @@ iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par)
 }
 
 static inline int
-iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b)
+iprange_ipv6_lt(const struct in6_addr *a, const struct in6_addr *b)
 {
 	unsigned int i;
-	int r;
 
 	for (i = 0; i < 4; ++i) {
-		r = ntohl(a->s6_addr32[i]) - ntohl(b->s6_addr32[i]);
-		if (r != 0)
-			return r;
+		if (a->s6_addr32[i] != b->s6_addr32[i])
+			return ntohl(a->s6_addr32[i]) < ntohl(b->s6_addr32[i]);
 	}
 
 	return 0;
@@ -75,15 +73,15 @@ iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 	bool m;
 
 	if (info->flags & IPRANGE_SRC) {
-		m  = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0;
-		m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0;
+		m  = iprange_ipv6_lt(&iph->saddr, &info->src_min.in6);
+		m |= iprange_ipv6_lt(&info->src_max.in6, &iph->saddr);
 		m ^= !!(info->flags & IPRANGE_SRC_INV);
 		if (m)
 			return false;
 	}
 	if (info->flags & IPRANGE_DST) {
-		m  = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0;
-		m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0;
+		m  = iprange_ipv6_lt(&iph->daddr, &info->dst_min.in6);
+		m |= iprange_ipv6_lt(&info->dst_max.in6, &iph->daddr);
 		m ^= !!(info->flags & IPRANGE_DST_INV);
 		if (m)
 			return false;
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 1/4] netfilter: ctnetlink: fix missing refcount increment during dumps
From: kaber @ 2011-02-02 23:45 UTC (permalink / raw)
  To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1296690330-9887-1-git-send-email-kaber@trash.net>

From: Pablo Neira Ayuso <pablo@netfilter.org>

In 13ee6ac netfilter: fix race in conntrack between dump_table and
destroy, we recovered spinlocks to protect the dump of the conntrack
table according to reports from Stephen and acknowledgments on the
issue from Eric.

In that patch, the refcount bump that allows to keep a reference
to the current ct object was removed. However, we still decrement
the refcount for that object in the output path of
ctnetlink_dump_table():

        if (last)
                nf_ct_put(last)

Cc: Stephen Hemminger <stephen.hemminger@vyatta.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_netlink.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 93297aa..eead9db 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -667,6 +667,7 @@ restart:
 			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
 						cb->nlh->nlmsg_seq,
 						IPCTNL_MSG_CT_NEW, ct) < 0) {
+				nf_conntrack_get(&ct->ct_general);
 				cb->args[1] = (unsigned long)ct;
 				goto out;
 			}
-- 
1.7.2.3


^ permalink raw reply related

* Re: [PATCH 0/4] netfilter: netfilter fixes
From: David Miller @ 2011-02-02 23:52 UTC (permalink / raw)
  To: kaber; +Cc: netfilter-devel, netdev
In-Reply-To: <1296690330-9887-1-git-send-email-kaber@trash.net>

From: kaber@trash.net
Date: Thu,  3 Feb 2011 00:45:26 +0100

> following are a couple of netfilter fixes for 2.6.38, containing:
> 
> - a missing conntrack reference count increment during ctnetlink table
>   dumping, leading to a crash, from Pablo
> 
> - a fix for mismatches in the IPv6 iprange match, from Thomas Jacob
> 
> - a fix to make arp_tables mangling work again after some cleanups,
>   from Pablo
> 
> - a fix for missing information in ctnetlink when events are filtered
>   using the CT target, from Pablo
> 
> Please pull from:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-2.6.git master

Pulled, thanks Patrick.

^ permalink raw reply

* [iproute2 PATCH 1/3]: xfrm security context support
From: Joy Latten @ 2011-02-02 23:31 UTC (permalink / raw)
  To: netdev; +Cc: shemminger

In the Linux kernel, ipsec policy and SAs can include a
security context to support MAC networking. This feature
is often referred to as "labeled ipsec".

This patchset adds security context support into ip xfrm 
such that a security context can be included when 
add/delete/display SAs and policies with the ip command.
The user provides the security context when adding 
SAs and policies. If a policy or SA contains a security
context, the changes allow the security context to be displayed.

For example, 
ip xfrm state
src 10.1.1.6 dst 10.1.1.2
	proto esp spi 0x00000301 reqid 0 mode transport
	replay-window 0 
	auth hmac(digest_null) 0x3078
	enc cbc(des3_ede) 0x6970763672656164796c6f676f33646573636263696e3031
	security context root:system_r:unconfined_t:s0 


Please  let me know if all is ok with the patchset.
Thanks!!

regards,
Joy


Signed-off-by:  Joy Latten <latten@austin.ibm.com>
---
 ip/ipxfrm.c |   28 ++++++++++++++++++++++++++++
 ip/xfrm.h   |    3 ++-
 2 files changed, 30 insertions(+), 1 deletions(-)

diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c
index 9753822..cc4dc80 100644
--- a/ip/ipxfrm.c
+++ b/ip/ipxfrm.c
@@ -850,6 +850,20 @@ void xfrm_state_info_print(struct xfrm_usersa_info *xsinfo,
 		xfrm_lifetime_print(&xsinfo->lft, &xsinfo->curlft, fp, buf);
 		xfrm_stats_print(&xsinfo->stats, fp, buf);
 	}
+
+	if (tb[XFRMA_SEC_CTX]) {
+		struct xfrm_user_sec_ctx *sctx;
+
+		fprintf(fp, "\tsecurity context ");
+
+		if (RTA_PAYLOAD(tb[XFRMA_SEC_CTX]) < sizeof(*sctx))
+			fprintf(fp, "(ERROR truncated)");
+
+		sctx = (struct xfrm_user_sec_ctx *)RTA_DATA(tb[XFRMA_SEC_CTX]);
+
+		fprintf(fp, "%s %s", (char *)(sctx + 1), _SL_);
+	}
+
 }
 
 void xfrm_policy_info_print(struct xfrm_userpolicy_info *xpinfo,
@@ -862,6 +876,20 @@ void xfrm_policy_info_print(struct xfrm_userpolicy_info *xpinfo,
 
 	xfrm_selector_print(&xpinfo->sel, preferred_family, fp, title);
 
+	if (tb[XFRMA_SEC_CTX]) {
+		struct xfrm_user_sec_ctx *sctx;
+
+		fprintf(fp, "\tsecurity context ");
+
+		if (RTA_PAYLOAD(tb[XFRMA_SEC_CTX]) < sizeof(*sctx))
+			fprintf(fp, "(ERROR truncated)");
+
+		sctx = (struct xfrm_user_sec_ctx *)RTA_DATA(tb[XFRMA_SEC_CTX]);
+
+		fprintf(fp, "%s ", (char *)(sctx + 1));
+		fprintf(fp, "%s", _SL_);
+	}
+
 	if (prefix)
 		STRBUF_CAT(buf, prefix);
 	STRBUF_CAT(buf, "\t");
diff --git a/ip/xfrm.h b/ip/xfrm.h
index d3ca5c5..784a201 100644
--- a/ip/xfrm.h
+++ b/ip/xfrm.h
@@ -154,5 +154,6 @@ int xfrm_reqid_parse(__u32 *reqid, int *argcp, char ***argvp);
 int xfrm_selector_parse(struct xfrm_selector *sel, int *argcp, char ***argvp);
 int xfrm_lifetime_cfg_parse(struct xfrm_lifetime_cfg *lft,
 			    int *argcp, char ***argvp);
-
+int xfrm_sctx_parse(char *ctxstr, char *context,
+		    struct xfrm_user_sec_ctx *sctx);
 #endif
-- 
1.5.5.6


^ permalink raw reply related

* [iproute2 PATCH 2/3]: xfrm security context support
From: Joy Latten @ 2011-02-02 23:32 UTC (permalink / raw)
  To: netdev; +Cc: shemminger

Adds security context support to ip xfrm policy.

Signed-off-by: Joy Latten <latten@austin.ibm.com>
---
 ip/xfrm_policy.c |   62 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/ip/xfrm_policy.c b/ip/xfrm_policy.c
index dcb3da4..9ef5c09 100644
--- a/ip/xfrm_policy.c
+++ b/ip/xfrm_policy.c
@@ -48,14 +48,15 @@
 #define NLMSG_BUF_SIZE 4096
 #define RTA_BUF_SIZE 2048
 #define XFRM_TMPLS_BUF_SIZE 1024
+#define CTX_BUF_SIZE 256
 
 static void usage(void) __attribute__((noreturn));
 
 static void usage(void)
 {
-	fprintf(stderr, "Usage: ip xfrm policy { add | update } dir DIR SELECTOR [ index INDEX ] [ ptype PTYPE ]\n");
+	fprintf(stderr, "Usage: ip xfrm policy { add | update } dir DIR SELECTOR [ ctx SEC_CTX ][ index INDEX ] [ ptype PTYPE ]\n");
 	fprintf(stderr, "        [ action ACTION ] [ priority PRIORITY ] [ flag FLAG-LIST ] [ LIMIT-LIST ] [ TMPL-LIST ] [mark MARK [mask MASK]]\n");
-	fprintf(stderr, "Usage: ip xfrm policy { delete | get } dir DIR [ SELECTOR | index INDEX ] [ ptype PTYPE ] [mark MARK [mask MASK]]\n");
+	fprintf(stderr, "Usage: ip xfrm policy { delete | get } dir DIR [ SELECTOR | index INDEX ] [ ctx SEC_CTX ][ ptype PTYPE ] [mark MARK [mask MASK]]\n");
 	fprintf(stderr, "Usage: ip xfrm policy { deleteall | list } [ dir DIR ] [ SELECTOR ]\n");
 	fprintf(stderr, "        [ index INDEX ] [ action ACTION ] [ priority PRIORITY ]  [ flag FLAG-LIST ]\n");
 	fprintf(stderr, "Usage: ip xfrm policy flush [ ptype PTYPE ]\n");
@@ -222,6 +223,23 @@ static int xfrm_tmpl_parse(struct xfrm_user_tmpl *tmpl,
 	return 0;
 }
 
+int xfrm_sctx_parse(char *ctxstr, char *s,
+			   struct xfrm_user_sec_ctx *sctx)
+{
+	int slen;
+
+	slen = strlen(s) + 1;
+
+	sctx->exttype = XFRMA_SEC_CTX;
+	sctx->ctx_doi = 1;
+	sctx->ctx_alg = 1;
+	sctx->ctx_len = slen;
+	sctx->len = sizeof(struct xfrm_user_sec_ctx) + slen;
+	memcpy(ctxstr, s, slen);
+
+	return 0;
+}
+
 static int xfrm_policy_modify(int cmd, unsigned flags, int argc, char **argv)
 {
 	struct rtnl_handle rth;
@@ -233,14 +251,20 @@ static int xfrm_policy_modify(int cmd, unsigned flags, int argc, char **argv)
 	char *dirp = NULL;
 	char *selp = NULL;
 	char *ptypep = NULL;
+	char *sctxp = NULL;
 	struct xfrm_userpolicy_type upt;
 	char tmpls_buf[XFRM_TMPLS_BUF_SIZE];
 	int tmpls_len = 0;
 	struct xfrm_mark mark = {0, 0};
+	struct {
+		struct xfrm_user_sec_ctx sctx;
+		char	str[CTX_BUF_SIZE];
+	} ctx;
 
 	memset(&req, 0, sizeof(req));
 	memset(&upt, 0, sizeof(upt));
 	memset(&tmpls_buf, 0, sizeof(tmpls_buf));
+	memset(&ctx, 0, sizeof(ctx));
 
 	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.xpinfo));
 	req.n.nlmsg_flags = NLM_F_REQUEST|flags;
@@ -260,6 +284,15 @@ static int xfrm_policy_modify(int cmd, unsigned flags, int argc, char **argv)
 
 			NEXT_ARG();
 			xfrm_policy_dir_parse(&req.xpinfo.dir, &argc, &argv);
+		} else if (strcmp(*argv, "ctx") == 0) {
+			char *context;
+
+			if (sctxp)
+				duparg("ctx", *argv);
+			sctxp = *argv;
+			NEXT_ARG();
+			context = *argv;
+			xfrm_sctx_parse((char *)&ctx.str, context, &ctx.sctx);
 		} else if (strcmp(*argv, "mark") == 0) {
 			xfrm_parse_mark(&mark, &argc, &argv);
 		} else if (strcmp(*argv, "index") == 0) {
@@ -347,6 +380,10 @@ static int xfrm_policy_modify(int cmd, unsigned flags, int argc, char **argv)
 		}
 	}
 
+	if (sctxp) {
+		addattr_l(&req.n, sizeof(req), XFRMA_SEC_CTX,
+			  (void *)&ctx, ctx.sctx.len);
+	}
 
 	if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0)
 		exit(1);
@@ -528,11 +565,18 @@ static int xfrm_policy_get_or_delete(int argc, char **argv, int delete,
 	char *selp = NULL;
 	char *indexp = NULL;
 	char *ptypep = NULL;
+	char *sctxp = NULL;
 	struct xfrm_userpolicy_type upt;
 	struct xfrm_mark mark = {0, 0};
+	struct {
+		struct xfrm_user_sec_ctx sctx;
+		char    str[CTX_BUF_SIZE];
+	} ctx;
+
 
 	memset(&req, 0, sizeof(req));
 	memset(&upt, 0, sizeof(upt));
+	memset(&ctx, 0, sizeof(ctx));
 
 	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.xpid));
 	req.n.nlmsg_flags = NLM_F_REQUEST;
@@ -547,6 +591,15 @@ static int xfrm_policy_get_or_delete(int argc, char **argv, int delete,
 			NEXT_ARG();
 			xfrm_policy_dir_parse(&req.xpid.dir, &argc, &argv);
 
+		} else if (strcmp(*argv, "ctx") == 0) {
+			char *context;
+
+			if (sctxp)
+				duparg("ctx", *argv);
+			sctxp = *argv;
+			NEXT_ARG();
+			context = *argv;
+			xfrm_sctx_parse((char *)&ctx.str, context, &ctx.sctx);
 		} else if (strcmp(*argv, "mark") == 0) {
 			xfrm_parse_mark(&mark, &argc, &argv);
 		} else if (strcmp(*argv, "index") == 0) {
@@ -610,6 +663,11 @@ static int xfrm_policy_get_or_delete(int argc, char **argv, int delete,
 		}
 	}
 
+	if (sctxp) {
+		addattr_l(&req.n, sizeof(req), XFRMA_SEC_CTX,
+			  (void *)&ctx, ctx.sctx.len);
+	}
+
 	if (rtnl_talk(&rth, &req.n, 0, 0, res_nlbuf, NULL, NULL) < 0)
 		exit(2);
 
-- 
1.5.5.6


^ permalink raw reply related

* [iproute2 PATCH 3/3]: xfrm security context support
From: Joy Latten @ 2011-02-02 23:32 UTC (permalink / raw)
  To: netdev; +Cc: shemminger

Adds security context support to ip xfrm state.

Signed-off-by: Joy Latten <latten@austin.ibm.com>
---
 ip/xfrm_state.c |   22 +++++++++++++++++++++-
 1 files changed, 21 insertions(+), 1 deletions(-)

diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
index 38d4039..165888d 100644
--- a/ip/xfrm_state.c
+++ b/ip/xfrm_state.c
@@ -50,12 +50,13 @@
 #define NLMSG_BUF_SIZE 4096
 #define RTA_BUF_SIZE 2048
 #define XFRM_ALGO_KEY_BUF_SIZE 512
+#define CTX_BUF_SIZE 256
 
 static void usage(void) __attribute__((noreturn));
 
 static void usage(void)
 {
-	fprintf(stderr, "Usage: ip xfrm state { add | update } ID [ XFRM_OPT ] [ mode MODE ]\n");
+	fprintf(stderr, "Usage: ip xfrm state { add | update } ID [ XFRM_OPT ] [ ctx SEC_CTX ] [ mode MODE ]\n");
 	fprintf(stderr, "        [ reqid REQID ] [ seq SEQ ] [ replay-window SIZE ] [ flag FLAG-LIST ]\n");
 	fprintf(stderr, "        [ encap ENCAP ] [ sel SELECTOR ] [ replay-seq SEQ ]\n");
 	fprintf(stderr, "        [ replay-oseq SEQ ] [ LIMIT-LIST ]\n");
@@ -246,10 +247,16 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 	char *aalgop = NULL;
 	char *calgop = NULL;
 	char *coap = NULL;
+	char *sctxp = NULL;
 	struct xfrm_mark mark = {0, 0};
+	struct {
+		struct xfrm_user_sec_ctx sctx;
+		char    str[CTX_BUF_SIZE];
+	} ctx;
 
 	memset(&req, 0, sizeof(req));
 	memset(&replay, 0, sizeof(replay));
+	memset(&ctx, 0, sizeof(ctx));
 
 	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.xsinfo));
 	req.n.nlmsg_flags = NLM_F_REQUEST|flags;
@@ -333,6 +340,19 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 
 			addattr_l(&req.n, sizeof(req.buf), XFRMA_COADDR,
 				  (void *)&xcoa, sizeof(xcoa));
+		} else if (strcmp(*argv, "ctx") == 0) {
+			char *context;
+
+			if (sctxp)
+				duparg("ctx", *argv);
+			sctxp = *argv;
+
+			NEXT_ARG();
+			context = *argv;
+
+			xfrm_sctx_parse((char *)&ctx.str, context, &ctx.sctx);
+			addattr_l(&req.n, sizeof(req.buf), XFRMA_SEC_CTX,
+				  (void *)&ctx, ctx.sctx.len);
 		} else {
 			/* try to assume ALGO */
 			int type = xfrm_algotype_getbyname(*argv);
-- 
1.5.5.6


^ permalink raw reply related

* 2.6.38-rc3-git1: Reported regressions 2.6.36 -> 2.6.37
From: Rafael J. Wysocki @ 2011-02-03  0:03 UTC (permalink / raw)
  To: Linux Kernel Mailing List
  Cc: Maciej Rutecki, Florian Mickler, Andrew Morton, Linus Torvalds,
	Kernel Testers List, Network Development, Linux ACPI,
	Linux PM List, Linux SCSI List, Linux Wireless List, DRI

This message contains a list of some post-2.6.36 regressions introduced before
2.6.37, for which there are no fixes in the mainline known to the tracking team.
If any of them have been fixed already, please let us know.

If you know of any other unresolved post-2.6.36 regressions, please let us know
either and we'll add them to the list.  Also, please let us know if any
of the entries below are invalid.

Each entry from the list will be sent additionally in an automatic reply to
this message with CCs to the people involved in reporting and handling the
issue.


Listed regressions statistics:

  Date          Total  Pending  Unresolved
  ----------------------------------------
  2011-02-03      118       36          31
  2010-12-30       85       32          26
  2010-12-19       73       28          24
  2010-12-03       55       25          19
  2010-11-19       39       29          25


Unresolved regressions
----------------------

Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=27892
Subject		: SNB: GPU hang with Slip xscreensaver
Submitter	: Takashi Iwai <tiwai@suse.de>
Date		: 2011-01-31 12:06 (3 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=27842
Subject		: [regression?] hang with 2.6.37 on a BTRFS test machine
Submitter	: Martin Steigerwald <Martin@lichtvoll.de>
Date		: 2011-01-23 12:06 (11 days old)
Message-ID	: <201101231306.23069.Martin@lichtvoll.de>
References	: http://marc.info/?l=linux-kernel&m=129578445613283&w=2


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=27642
Subject		: 2.6.37 says WARNING: at arch/x86/kernel/apic/apic.c:1287 setup_local_APIC+0x18f/0x263()
Submitter	: Rob Landley <rlandley@parallels.com>
Date		: 2011-01-18 13:11 (16 days old)
Message-ID	: <4D359188.3040408@parallels.com>
References	: http://marc.info/?l=linux-kernel&m=129535632319892&w=2


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=27532
Subject		: ath9k prevents CPU from entering lower C-states
Submitter	: Thomas Bächler <thomas@archlinux.org>
Date		: 2011-01-24 22:43 (10 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=27402
Subject		: Atheros adapter no longer loads firmware
Submitter	: Michal Vaner <vorner@ucw.cz>
Date		: 2011-01-23 15:29 (11 days old)
First-Bad-Commit: http://git.kernel.org/linus/be93112accb42c5586a459683d71975cc70673ca


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=27152
Subject		: VGA output broken at cold boot
Submitter	: Takashi Iwai <tiwai@suse.de>
Date		: 2011-01-20 13:26 (14 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=27132
Subject		: flush-btrfs gets into an infinite loop
Submitter	: Artem Anisimov <aanisimov@inbox.ru>
Date		: 2011-01-20 11:51 (14 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=26942
Subject		: radeon: screen distortion on resume
Submitter	: Brett Witherspoon <spoonb@exherbo.org>
Date		: 2011-01-17 13:23 (17 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=26932
Subject		: [SNB mobile] Oops in DRM intel driver, esp. during S3/S4 stress test
Submitter	: Matthias Hopf <mat@mshopf.de>
Date		: 2011-01-17 11:45 (17 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=26842
Subject		: [2.6.37 regression] threads with CPU affinity cannot be killed
Submitter	: tim blechmann <tim@klingt.org>
Date		: 2011-01-16 13:39 (18 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=26802
Subject		: b43: Suspend failed
Submitter	: Patrick Matthäi <patrick@linux-dev.org>
Date		: 2011-01-15 18:56 (19 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=26582
Subject		: NULL pointer dereference on pipe creation
Submitter	: Ferenc Wágner <wferi@niif.hu>
Date		: 2011-01-12 13:30 (22 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=26552
Subject		: Screen flickering with 2.6.37 [ATI X1600]
Submitter	: Andrea Iob <andrea_iob@yahoo.it>
Date		: 2011-01-11 22:53 (23 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=26212
Subject		: kernel NULL pointer dereference in pxa3xx_nand_probe
Submitter	: Sven Neumann <s.neumann@raumfeld.com>
Date		: 2011-01-05 11:43 (29 days old)
Message-ID	: <1294227801.3996.62.camel@sven>
References	: http://marc.info/?l=linux-kernel&m=129422903703756&w=2


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=26112
Subject		: [LogFS] [2.6.37-rc8+] Kernel BUG at logfs/readwrite.c:297!
Submitter	: Prasad Joshi <prasadjoshi124@gmail.com>
Date		: 2011-01-02 21:22 (32 days old)
Message-ID	: <AANLkTinpoM8FuG8UkF88xs_V37dz_wgE8t-s0JPzaS-w@mail.gmail.com>
References	: http://marc.info/?l=linux-kernel&m=129400335910652&w=2


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=26102
Subject		: [2.6.37-rc8] BUG kmalloc-256: Poison overwritten.
Submitter	: Pawel Sikora <pluto@agmk.net>
Date		: 2010-12-30 15:08 (35 days old)
Message-ID	: <201012301608.40859.pluto@agmk.net>
References	: http://marc.info/?l=linux-kernel&m=129372388925679&w=2


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=25922
Subject		: IdeaPad Y530 brightness keys not functioning
Submitter	: Tomasz <tm.temp@gmx.com>
Date		: 2010-12-30 12:48 (35 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=25832
Subject		: kernel crashes upon resume if usb devices are removed when suspended
Submitter	: rocko <rockorequin@hotmail.com>
Date		: 2010-12-29 11:47 (36 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=25732
Subject		: i915 turns picture green when display switched off-on
Submitter	: Tõnu Raitviir <jussuf@linux.ee>
Date		: 2010-12-27 22:14 (38 days old)
First-Bad-Commit: http://git.kernel.org/linus/3c17fe4b8f40a112a85758a9ab2aebf772bdd647
Handled-By	: Chris Wilson <chris@chris-wilson.co.uk>


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=25432
Subject		: Alpha fails to build with gcc 4.4
Submitter	: Ben Hutchings <ben@decadent.org.uk>
Date		: 2010-12-22 01:55 (43 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=25402
Subject		: kernel (2.6.37-8-generic_amd64) panic on boot (with message "map_single: bounce buffer is not DMA'ble) - possible regression !!!
Submitter	: carlos <carlos.palma@ono.com>
Date		: 2010-12-21 19:58 (44 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=25072
Subject		: backlight level is not maintened during LID close/open
Submitter	: Lukas Hejtmanek <xhejtman@fi.muni.cz>
Date		: 2010-12-17 11:56 (48 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=24882
Subject		: PM/Hibernate: Memory corruption patch introduces regression (2.6.36.2)
Submitter	:  <akwatts@ymail.com>
Date		: 2010-12-14 04:00 (51 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=24822
Subject		: Embedded DisplayPort is detected wrongly on HP ProBook 5320m
Submitter	: Takashi Iwai <tiwai@suse.de>
Date		: 2010-12-13 11:09 (52 days old)
Handled-By	: Chris Wilson <chris@chris-wilson.co.uk>


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=24762
Subject		: BUG at perf_ctx_adjust_freq (kernel/perf_event.c:1582)
Submitter	: Chris Wilson <chris@chris-wilson.co.uk>
Date		: 2010-12-10 12:00 (55 days old)
Message-ID	: <c6d829$pqibha@fmsmga001.fm.intel.com>
References	: http://marc.info/?l=linux-kernel&m=129198247531612&w=2


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=24582
Subject		: Kernel Oops at tty_buffer_request_room when using pppd program (2.6.37-rc4)
Submitter	: baoyb <baoyb@avit.org.cn>
Date		: 2010-12-08 13:55 (57 days old)
Message-ID	: <EF6DDE218DB34702B1FA84D6CD7EA771@baoyb>
References	: http://marc.info/?l=linux-kernel&m=129181763525738&w=2


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=22942
Subject		: [2.6.37-rc1, OOM] virtblk: OOM in do_virtblk_request()
Submitter	: Dave Chinner <david@fromorbit.com>
Date		: 2010-11-05 1:30 (90 days old)
Message-ID	: <20101105013003.GE13830@dastard>
References	: http://marc.info/?l=linux-kernel&m=128892062917641&w=2


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=22912
Subject		: spi_lm70llp module crash on unload (2.6.37-rc1)
Submitter	: Randy Dunlap <randy.dunlap@oracle.com>
Date		: 2010-11-05 0:16 (90 days old)
Message-ID	: <20101104171620.00d8c95d.randy.dunlap@oracle.com>
References	: http://marc.info/?l=linux-kernel&m=128891627913647&w=2


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=22882
Subject		: (2.6.37-rc1) amd64-agp module crashed on second load
Submitter	: Randy Dunlap <randy.dunlap@oracle.com>
Date		: 2010-11-05 0:13 (90 days old)
Message-ID	: <20101104171333.fea1f498.randy.dunlap@oracle.com>
References	: http://marc.info/?l=linux-kernel&m=128891605213447&w=2


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=22642
Subject		: 2.6.37-rc1: Disk takes 10 seconds to resume - MacBook2,1
Submitter	: Tobias <devnull@plzk.org>
Date		: 2010-11-10 19:33 (85 days old)


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=22542
Subject		: [2.6.37-rc1] drm:i195 errors
Submitter	: Paul Rolland <rol@witbe.net>
Date		: 2010-11-02 14:58 (93 days old)
Message-ID	: <20101102155813.09cb2c6e@tux.DEF.witbe.net>
References	: http://marc.info/?l=linux-kernel&m=128870991628970&w=2


Regressions with patches
------------------------

Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=27202
Subject		: Remote control of saa7134-based tv card "ASUSTeK P7131 Hybrid" stopped working in 2.6.37
Submitter	:  <acaizzo@gmail.com>
Date		: 2011-01-20 17:23 (14 days old)
First-Bad-Commit: http://git.kernel.org/linus/4651918a4afdd49bdea21d2f919b189ef17a6399
Patch		: https://bugzilla.kernel.org/attachment.cgi?id=44532


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=26242
Subject		: BUG: unable to handle kernel NULL pointer dereference at   (null)
Submitter	: Steffen Michalke <StMichalke@web.de>
Date		: 2011-01-06 20:59 (28 days old)
Patch		: http://www.spinics.net/lists/linux-btrfs/msg08051.html


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=25822
Subject		: [BUG] kernel BUG at mm/truncate.c:479! on 2.6.37-rc8
Submitter	: Gurudas Pai <gurudas.pai@oracle.com>
Date		: 2010-12-29 6:58 (36 days old)
Message-ID	: <4D1AD935.1020504@oracle.com>
References	: http://marc.info/?l=linux-kernel&m=129360511222037&w=2
Patch		: https://lkml.org/lkml/2010/12/29/131


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=25442
Subject		: ixp4xx defines FREQ macro; conflicts with gspca/ov519 driver
Submitter	: Ben Hutchings <ben@decadent.org.uk>
Date		: 2010-12-22 02:02 (43 days old)
Handled-By	: Ben Hutchings <ben@decadent.org.uk>
Patch		: https://bugzilla.kernel.org/attachment.cgi?id=41252


Bug-Entry	: http://bugzilla.kernel.org/show_bug.cgi?id=23472
Subject		: 2.6.37-rc2 vs. 2.6.36 laptop backlight changes?
Submitter	: Patrick Schaaf <netdev@bof.de>
Date		: 2010-11-17 13:41 (78 days old)
Message-ID	: <1290001262.5727.2.camel@lat1>
References	: http://marc.info/?l=linux-kernel&m=129000127920912&w=2
Handled-By	: Indan Zupancic <indan@nul.nu>
Patch		: https://bugzilla.kernel.org/attachment.cgi?id=43482


For details, please visit the bug entries and follow the links given in
references.

As you can see, there is a Bugzilla entry for each of the listed regressions.
There also is a Bugzilla entry used for tracking the regressions introduced
between 2.6.36 and 2.6.37, unresolved as well as resolved, at:

http://bugzilla.kernel.org/show_bug.cgi?id=21782

Please let the tracking team know if there are any Bugzilla entries that
should be added to the list in there.

Thanks!

--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v4 0/2] iproute2: support for device groups
From: Stephen Hemminger @ 2011-02-03  0:14 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: Vlad Dogaru, netdev
In-Reply-To: <4D49E252.1080006@trash.net>

On Thu, 03 Feb 2011 00:01:38 +0100
Patrick McHardy <kaber@trash.net> wrote:

> Am 02.02.2011 19:23, schrieb Vlad Dogaru:
> > This patch series adds userspace support for network device groups.
> > There is support for setting device groups, listing only interfaces of a
> > specific group, and setting basic device parameters for all interfaces
> > in a group.
> > 
> > Changes since version 3:
> >  * drop devgroup keyword. There is a single keyword for specifying
> >    groups now.
> >  * store group names internally as a hash, similar to routing table
> >    names. This is more efficient for batch mode operations
> 
> Looks good to me, thanks.

Will apply after 2.6.38 is released.

-- 

^ permalink raw reply

* [PATCH] tcp: Increase the initial congestion window to 10.
From: David Miller @ 2011-02-03  1:07 UTC (permalink / raw)
  To: netdev; +Cc: dccp, therbert


Signed-off-by: David S. Miller <davem@davemloft.net>
---

I've left the DCCP code to keep using RFC3390 logic, if they
wish to adopt this change in their code they can do so by
simply deleting the rfc33390_bytes_to_packets() function and
using TCP_INIT_CWND in their assignment.

 include/net/tcp.h      |   12 +++---------
 net/dccp/ccids/ccid2.c |    9 +++++++++
 net/ipv4/tcp_input.c   |    2 +-
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9179111..7118668 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -196,6 +196,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* TCP thin-stream limits */
 #define TCP_THIN_LINEAR_RETRIES 6       /* After 6 linear retries, do exp. backoff */
 
+/* TCP initial congestion window */
+#define TCP_INIT_CWND		10
+
 extern struct inet_timewait_death_row tcp_death_row;
 
 /* sysctl variables for tcp */
@@ -799,15 +802,6 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
 /* Use define here intentionally to get WARN_ON location shown at the caller */
 #define tcp_verify_left_out(tp)	WARN_ON(tcp_left_out(tp) > tp->packets_out)
 
-/*
- * Convert RFC 3390 larger initial window into an equivalent number of packets.
- * This is based on the numbers specified in RFC 5681, 3.1.
- */
-static inline u32 rfc3390_bytes_to_packets(const u32 smss)
-{
-	return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
-}
-
 extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh);
 extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
 
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index e96d5e8..fadecd2 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -583,6 +583,15 @@ done:
 	dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
 }
 
+/*
+ * Convert RFC 3390 larger initial window into an equivalent number of packets.
+ * This is based on the numbers specified in RFC 5681, 3.1.
+ */
+static inline u32 rfc3390_bytes_to_packets(const u32 smss)
+{
+	return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
+}
+
 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 {
 	struct ccid2_hc_tx_sock *hc = ccid_priv(ccid);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eb7f82e..2f692ce 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -817,7 +817,7 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
 
 	if (!cwnd)
-		cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
+		cwnd = TCP_INIT_CWND;
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }
 
-- 
1.7.4


^ permalink raw reply related

* [PATCH] sfq: deadlock in error path
From: Stephen Hemminger @ 2011-02-03  1:19 UTC (permalink / raw)
  To: David Miller, Eric Dumazet; +Cc: netdev

The change to allow divisor to be a parameter (in 2.6.38-rc1)
 commit 817fb15dfd988d8dda916ee04fa506f0c466b9d6
introduced a possible deadlock caught by sparse.

The scheduler tree lock was left locked in the case of an incorrect
divisor value. Simplest fix is to move test outside of lock
which also solves problem of partial update.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

--- a/net/sched/sch_sfq.c	2011-02-02 17:12:06.338204106 -0800
+++ b/net/sched/sch_sfq.c	2011-02-02 17:13:39.127205019 -0800
@@ -491,17 +491,18 @@ static int sfq_change(struct Qdisc *sch,
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
 
+	if (ctl->divisor &&
+	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
+		return -EINVAL;
+
 	sch_tree_lock(sch);
 	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
 	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 	q->perturb_period = ctl->perturb_period * HZ;
 	if (ctl->limit)
 		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
-	if (ctl->divisor) {
-		if (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)
-			return -EINVAL;
+	if (ctl->divisor)
 		q->divisor = ctl->divisor;
-	}
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > q->limit)
 		sfq_drop(sch);

^ permalink raw reply

* [PATCH net-next] CHOKe flow scheduler (0.11)
From: Stephen Hemminger @ 2011-02-03  1:21 UTC (permalink / raw)
  To: Eric Dumazet, David Miller; +Cc: Patrick McHardy, netdev
In-Reply-To: <1295563611.2613.41.camel@edumazet-laptop>

Subject: sched: CHOKe flow scheduler

CHOKe ("CHOose and Kill" or "CHOose and Keep") is an alternative
packet scheduler based on the Random Exponential Drop (RED) algorithm.

The core idea is:
  For every packet arrival:
  	Calculate Qave
	if (Qave < minth) 
	     Queue the new packet
	else 
	     Select randomly a packet from the queue 
	     if (both packets from same flow)
	     then Drop both the packets
	     else if (Qave > maxth)
	          Drop packet
	     else
	       	  Admit packet with proability p (same as RED)

See also:
  Rong Pan, Balaji Prabhakar, Konstantinos Psounis, "CHOKe: a stateless active
   queue management scheme for approximating fair bandwidth allocation", 
  Proceeding of INFOCOM'2000, March 2000.

Help from:
     Eric Dumazet <eric.dumazet@gmail.com>
     Patrick McHardy <kaber@trash.net>

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

---
0.11 - incorporates Eric's change to use rxhash

 include/linux/pkt_sched.h |   29 +
 net/sched/Kconfig         |   11 
 net/sched/Makefile        |    2 
 net/sched/sch_choke.c     |  676 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 718 insertions(+)

--- a/net/sched/Kconfig	2011-01-31 09:01:35.000000000 -0800
+++ b/net/sched/Kconfig	2011-02-02 17:00:36.798764819 -0800
@@ -217,6 +217,17 @@ config NET_SCH_MQPRIO
 
 	  If unsure, say N.
 
+config NET_SCH_CHOKE
+	tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
+	help
+	  Say Y here if you want to use the CHOKe packet scheduler (CHOose
+	  and Keep for responsive flows, CHOose and Kill for unresponsive
+	  flows). This is a variation of RED which trys to penalize flows
+	  that monopolize the queue.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called sch_choke.
+
 config NET_SCH_INGRESS
 	tristate "Ingress Qdisc"
 	depends on NET_CLS_ACT
--- a/net/sched/Makefile	2011-01-31 09:01:35.000000000 -0800
+++ b/net/sched/Makefile	2011-02-02 17:01:00.987025820 -0800
@@ -33,6 +33,8 @@ obj-$(CONFIG_NET_SCH_ATM)	+= sch_atm.o
 obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)	+= sch_drr.o
 obj-$(CONFIG_NET_SCH_MQPRIO)	+= sch_mqprio.o
+obj-$(CONFIG_NET_SCH_CHOKE)	+= sch_choke.o
+
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
 obj-$(CONFIG_NET_CLS_FW)	+= cls_fw.o
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ b/net/sched/sch_choke.c	2011-02-02 17:08:57.208163848 -0800
@@ -0,0 +1,676 @@
+/*
+ * net/sched/sch_choke.c	CHOKE scheduler
+ *
+ * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
+ * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/reciprocal_div.h>
+#include <net/pkt_sched.h>
+#include <net/inet_ecn.h>
+#include <net/red.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+
+/*
+   CHOKe stateless AQM for fair bandwidth allocation
+   =================================================
+
+   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
+   unresponsive flows) is a variant of RED that penalizes misbehaving flows but
+   maintains no flow state. The difference from RED is an additional step
+   during the enqueuing process. If average queue size is over the
+   low threshold (qmin), a packet is chosen at random from the queue.
+   If both the new and chosen packet are from the same flow, both
+   are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
+   needs to access packets in queue randomly. It has a minimal class
+   interface to allow overriding the builtin flow classifier with
+   filters.
+
+   Source:
+   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
+   Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
+   IEEE INFOCOM, 2000.
+
+   A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
+   Characteristics", IEEE/ACM Transactions on Networking, 2004
+
+ */
+
+/* Upper bound on size of sk_buff table (packets) */
+#define CHOKE_MAX_QUEUE	(128*1024 - 1)
+
+struct choke_sched_data {
+/* Parameters */
+	u32		 limit;
+	unsigned char	 flags;
+
+	struct red_parms parms;
+
+/* Variables */
+	struct tcf_proto *filter_list;
+	struct {
+		u32	prob_drop;	/* Early probability drops */
+		u32	prob_mark;	/* Early probability marks */
+		u32	forced_drop;	/* Forced drops, qavg > max_thresh */
+		u32	forced_mark;	/* Forced marks, qavg > max_thresh */
+		u32	pdrop;          /* Drops due to queue limits */
+		u32	other;          /* Drops due to drop() calls */
+		u32	matched;	/* Drops to flow match */
+	} stats;
+
+	unsigned int	 head;
+	unsigned int	 tail;
+
+	unsigned int	 tab_mask; /* size - 1 */
+
+	struct sk_buff **tab;
+};
+
+/* deliver a random number between 0 and N - 1 */
+static u32 random_N(unsigned int N)
+{
+	return reciprocal_divide(random32(), N);
+}
+
+/* number of elements in queue including holes */
+static unsigned int choke_len(const struct choke_sched_data *q)
+{
+	return (q->tail - q->head) & q->tab_mask;
+}
+
+/* Is ECN parameter configured */
+static int use_ecn(const struct choke_sched_data *q)
+{
+	return q->flags & TC_RED_ECN;
+}
+
+/* Should packets over max just be dropped (versus marked) */
+static int use_harddrop(const struct choke_sched_data *q)
+{
+	return q->flags & TC_RED_HARDDROP;
+}
+
+/* Move head pointer forward to skip over holes */
+static void choke_zap_head_holes(struct choke_sched_data *q)
+{
+	do {
+		q->head = (q->head + 1) & q->tab_mask;
+		if (q->head == q->tail)
+			break;
+	} while (q->tab[q->head] == NULL);
+}
+
+/* Move tail pointer backwards to reuse holes */
+static void choke_zap_tail_holes(struct choke_sched_data *q)
+{
+	do {
+		q->tail = (q->tail - 1) & q->tab_mask;
+		if (q->head == q->tail)
+			break;
+	} while (q->tab[q->tail] == NULL);
+}
+
+/* Drop packet from queue array by creating a "hole" */
+static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb = q->tab[idx];
+
+	q->tab[idx] = NULL;
+
+	if (idx == q->head)
+		choke_zap_head_holes(q);
+	if (idx == q->tail)
+		choke_zap_tail_holes(q);
+
+	sch->qstats.backlog -= qdisc_pkt_len(skb);
+	qdisc_drop(skb, sch);
+	qdisc_tree_decrease_qlen(sch, 1);
+	--sch->q.qlen;
+}
+
+/*
+ * Compare flow of two packets
+ *  Returns true only if source and destination address and port match.
+ *          false for special cases
+ */
+static bool choke_match_flow(struct sk_buff *skb1,
+			     struct sk_buff *skb2)
+{
+	int off1, off2, poff;
+	const u32 *ports1, *ports2;
+	u8 ip_proto;
+	__u32 hash1;
+
+	if (skb1->protocol != skb2->protocol)
+		return false;
+
+	/* Use hash value as quick check
+	 * Assumes that __skb_get_rxhash makes IP header and ports linear
+	 */
+	hash1 = skb_get_rxhash(skb1);
+	if (!hash1 || hash1 != skb_get_rxhash(skb2))
+		return false;
+
+	/* Probably match, but be sure to avoid hash collisions */
+	off1 = skb_network_offset(skb1);
+	off2 = skb_network_offset(skb2);
+
+	switch (skb1->protocol) {
+	case __constant_htons(ETH_P_IP): {
+		const struct iphdr *ip1, *ip2;
+
+		ip1 = (const struct iphdr *) (skb1->data + off1);
+		ip2 = (const struct iphdr *) (skb2->data + off2);
+
+		ip_proto = ip1->protocol;
+		if (ip_proto != ip2->protocol ||
+		    ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr)
+			return false;
+
+		if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET))
+			ip_proto = 0;
+		off1 += ip1->ihl * 4;
+		off2 += ip2->ihl * 4;
+		break;
+	}
+
+	case __constant_htons(ETH_P_IPV6): {
+		const struct ipv6hdr *ip1, *ip2;
+
+		ip1 = (const struct ipv6hdr *) (skb1->data + off1);
+		ip2 = (const struct ipv6hdr *) (skb2->data + off2);
+
+		ip_proto = ip1->nexthdr;
+		if (ip_proto != ip2->nexthdr ||
+		    ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) ||
+		    ipv6_addr_cmp(&ip1->daddr, &ip2->daddr))
+			return false;
+		off1 += 40;
+		off2 += 40;
+	}
+
+	default: /* Maybe compare MAC header here? */
+		return false;
+	}
+
+	poff = proto_ports_offset(ip_proto);
+	if (poff < 0)
+		return true;
+
+	off1 += poff;
+	off2 += poff;
+
+	ports1 = (__force u32 *)(skb1->data + off1);
+	ports2 = (__force u32 *)(skb2->data + off2);
+	return *ports1 == *ports2;
+}
+
+static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
+{
+	*(unsigned int *)(qdisc_skb_cb(skb)->data) = classid;
+}
+
+static u16 choke_get_classid(const struct sk_buff *skb)
+{
+	return *(unsigned int *)(qdisc_skb_cb(skb)->data);
+}
+
+/*
+ * Classify flow using either:
+ *  1. pre-existing classification result in skb
+ *  2. fast internal classification
+ *  3. use TC filter based classification
+ */
+static bool choke_classify(struct sk_buff *skb,
+			   struct Qdisc *sch, int *qerr)
+
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct tcf_result res;
+	int result;
+
+	result = tc_classify(skb, q->filter_list, &res);
+	if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_STOLEN:
+		case TC_ACT_QUEUED:
+			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+		case TC_ACT_SHOT:
+			return false;
+		}
+#endif
+		choke_set_classid(skb, TC_H_MIN(res.classid));
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Select a packet at random from queue
+ * HACK: since queue can have holes from previous deletion; retry several
+ *   times to find a random skb but then just give up and return the head
+ * Will return NULL if queue is empty (q->head == q->tail)
+ */
+static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
+					 unsigned int *pidx)
+{
+	struct sk_buff *skb;
+	int retrys = 3;
+
+	do {
+		*pidx = (q->head + random_N(choke_len(q))) & q->tab_mask;
+		skb = q->tab[*pidx];
+		if (skb)
+			return skb;
+	} while (--retrys > 0);
+
+	return q->tab[*pidx = q->head];
+}
+
+/*
+ * Compare new packet with random packet in queue
+ * returns true if matched and sets *pidx
+ */
+static bool choke_match_random(const struct choke_sched_data *q,
+			       struct sk_buff *nskb,
+			       unsigned int *pidx)
+{
+	struct sk_buff *oskb;
+
+	if (q->head == q->tail)
+		return false;
+
+	oskb = choke_peek_random(q, pidx);
+	if (q->filter_list)
+		return choke_get_classid(nskb) == choke_get_classid(oskb);
+
+	return choke_match_flow(oskb, nskb);
+}
+
+static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct red_parms *p = &q->parms;
+	int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+
+	if (q->filter_list) {
+		/* If using external classifiers, get result and record it. */
+		if (!choke_classify(skb, sch, &ret))
+			goto other_drop;	/* Packet was eaten by filter */
+	}
+
+	/* Compute average queue usage (see RED) */
+	p->qavg = red_calc_qavg(p, sch->q.qlen);
+	if (red_is_idling(p))
+		red_end_of_idle_period(p);
+
+	/* Is queue small? */
+	if (p->qavg <= p->qth_min)
+		p->qcount = -1;
+	else {
+		unsigned int idx;
+
+		/* Draw a packet at random from queue and compare flow */
+		if (choke_match_random(q, skb, &idx)) {
+			q->stats.matched++;
+			choke_drop_by_idx(sch, idx);
+			goto congestion_drop;
+		}
+
+		/* Queue is large, always mark/drop */
+		if (p->qavg > p->qth_max) {
+			p->qcount = -1;
+
+			sch->qstats.overlimits++;
+			if (use_harddrop(q) || !use_ecn(q) ||
+			    !INET_ECN_set_ce(skb)) {
+				q->stats.forced_drop++;
+				goto congestion_drop;
+			}
+
+			q->stats.forced_mark++;
+		} else if (++p->qcount) {
+			if (red_mark_probability(p, p->qavg)) {
+				p->qcount = 0;
+				p->qR = red_random(p);
+
+				sch->qstats.overlimits++;
+				if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
+					q->stats.prob_drop++;
+					goto congestion_drop;
+				}
+
+				q->stats.prob_mark++;
+			}
+		} else
+			p->qR = red_random(p);
+	}
+
+	/* Admit new packet */
+	if (sch->q.qlen < q->limit) {
+		q->tab[q->tail] = skb;
+		q->tail = (q->tail + 1) & q->tab_mask;
+		++sch->q.qlen;
+		sch->qstats.backlog += qdisc_pkt_len(skb);
+		return NET_XMIT_SUCCESS;
+	}
+
+	q->stats.pdrop++;
+	sch->qstats.drops++;
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+
+ congestion_drop:
+	qdisc_drop(skb, sch);
+	return NET_XMIT_CN;
+
+ other_drop:
+	if (ret & __NET_XMIT_BYPASS)
+		sch->qstats.drops++;
+	kfree_skb(skb);
+	return ret;
+}
+
+static struct sk_buff *choke_dequeue(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct sk_buff *skb;
+
+	if (q->head == q->tail) {
+		if (!red_is_idling(&q->parms))
+			red_start_of_idle_period(&q->parms);
+		return NULL;
+	}
+
+	skb = q->tab[q->head];
+	q->tab[q->head] = NULL;
+	choke_zap_head_holes(q);
+	--sch->q.qlen;
+	sch->qstats.backlog -= qdisc_pkt_len(skb);
+	qdisc_bstats_update(sch, skb);
+
+	return skb;
+}
+
+static unsigned int choke_drop(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	unsigned int len;
+
+	len = qdisc_queue_drop(sch);
+	if (len > 0)
+		q->stats.other++;
+	else {
+		if (!red_is_idling(&q->parms))
+			red_start_of_idle_period(&q->parms);
+	}
+
+	return len;
+}
+
+static void choke_reset(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+
+	red_restart(&q->parms);
+}
+
+static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
+	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
+	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
+};
+
+
+static void choke_free(void *addr)
+{
+	if (addr) {
+		if (is_vmalloc_addr(addr))
+			vfree(addr);
+		else
+			kfree(addr);
+	}
+}
+
+static int choke_change(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct nlattr *tb[TCA_CHOKE_MAX + 1];
+	const struct tc_red_qopt *ctl;
+	int err;
+	struct sk_buff **old = NULL;
+	unsigned int mask;
+
+	if (opt == NULL)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_CHOKE_PARMS] == NULL ||
+	    tb[TCA_CHOKE_STAB] == NULL)
+		return -EINVAL;
+
+	ctl = nla_data(tb[TCA_CHOKE_PARMS]);
+
+	if (ctl->limit > CHOKE_MAX_QUEUE)
+		return -EINVAL;
+
+	mask = roundup_pow_of_two(ctl->limit + 1) - 1;
+	if (mask != q->tab_mask) {
+		struct sk_buff **ntab;
+
+		ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
+		if (!ntab)
+			ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
+		if (!ntab)
+			return -ENOMEM;
+
+		sch_tree_lock(sch);
+		old = q->tab;
+		if (old) {
+			unsigned int oqlen = sch->q.qlen, tail = 0;
+
+			while (q->head != q->tail) {
+				struct sk_buff *skb = q->tab[q->head];
+
+				q->head = (q->head + 1) & q->tab_mask;
+				if (!skb)
+					continue;
+				if (tail < mask) {
+					ntab[tail++] = skb;
+					continue;
+				}
+				sch->qstats.backlog -= qdisc_pkt_len(skb);
+				--sch->q.qlen;
+				qdisc_drop(skb, sch);
+			}
+			qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
+			q->head = 0;
+			q->tail = tail;
+		}
+
+		q->tab_mask = mask;
+		q->tab = ntab;
+	} else
+		sch_tree_lock(sch);
+
+	q->flags = ctl->flags;
+	q->limit = ctl->limit;
+
+	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
+		      ctl->Plog, ctl->Scell_log,
+		      nla_data(tb[TCA_CHOKE_STAB]));
+
+	if (q->head == q->tail)
+		red_end_of_idle_period(&q->parms);
+
+	sch_tree_unlock(sch);
+	choke_free(old);
+	return 0;
+}
+
+static int choke_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	return choke_change(sch, opt);
+}
+
+static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct nlattr *opts = NULL;
+	struct tc_red_qopt opt = {
+		.limit		= q->limit,
+		.flags		= q->flags,
+		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
+		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
+		.Wlog		= q->parms.Wlog,
+		.Plog		= q->parms.Plog,
+		.Scell_log	= q->parms.Scell_log,
+	};
+
+	opts = nla_nest_start(skb, TCA_OPTIONS);
+	if (opts == NULL)
+		goto nla_put_failure;
+
+	NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
+	return nla_nest_end(skb, opts);
+
+nla_put_failure:
+	nla_nest_cancel(skb, opts);
+	return -EMSGSIZE;
+}
+
+static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+	struct tc_choke_xstats st = {
+		.early	= q->stats.prob_drop + q->stats.forced_drop,
+		.marked	= q->stats.prob_mark + q->stats.forced_mark,
+		.pdrop	= q->stats.pdrop,
+		.other	= q->stats.other,
+		.matched = q->stats.matched,
+	};
+
+	return gnet_stats_copy_app(d, &st, sizeof(st));
+}
+
+static void choke_destroy(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+
+	tcf_destroy_chain(&q->filter_list);
+	choke_free(q->tab);
+}
+
+static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
+{
+	return NULL;
+}
+
+static unsigned long choke_get(struct Qdisc *sch, u32 classid)
+{
+	return 0;
+}
+
+static void choke_put(struct Qdisc *q, unsigned long cl)
+{
+}
+
+static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
+				u32 classid)
+{
+	return 0;
+}
+
+static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return &q->filter_list;
+}
+
+static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
+			  struct sk_buff *skb, struct tcmsg *tcm)
+{
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	return 0;
+}
+
+static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	if (!arg->stop) {
+		if (arg->fn(sch, 1, arg) < 0) {
+			arg->stop = 1;
+			return;
+		}
+		arg->count++;
+	}
+}
+
+static const struct Qdisc_class_ops choke_class_ops = {
+	.leaf		=	choke_leaf,
+	.get		=	choke_get,
+	.put		=	choke_put,
+	.tcf_chain	=	choke_find_tcf,
+	.bind_tcf	=	choke_bind,
+	.unbind_tcf	=	choke_put,
+	.dump		=	choke_dump_class,
+	.walk		=	choke_walk,
+};
+
+static struct sk_buff *choke_peek_head(struct Qdisc *sch)
+{
+	struct choke_sched_data *q = qdisc_priv(sch);
+
+	return (q->head != q->tail) ? q->tab[q->head] : NULL;
+}
+
+static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
+	.id		=	"choke",
+	.priv_size	=	sizeof(struct choke_sched_data),
+
+	.enqueue	=	choke_enqueue,
+	.dequeue	=	choke_dequeue,
+	.peek		=	choke_peek_head,
+	.drop		=	choke_drop,
+	.init		=	choke_init,
+	.destroy	=	choke_destroy,
+	.reset		=	choke_reset,
+	.change		=	choke_change,
+	.dump		=	choke_dump,
+	.dump_stats	=	choke_dump_stats,
+	.owner		=	THIS_MODULE,
+};
+
+static int __init choke_module_init(void)
+{
+	return register_qdisc(&choke_qdisc_ops);
+}
+
+static void __exit choke_module_exit(void)
+{
+	unregister_qdisc(&choke_qdisc_ops);
+}
+
+module_init(choke_module_init)
+module_exit(choke_module_exit)
+
+MODULE_LICENSE("GPL");
--- a/include/linux/pkt_sched.h	2011-01-31 09:01:32.000000000 -0800
+++ b/include/linux/pkt_sched.h	2011-02-02 17:00:36.802764862 -0800
@@ -247,6 +247,35 @@ struct tc_gred_sopt {
 	__u16		pad1;
 };
 
+/* CHOKe section */
+
+enum {
+	TCA_CHOKE_UNSPEC,
+	TCA_CHOKE_PARMS,
+	TCA_CHOKE_STAB,
+	__TCA_CHOKE_MAX,
+};
+
+#define TCA_CHOKE_MAX (__TCA_CHOKE_MAX - 1)
+
+struct tc_choke_qopt {
+	__u32		limit;		/* Hard queue length (packets)	*/
+	__u32		qth_min;	/* Min average threshold (packets) */
+	__u32		qth_max;	/* Max average threshold (packets) */
+	unsigned char   Wlog;		/* log(W)		*/
+	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
+	unsigned char   Scell_log;	/* cell size for idle damping */
+	unsigned char	flags;		/* see RED flags */
+};
+
+struct tc_choke_xstats {
+	__u32		early;          /* Early drops */
+	__u32		pdrop;          /* Drops due to queue limits */
+	__u32		other;          /* Drops due to drop() calls */
+	__u32		marked;         /* Marked packets */
+	__u32		matched;	/* Drops due to flow match */
+};
+
 /* HTB section */
 #define TC_HTB_NUMPRIO		8
 #define TC_HTB_MAXDEPTH		8

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox