Netdev List
 help / color / mirror / Atom feed
* netfilter 02/31: ebtables: Use %pM conversion specifier
From: Patrick McHardy @ 2009-09-10 16:11 UTC (permalink / raw)
  To: davem; +Cc: netdev, Patrick McHardy, netfilter-devel
In-Reply-To: <20090910161142.31179.5256.sendpatchset@x2.localnet>

commit be39ee11cd1f67b51ac8e71d177a981eb34f2ab2
Author: Tobias Klauser <klto@zhaw.ch>
Date:   Mon Aug 10 10:10:55 2009 +0200

    netfilter: ebtables: Use %pM conversion specifier
    
    ebt_log uses its own implementation of print_mac to print MAC addresses.
    This patch converts it to use the %pM conversion specifier for printk.
    
    Signed-off-by: Tobias Klauser <klto@zhaw.ch>
    Signed-off-by: Patrick McHardy <kaber@trash.net>

diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index a94f3cc..e4ea3fd 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -50,14 +50,6 @@ struct arppayload
 	unsigned char ip_dst[4];
 };
 
-static void print_MAC(const unsigned char *p)
-{
-	int i;
-
-	for (i = 0; i < ETH_ALEN; i++, p++)
-		printk("%02x%c", *p, i == ETH_ALEN - 1 ? ' ':':');
-}
-
 static void
 print_ports(const struct sk_buff *skb, uint8_t protocol, int offset)
 {
@@ -88,14 +80,11 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,
 	unsigned int bitmask;
 
 	spin_lock_bh(&ebt_log_lock);
-	printk("<%c>%s IN=%s OUT=%s MAC source = ", '0' + loginfo->u.log.level,
-	       prefix, in ? in->name : "", out ? out->name : "");
-
-	print_MAC(eth_hdr(skb)->h_source);
-	printk("MAC dest = ");
-	print_MAC(eth_hdr(skb)->h_dest);
-
-	printk("proto = 0x%04x", ntohs(eth_hdr(skb)->h_proto));
+	printk("<%c>%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x",
+	       '0' + loginfo->u.log.level, prefix,
+	       in ? in->name : "", out ? out->name : "",
+	       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+	       ntohs(eth_hdr(skb)->h_proto));
 
 	if (loginfo->type == NF_LOG_TYPE_LOG)
 		bitmask = loginfo->u.log.logflags;
@@ -171,12 +160,8 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,
 				printk(" INCOMPLETE ARP payload");
 				goto out;
 			}
-			printk(" ARP MAC SRC=");
-			print_MAC(ap->mac_src);
-			printk(" ARP IP SRC=%pI4", ap->ip_src);
-			printk(" ARP MAC DST=");
-			print_MAC(ap->mac_dst);
-			printk(" ARP IP DST=%pI4", ap->ip_dst);
+			printk(" ARP MAC SRC=%pM ARP IP SRC=%pI4 ARP MAC DST=%pM ARP IP DST=%pI4",
+					ap->mac_src, ap->ip_src, ap->mac_dst, ap->ip_dst);
 		}
 	}
 out:

^ permalink raw reply related

* netfilter 04/31: xtables: remove xt_CONNMARK v0
From: Patrick McHardy @ 2009-09-10 16:11 UTC (permalink / raw)
  To: davem; +Cc: netdev, Patrick McHardy, netfilter-devel
In-Reply-To: <20090910161142.31179.5256.sendpatchset@x2.localnet>

commit e973a70ca033bfcd4d8b59d1f66bfc1e782e1276
Author: Jan Engelhardt <jengelh@medozas.de>
Date:   Fri Jun 12 18:42:12 2009 +0200

    netfilter: xtables: remove xt_CONNMARK v0
    
    Superseded by xt_CONNMARK v1 (v2.6.24-2917-g0dc8c76).
    
    Signed-off-by: Jan Engelhardt <jengelh@medozas.de>

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 3aa4a77..7eccf94 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -238,9 +238,6 @@ What (Why):
 	- "forwarding" header files like ipt_mac.h in
 	  include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/
 
-	- xt_CONNMARK match revision 0
-	  (superseded by xt_CONNMARK match revision 1)
-
 	- xt_MARK target revisions 0 and 1
 	  (superseded by xt_MARK match revision 2)
 
diff --git a/include/linux/netfilter/xt_CONNMARK.h b/include/linux/netfilter/xt_CONNMARK.h
index 7635c8f..0a85458 100644
--- a/include/linux/netfilter/xt_CONNMARK.h
+++ b/include/linux/netfilter/xt_CONNMARK.h
@@ -18,12 +18,6 @@ enum {
 	XT_CONNMARK_RESTORE
 };
 
-struct xt_connmark_target_info {
-	unsigned long mark;
-	unsigned long mask;
-	__u8 mode;
-};
-
 struct xt_connmark_tginfo1 {
 	__u32 ctmark, ctmask, nfmask;
 	__u8 mode;
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
index d6e5ab4..5934570 100644
--- a/net/netfilter/xt_CONNMARK.c
+++ b/net/netfilter/xt_CONNMARK.c
@@ -36,45 +36,6 @@ MODULE_ALIAS("ip6t_CONNMARK");
 #include <net/netfilter/nf_conntrack_ecache.h>
 
 static unsigned int
-connmark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par)
-{
-	const struct xt_connmark_target_info *markinfo = par->targinfo;
-	struct nf_conn *ct;
-	enum ip_conntrack_info ctinfo;
-	u_int32_t diff;
-	u_int32_t mark;
-	u_int32_t newmark;
-
-	ct = nf_ct_get(skb, &ctinfo);
-	if (ct) {
-		switch(markinfo->mode) {
-		case XT_CONNMARK_SET:
-			newmark = (ct->mark & ~markinfo->mask) | markinfo->mark;
-			if (newmark != ct->mark) {
-				ct->mark = newmark;
-				nf_conntrack_event_cache(IPCT_MARK, ct);
-			}
-			break;
-		case XT_CONNMARK_SAVE:
-			newmark = (ct->mark & ~markinfo->mask) |
-				  (skb->mark & markinfo->mask);
-			if (ct->mark != newmark) {
-				ct->mark = newmark;
-				nf_conntrack_event_cache(IPCT_MARK, ct);
-			}
-			break;
-		case XT_CONNMARK_RESTORE:
-			mark = skb->mark;
-			diff = (ct->mark ^ mark) & markinfo->mask;
-			skb->mark = mark ^ diff;
-			break;
-		}
-	}
-
-	return XT_CONTINUE;
-}
-
-static unsigned int
 connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	const struct xt_connmark_tginfo1 *info = par->targinfo;
@@ -112,30 +73,6 @@ connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool connmark_tg_check_v0(const struct xt_tgchk_param *par)
-{
-	const struct xt_connmark_target_info *matchinfo = par->targinfo;
-
-	if (matchinfo->mode == XT_CONNMARK_RESTORE) {
-		if (strcmp(par->table, "mangle") != 0) {
-			printk(KERN_WARNING "CONNMARK: restore can only be "
-			       "called from \"mangle\" table, not \"%s\"\n",
-			       par->table);
-			return false;
-		}
-	}
-	if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) {
-		printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n");
-		return false;
-	}
-	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
-		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->family);
-		return false;
-	}
-	return true;
-}
-
 static bool connmark_tg_check(const struct xt_tgchk_param *par)
 {
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
@@ -151,74 +88,25 @@ static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
 	nf_ct_l3proto_module_put(par->family);
 }
 
-#ifdef CONFIG_COMPAT
-struct compat_xt_connmark_target_info {
-	compat_ulong_t	mark, mask;
-	u_int8_t	mode;
-	u_int8_t	__pad1;
-	u_int16_t	__pad2;
-};
-
-static void connmark_tg_compat_from_user_v0(void *dst, void *src)
-{
-	const struct compat_xt_connmark_target_info *cm = src;
-	struct xt_connmark_target_info m = {
-		.mark	= cm->mark,
-		.mask	= cm->mask,
-		.mode	= cm->mode,
-	};
-	memcpy(dst, &m, sizeof(m));
-}
-
-static int connmark_tg_compat_to_user_v0(void __user *dst, void *src)
-{
-	const struct xt_connmark_target_info *m = src;
-	struct compat_xt_connmark_target_info cm = {
-		.mark	= m->mark,
-		.mask	= m->mask,
-		.mode	= m->mode,
-	};
-	return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0;
-}
-#endif /* CONFIG_COMPAT */
-
-static struct xt_target connmark_tg_reg[] __read_mostly = {
-	{
-		.name		= "CONNMARK",
-		.revision	= 0,
-		.family		= NFPROTO_UNSPEC,
-		.checkentry	= connmark_tg_check_v0,
-		.destroy	= connmark_tg_destroy,
-		.target		= connmark_tg_v0,
-		.targetsize	= sizeof(struct xt_connmark_target_info),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_connmark_target_info),
-		.compat_from_user = connmark_tg_compat_from_user_v0,
-		.compat_to_user	= connmark_tg_compat_to_user_v0,
-#endif
-		.me		= THIS_MODULE
-	},
-	{
-		.name           = "CONNMARK",
-		.revision       = 1,
-		.family         = NFPROTO_UNSPEC,
-		.checkentry     = connmark_tg_check,
-		.target         = connmark_tg,
-		.targetsize     = sizeof(struct xt_connmark_tginfo1),
-		.destroy        = connmark_tg_destroy,
-		.me             = THIS_MODULE,
-	},
+static struct xt_target connmark_tg_reg __read_mostly = {
+	.name           = "CONNMARK",
+	.revision       = 1,
+	.family         = NFPROTO_UNSPEC,
+	.checkentry     = connmark_tg_check,
+	.target         = connmark_tg,
+	.targetsize     = sizeof(struct xt_connmark_tginfo1),
+	.destroy        = connmark_tg_destroy,
+	.me             = THIS_MODULE,
 };
 
 static int __init connmark_tg_init(void)
 {
-	return xt_register_targets(connmark_tg_reg,
-	       ARRAY_SIZE(connmark_tg_reg));
+	return xt_register_target(&connmark_tg_reg);
 }
 
 static void __exit connmark_tg_exit(void)
 {
-	xt_unregister_targets(connmark_tg_reg, ARRAY_SIZE(connmark_tg_reg));
+	xt_unregister_target(&connmark_tg_reg);
 }
 
 module_init(connmark_tg_init);

^ permalink raw reply related

* netfilter 03/31: xtables: remove xt_TOS v0
From: Patrick McHardy @ 2009-09-10 16:11 UTC (permalink / raw)
  To: davem; +Cc: netdev, Patrick McHardy, netfilter-devel
In-Reply-To: <20090910161142.31179.5256.sendpatchset@x2.localnet>

commit 7cd1837b5d24417eca667d674a97bea936849785
Author: Jan Engelhardt <jengelh@medozas.de>
Date:   Fri Jun 12 18:36:33 2009 +0200

    netfilter: xtables: remove xt_TOS v0
    
    Superseded by xt_TOS v1 (v2.6.24-2396-g5c350e5).
    
    Signed-off-by: Jan Engelhardt <jengelh@medozas.de>

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index f8cd450..3aa4a77 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -235,9 +235,6 @@ Who:	Thomas Gleixner <tglx@linutronix.de>
 ---------------------------
 
 What (Why):
-	- include/linux/netfilter_ipv4/ipt_TOS.h ipt_tos.h header files
-	  (superseded by xt_TOS/xt_tos target & match)
-
 	- "forwarding" header files like ipt_mac.h in
 	  include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/
 
diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild
index 3a7105b..86d81a2 100644
--- a/include/linux/netfilter_ipv4/Kbuild
+++ b/include/linux/netfilter_ipv4/Kbuild
@@ -9,7 +9,6 @@ header-y += ipt_NFQUEUE.h
 header-y += ipt_REJECT.h
 header-y += ipt_SAME.h
 header-y += ipt_TCPMSS.h
-header-y += ipt_TOS.h
 header-y += ipt_TTL.h
 header-y += ipt_ULOG.h
 header-y += ipt_addrtype.h
@@ -40,7 +39,6 @@ header-y += ipt_sctp.h
 header-y += ipt_state.h
 header-y += ipt_string.h
 header-y += ipt_tcpmss.h
-header-y += ipt_tos.h
 header-y += ipt_ttl.h
 
 unifdef-y += ip_queue.h
diff --git a/include/linux/netfilter_ipv4/ipt_TOS.h b/include/linux/netfilter_ipv4/ipt_TOS.h
deleted file mode 100644
index 6bf9e1f..0000000
--- a/include/linux/netfilter_ipv4/ipt_TOS.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _IPT_TOS_H_target
-#define _IPT_TOS_H_target
-
-#ifndef IPTOS_NORMALSVC
-#define IPTOS_NORMALSVC 0
-#endif
-
-struct ipt_tos_target_info {
-	u_int8_t tos;
-};
-
-#endif /*_IPT_TOS_H_target*/
diff --git a/include/linux/netfilter_ipv4/ipt_tos.h b/include/linux/netfilter_ipv4/ipt_tos.h
deleted file mode 100644
index a21f5df..0000000
--- a/include/linux/netfilter_ipv4/ipt_tos.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef _IPT_TOS_H
-#define _IPT_TOS_H
-
-struct ipt_tos_info {
-    u_int8_t tos;
-    u_int8_t invert;
-};
-
-#ifndef IPTOS_NORMALSVC
-#define IPTOS_NORMALSVC 0
-#endif
-
-#endif /*_IPT_TOS_H*/
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 6a347e7..74ce892 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -18,7 +18,6 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_DSCP.h>
-#include <linux/netfilter_ipv4/ipt_TOS.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: DSCP/TOS field modification");
@@ -73,41 +72,6 @@ static bool dscp_tg_check(const struct xt_tgchk_param *par)
 }
 
 static unsigned int
-tos_tg_v0(struct sk_buff *skb, const struct xt_target_param *par)
-{
-	const struct ipt_tos_target_info *info = par->targinfo;
-	struct iphdr *iph = ip_hdr(skb);
-	u_int8_t oldtos;
-
-	if ((iph->tos & IPTOS_TOS_MASK) != info->tos) {
-		if (!skb_make_writable(skb, sizeof(struct iphdr)))
-			return NF_DROP;
-
-		iph      = ip_hdr(skb);
-		oldtos   = iph->tos;
-		iph->tos = (iph->tos & IPTOS_PREC_MASK) | info->tos;
-		csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
-	}
-
-	return XT_CONTINUE;
-}
-
-static bool tos_tg_check_v0(const struct xt_tgchk_param *par)
-{
-	const struct ipt_tos_target_info *info = par->targinfo;
-	const uint8_t tos = info->tos;
-
-	if (tos != IPTOS_LOWDELAY && tos != IPTOS_THROUGHPUT &&
-	    tos != IPTOS_RELIABILITY && tos != IPTOS_MINCOST &&
-	    tos != IPTOS_NORMALSVC) {
-		printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
-		return false;
-	}
-
-	return true;
-}
-
-static unsigned int
 tos_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	const struct xt_tos_target_info *info = par->targinfo;
@@ -168,16 +132,6 @@ static struct xt_target dscp_tg_reg[] __read_mostly = {
 	},
 	{
 		.name		= "TOS",
-		.revision	= 0,
-		.family		= NFPROTO_IPV4,
-		.table		= "mangle",
-		.target		= tos_tg_v0,
-		.targetsize	= sizeof(struct ipt_tos_target_info),
-		.checkentry	= tos_tg_check_v0,
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "TOS",
 		.revision	= 1,
 		.family		= NFPROTO_IPV4,
 		.table		= "mangle",
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index c3f8085..0280d3a 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -15,7 +15,6 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_dscp.h>
-#include <linux/netfilter_ipv4/ipt_tos.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: DSCP/TOS field match");
@@ -55,14 +54,6 @@ static bool dscp_mt_check(const struct xt_mtchk_param *par)
 	return true;
 }
 
-static bool
-tos_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
-{
-	const struct ipt_tos_info *info = par->matchinfo;
-
-	return (ip_hdr(skb)->tos == info->tos) ^ info->invert;
-}
-
 static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct xt_tos_match_info *info = par->matchinfo;
@@ -94,14 +85,6 @@ static struct xt_match dscp_mt_reg[] __read_mostly = {
 	},
 	{
 		.name		= "tos",
-		.revision	= 0,
-		.family		= NFPROTO_IPV4,
-		.match		= tos_mt_v0,
-		.matchsize	= sizeof(struct ipt_tos_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "tos",
 		.revision	= 1,
 		.family		= NFPROTO_IPV4,
 		.match		= tos_mt,

^ permalink raw reply related

* netfilter 01/31: nf_conntrack: add SCTP support for SO_ORIGINAL_DST
From: Patrick McHardy @ 2009-09-10 16:11 UTC (permalink / raw)
  To: davem; +Cc: netdev, Patrick McHardy, netfilter-devel
In-Reply-To: <20090910161142.31179.5256.sendpatchset@x2.localnet>

commit 549812799c8495451e71ebd9f6a862b33120a35a
Author: Rafael Laufer <rlaufer@cs.ucla.edu>
Date:   Mon Aug 10 10:08:27 2009 +0200

    netfilter: nf_conntrack: add SCTP support for SO_ORIGINAL_DST
    
    Signed-off-by: Patrick McHardy <kaber@trash.net>

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 7d2ead7..05a9bc8 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -256,11 +256,11 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 	tuple.dst.u3.ip = inet->daddr;
 	tuple.dst.u.tcp.port = inet->dport;
 	tuple.src.l3num = PF_INET;
-	tuple.dst.protonum = IPPROTO_TCP;
+	tuple.dst.protonum = sk->sk_protocol;
 
-	/* We only do TCP at the moment: is there a better way? */
-	if (strcmp(sk->sk_prot->name, "TCP")) {
-		pr_debug("SO_ORIGINAL_DST: Not a TCP socket\n");
+	/* We only do TCP and SCTP at the moment: is there a better way? */
+	if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) {
+		pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
 		return -ENOPROTOOPT;
 	}
 

^ permalink raw reply related

* netfilter 00/31: netfilter 2.6.32 update
From: Patrick McHardy @ 2009-09-10 16:11 UTC (permalink / raw)
  To: davem; +Cc: netdev, Patrick McHardy, netfilter-devel

Hi Dave,

following is my netfilter update for 2.6.32, containing:

- the scheduled removal of old x_tables match and target revisions from Jan

- the scheduled removal of old redirecting ip_tables header files from Jan

- x_tables cleanups and smaller improvements from Jan

- SCTP support for SO_ORIGINAL_DST from Rafael Laufer

- handling of ICMPv6 messages in IPVS from Julius Volz

- a patch to log packets dropped by conntrack helpers from myself

- patches to constify netlink message attributes in netfilter from myself

- a fix for bridge netfilter in_device refcount leaks from Eric

- a fix for conntrack cleanup in non-init namespaces from Alexey

- a fix for an ebt_ulog inverted return value from myself

- a fix for atomic operations in IPVS from Simon

- a fix for a read outside array bounds in ip6t_eui from myself

- a fix for inverted logic for persistent NAT mappings from Maximilian Engelhardt

Most of the fixes are for regressions, I'll pass all those on to -stable
once the patches hit mainline.

Please apply or pull from:

git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6.git master

Thanks!


 Documentation/feature-removal-schedule.txt     |   25 ----
 include/linux/netfilter/nfnetlink.h            |    3 +-
 include/linux/netfilter/x_tables.h             |    4 +-
 include/linux/netfilter/xt_CONNMARK.h          |    6 -
 include/linux/netfilter/xt_MARK.h              |   17 ---
 include/linux/netfilter/xt_connmark.h          |    5 -
 include/linux/netfilter/xt_conntrack.h         |   36 -----
 include/linux/netfilter/xt_mark.h              |    5 -
 include/linux/netfilter_arp/arp_tables.h       |    2 +-
 include/linux/netfilter_bridge/ebtables.h      |    2 +-
 include/linux/netfilter_ipv4/Kbuild            |   32 -----
 include/linux/netfilter_ipv4/ip_tables.h       |    2 +-
 include/linux/netfilter_ipv4/ipt_CLASSIFY.h    |    7 -
 include/linux/netfilter_ipv4/ipt_CONNMARK.h    |   19 ---
 include/linux/netfilter_ipv4/ipt_DSCP.h        |   18 ---
 include/linux/netfilter_ipv4/ipt_ECN.h         |    4 +-
 include/linux/netfilter_ipv4/ipt_MARK.h        |   18 ---
 include/linux/netfilter_ipv4/ipt_NFQUEUE.h     |   16 ---
 include/linux/netfilter_ipv4/ipt_TCPMSS.h      |    9 --
 include/linux/netfilter_ipv4/ipt_TOS.h         |   12 --
 include/linux/netfilter_ipv4/ipt_comment.h     |   10 --
 include/linux/netfilter_ipv4/ipt_connbytes.h   |   18 ---
 include/linux/netfilter_ipv4/ipt_connmark.h    |    7 -
 include/linux/netfilter_ipv4/ipt_conntrack.h   |   28 ----
 include/linux/netfilter_ipv4/ipt_dccp.h        |   15 --
 include/linux/netfilter_ipv4/ipt_dscp.h        |   21 ---
 include/linux/netfilter_ipv4/ipt_ecn.h         |    4 +-
 include/linux/netfilter_ipv4/ipt_esp.h         |   10 --
 include/linux/netfilter_ipv4/ipt_hashlimit.h   |   14 --
 include/linux/netfilter_ipv4/ipt_helper.h      |    7 -
 include/linux/netfilter_ipv4/ipt_iprange.h     |   21 ---
 include/linux/netfilter_ipv4/ipt_length.h      |    7 -
 include/linux/netfilter_ipv4/ipt_limit.h       |    8 -
 include/linux/netfilter_ipv4/ipt_mac.h         |    7 -
 include/linux/netfilter_ipv4/ipt_mark.h        |    9 --
 include/linux/netfilter_ipv4/ipt_multiport.h   |   15 --
 include/linux/netfilter_ipv4/ipt_owner.h       |   20 ---
 include/linux/netfilter_ipv4/ipt_physdev.h     |   17 ---
 include/linux/netfilter_ipv4/ipt_pkttype.h     |    7 -
 include/linux/netfilter_ipv4/ipt_policy.h      |   23 ----
 include/linux/netfilter_ipv4/ipt_recent.h      |   21 ---
 include/linux/netfilter_ipv4/ipt_sctp.h        |  105 ---------------
 include/linux/netfilter_ipv4/ipt_state.h       |   15 --
 include/linux/netfilter_ipv4/ipt_string.h      |   10 --
 include/linux/netfilter_ipv4/ipt_tcpmss.h      |    7 -
 include/linux/netfilter_ipv4/ipt_tos.h         |   13 --
 include/linux/netfilter_ipv6/Kbuild            |   12 +--
 include/linux/netfilter_ipv6/ip6_tables.h      |    2 +-
 include/linux/netfilter_ipv6/ip6t_MARK.h       |    9 --
 include/linux/netfilter_ipv6/ip6t_esp.h        |   10 --
 include/linux/netfilter_ipv6/ip6t_length.h     |    8 -
 include/linux/netfilter_ipv6/ip6t_limit.h      |    8 -
 include/linux/netfilter_ipv6/ip6t_mac.h        |    7 -
 include/linux/netfilter_ipv6/ip6t_mark.h       |    9 --
 include/linux/netfilter_ipv6/ip6t_multiport.h  |   14 --
 include/linux/netfilter_ipv6/ip6t_owner.h      |   18 ---
 include/linux/netfilter_ipv6/ip6t_physdev.h    |   17 ---
 include/linux/netfilter_ipv6/ip6t_policy.h     |   23 ----
 include/linux/netlink.h                        |   15 +-
 include/net/netfilter/nf_nat_core.h            |    2 +-
 include/net/netlink.h                          |    4 +-
 include/net/rtnetlink.h                        |    2 +-
 net/bridge/br_netfilter.c                      |    2 +-
 net/bridge/netfilter/ebt_log.c                 |   29 +---
 net/bridge/netfilter/ebt_ulog.c                |    2 +-
 net/bridge/netfilter/ebtable_broute.c          |    2 +-
 net/bridge/netfilter/ebtable_filter.c          |    8 +-
 net/bridge/netfilter/ebtable_nat.c             |    6 +-
 net/bridge/netfilter/ebtables.c                |   13 +-
 net/ipv4/netfilter/arp_tables.c                |   47 +++++--
 net/ipv4/netfilter/arptable_filter.c           |    4 +-
 net/ipv4/netfilter/ip_tables.c                 |   51 +++++---
 net/ipv4/netfilter/iptable_filter.c            |   10 +-
 net/ipv4/netfilter/iptable_mangle.c            |   16 +-
 net/ipv4/netfilter/iptable_raw.c               |   10 +-
 net/ipv4/netfilter/iptable_security.c          |   12 +-
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |   22 ++--
 net/ipv4/netfilter/nf_nat_core.c               |    8 +-
 net/ipv4/netfilter/nf_nat_rule.c               |    6 +-
 net/ipv4/netfilter/nf_nat_standalone.c         |    8 +-
 net/ipv6/netfilter/ip6_tables.c                |   48 +++++--
 net/ipv6/netfilter/ip6t_eui64.c                |    9 +-
 net/ipv6/netfilter/ip6table_filter.c           |   10 +-
 net/ipv6/netfilter/ip6table_mangle.c           |   16 +-
 net/ipv6/netfilter/ip6table_raw.c              |   10 +-
 net/ipv6/netfilter/ip6table_security.c         |   12 +-
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |   18 ++-
 net/netfilter/ipvs/ip_vs_core.c                |   29 +++--
 net/netfilter/ipvs/ip_vs_wrr.c                 |    7 +-
 net/netfilter/nf_conntrack_core.c              |    8 +-
 net/netfilter/nf_conntrack_netlink.c           |   54 +++++---
 net/netfilter/nfnetlink.c                      |    2 +-
 net/netfilter/nfnetlink_log.c                  |    6 +-
 net/netfilter/nfnetlink_queue.c                |    9 +-
 net/netfilter/x_tables.c                       |    7 +-
 net/netfilter/xt_CONNMARK.c                    |  134 ++------------------
 net/netfilter/xt_DSCP.c                        |   46 -------
 net/netfilter/xt_MARK.c                        |  163 ++----------------------
 net/netfilter/xt_connmark.c                    |  101 ++-------------
 net/netfilter/xt_conntrack.c                   |  155 +----------------------
 net/netfilter/xt_dscp.c                        |   17 ---
 net/netfilter/xt_iprange.c                     |   45 +------
 net/netfilter/xt_mark.c                        |   86 ++-----------
 net/netfilter/xt_osf.c                         |    6 +-
 net/netfilter/xt_owner.c                       |  130 ++-----------------
 net/netlink/af_netlink.c                       |    2 +-
 net/sched/act_api.c                            |    2 +-
 107 files changed, 373 insertions(+), 1856 deletions(-)
 delete mode 100644 include/linux/netfilter_ipv4/ipt_CLASSIFY.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_CONNMARK.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_DSCP.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_MARK.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_NFQUEUE.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_TCPMSS.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_TOS.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_comment.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_connbytes.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_connmark.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_conntrack.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_dccp.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_dscp.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_esp.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_hashlimit.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_helper.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_iprange.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_length.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_limit.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_mac.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_mark.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_multiport.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_owner.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_physdev.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_pkttype.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_policy.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_recent.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_sctp.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_state.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_string.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_tcpmss.h
 delete mode 100644 include/linux/netfilter_ipv4/ipt_tos.h
 delete mode 100644 include/linux/netfilter_ipv6/ip6t_MARK.h
 delete mode 100644 include/linux/netfilter_ipv6/ip6t_esp.h
 delete mode 100644 include/linux/netfilter_ipv6/ip6t_length.h
 delete mode 100644 include/linux/netfilter_ipv6/ip6t_limit.h
 delete mode 100644 include/linux/netfilter_ipv6/ip6t_mac.h
 delete mode 100644 include/linux/netfilter_ipv6/ip6t_mark.h
 delete mode 100644 include/linux/netfilter_ipv6/ip6t_multiport.h
 delete mode 100644 include/linux/netfilter_ipv6/ip6t_owner.h
 delete mode 100644 include/linux/netfilter_ipv6/ip6t_physdev.h
 delete mode 100644 include/linux/netfilter_ipv6/ip6t_policy.h

Alexey Dobriyan (1):
      netfilter: nf_conntrack: netns fix re reliable conntrack event delivery

Eric Dumazet (1):
      netfilter: bridge: refcount fix

Jan Engelhardt (19):
      netfilter: xtables: remove xt_TOS v0
      netfilter: xtables: remove xt_CONNMARK v0
      netfilter: xtables: remove xt_MARK v0, v1
      netfilter: xtables: remove xt_connmark v0
      netfilter: xtables: remove xt_conntrack v0
      netfilter: xtables: remove xt_iprange v0
      netfilter: xtables: remove xt_mark v0
      netfilter: xtables: remove xt_owner v0
      netfilter: xtables: remove redirecting header files
      netfilter: conntrack: switch hook PFs to nfproto
      netfilter: xtables: switch hook PFs to nfproto
      netfilter: xtables: switch table AFs to nfproto
      netfilter: xtables: realign struct xt_target_param
      netfilter: iptables: remove unused datalen variable
      netfilter: xtables: use memcmp in unconditional check
      netfilter: xtables: ignore unassigned hooks in check_entry_size_and_hooks
      netfilter: xtables: check for unconditionality of policies
      netfilter: xtables: check for standard verdicts in policies
      netfilter: xtables: mark initial tables constant

Julius Volz (1):
      IPVS: Add handling of incoming ICMPV6 messages

Maximilian Engelhardt (1):
      netfilter: nf_nat: fix inverted logic for persistent NAT mappings

Patrick McHardy (6):
      Merge branch 'master' of git://dev.medozas.de/linux
      netfilter: nf_conntrack: log packets dropped by helpers
      netlink: constify nlmsghdr arguments
      netfilter: nfnetlink: constify message attributes and headers
      netfilter: ip6t_eui: fix read outside array bounds
      netfilter: ebt_ulog: fix checkentry return value

Rafael Laufer (1):
      netfilter: nf_conntrack: add SCTP support for SO_ORIGINAL_DST

Simon Horman (1):
      ipvs: Use atomic operations atomicly

Tobias Klauser (1):
      netfilter: ebtables: Use %pM conversion specifier

^ permalink raw reply

* Re: [PATCH] ipv6: Add IFA_F_DADFAILED flag
From: Jens Rosenboom @ 2009-09-10 16:11 UTC (permalink / raw)
  To: Brian Haley; +Cc: david Miller, netdev@vger.kernel.org, YOSHIFUJI Hideaki
In-Reply-To: <4AA84B3C.4000401@hp.com>

On Wed, 2009-09-09 at 20:41 -0400, Brian Haley wrote:
> Jens Rosenboom wrote:
> > On Tue, 2009-09-08 at 11:18 -0400, Brian Haley wrote:
> >> Jens Rosenboom wrote:
> >>>> --- a/net/ipv6/addrconf.c
> >>>> +++ b/net/ipv6/addrconf.c
> >>>> @@ -1376,7 +1376,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp)
> >>>>  	if (ifp->flags&IFA_F_PERMANENT) {
> >>>>  		spin_lock_bh(&ifp->lock);
> >>>>  		addrconf_del_timer(ifp);
> >>>> -		ifp->flags |= IFA_F_TENTATIVE;
> >>>> +		ifp->flags |= IFA_F_DADFAILED;
> >>> I think you still have to set IFA_F_TENTATIVE here, too, otherwise
> >>> ipv6_dev_get_saddr() will use this address. 		
> >> The tentative bit is still set from when this address was added back
> >> in ipv6_add_addr() from what I can tell, re-setting it here is actually
> >> unnecessary.  At least /sbin/ip was still showing it set during my
> >> testing.
> > 
> > There is the possibility of a race when the dad_timer expires at the
> > same time the NA triggering DAD failure is received. There isn't a big
> > chance to see that during real world testing, though.
> 
> Ok, how does this look?  I changed it to set the tentative flag as it did
> before, plus clear the dad_failed flag if the device got restarted,
> triggering DAD to happen again for any tentative address, that was an
> oversight on my part.

Looks fine to me so far. Can you also send the patch for userspace? That
would make testing this a bit easier. ;-)

> I'd still like to know if using this last ifa_flag is going to be an issue,
> I actually finished a similar patch that uses a new IFA_ADDRFLAGS structure
> to pass in/out this additional info.

IMHO you should stick to this version. If any future feature needs
another bit, it may well need two of them and so will need a new
structure then anyway, so why not keep it simple for now?


^ permalink raw reply

* Re: [PATCH 00/12] Gigaset driver patches for 2.6.32
From: Tilman Schmidt @ 2009-09-10 14:05 UTC (permalink / raw)
  To: David Miller, dwalker-zu3NM2574RrQT0dZR+AlfA
  Cc: hjlipp-S0/GAf8tV78, netdev-u79uwXL29TY76Z2rM5mHXA,
	tilman-ZTO5kqT2PaM, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	i4ldeveloper-JX7+OpRa80SjiSfgN6Y1Ib39b6g2fGNp

Von: "David Miller" <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
An: dwalker-zu3NM2574RrQT0dZR+AlfA@public.gmane.org
Cc: tilman-ZTO5kqT2PaM@public.gmane.org; linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org; i4ldeveloper-JX7+OpRa80SjiSfgN6Y1Ib39b6g2fGNp@public.gmane.org; hjlipp-S0/GAf8tV78@public.gmane.org
Gesendet: 10.09.09 05:51
Betreff: Re: [PATCH 00/12] Gigaset driver patches for 2.6.32

> From: Daniel Walker <dwalker-zu3NM2574RrQT0dZR+AlfA@public.gmane.org>
> Date: Wed, 09 Sep 2009 20:47:57 -0700

> On Thu, 2009-09-10 at 00:32 +0200, Tilman Schmidt wrote:
>> Daniel Walker wrote 07.09.09 16:30:
>> > Yeah, it looks like the whole file needs a checkpatch clean up..
>> > Sounds like you're not willing to do that?
>> 
>> It's not a question of willingness. You may notice I did a lot of
>> cleanup work already. But it's very time consuming work, and there has
>> been more important work to attend to first.
>> 
>> > Usually if a checkpatch cleanup comes first, prior to all your
>> > other changes, it doesn't cloud the rest of the changes..
>> 
>> Sure. But that would mean postponing the merging of bugfixes until
>> someone finds the time to do a complete checkpatch cleanup of the
>> affected code. I don't think that's a sensible approach.
> 
> You shouldn't be adding any new checkpatch errors, but you currently
> are .. Just clean up the individual patches w/o the entire gigaset
> driver, that should be do-able (it's even a basic submission
> requirement). The other issue is that you're adding new files which aren't
> clean, those can certainly be cleaned up.

> Right, this is a very reasonable request.

Then so be it. Will do as soon as time permits.

T.

^ permalink raw reply

* Re: [PATCH RFC] tun: export underlying socket
From: Michael S. Tsirkin @ 2009-09-10 13:27 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev, herbert
In-Reply-To: <4AA8FCD9.3040600@gmail.com>

On Thu, Sep 10, 2009 at 03:19:21PM +0200, Eric Dumazet wrote:
> Michael S. Tsirkin a écrit :
> > Tun device looks similar to a packet socket
> > in that both pass complete frames from/to userspace.
> > 
> > This patch fills in enough fields in the socket underlying tun driver
> > to support sendmsg/recvmsg operations, and exports access to this socket
> > to modules.
> > 
> > This way, code using raw sockets to inject packets
> > into a physical device, can support injecting
> > packets into host network stack almost without modification.
> > 
> > First user of this interface will be vhost virtualization
> > accelerator.
> > 
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > ---
> > 
> > This patch is on top of net-next master.
> > An alternative approach would be to add an ioctl to tun, to export the
> > underlying socket to userspace: a uniform way to work with a network
> > device and the host stack might be useful there, as well.
> > Kernel users could then do sockfd_lookup to get the socket.
> > I decided against it for now as it requires more code.
> > Please comment.
> > 
> >  drivers/net/tun.c      |   78 +++++++++++++++++++++++++++++++++++++++++++----
> >  include/linux/if_tun.h |   14 ++++++++
> >  2 files changed, 85 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> > index 589a44a..76f5faa 100644
> > --- a/drivers/net/tun.c
> > +++ b/drivers/net/tun.c
> > @@ -151,6 +151,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
> >  	err = 0;
> >  	tfile->tun = tun;
> >  	tun->tfile = tfile;
> > +	tun->socket.file = file;
> >  	dev_hold(tun->dev);
> >  	sock_hold(tun->socket.sk);
> >  	atomic_inc(&tfile->count);
> > @@ -165,6 +166,7 @@ static void __tun_detach(struct tun_struct *tun)
> >  	/* Detach from net device */
> >  	netif_tx_lock_bh(tun->dev);
> >  	tun->tfile = NULL;
> > +	tun->socket.file = NULL;
> >  	netif_tx_unlock_bh(tun->dev);
> >  
> >  	/* Drop read queue */
> > @@ -750,7 +752,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
> >  	len = min_t(int, skb->len, len);
> >  
> >  	skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
> > -	total += len;
> > +	total += skb->len;
> 
> Why are you changing this ?

Because this function is now used in both read() and recvmsg(), and
recvmsg with MSG_TRUNC reports the full packet length.

> This is very strange that read() can return
> a bigger length than what was asked by user...

Of course. Note how tun_chr_aio_read below does
	ret = min_t(ssize_t, ret, count);
so there's no change for read() at all. OK?

> >  
> >  	tun->dev->stats.tx_packets++;
> >  	tun->dev->stats.tx_bytes += len;
> > @@ -758,12 +760,10 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
> >  	return total;
> >  }
> >  
> > -static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
> > -			    unsigned long count, loff_t pos)
> > +static ssize_t tun_do_read(struct tun_struct *tun,
> > +			   struct kiocb *iocb, const struct iovec *iv,
> > +			   unsigned long count, int noblock)
> >  {
> > -	struct file *file = iocb->ki_filp;
> > -	struct tun_file *tfile = file->private_data;
> > -	struct tun_struct *tun = __tun_get(tfile);
> >  	DECLARE_WAITQUEUE(wait, current);
> >  	struct sk_buff *skb;
> >  	ssize_t len, ret = 0;
> > @@ -785,7 +785,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
> >  
> >  		/* Read frames from the queue */
> >  		if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
> > -			if (file->f_flags & O_NONBLOCK) {
> > +			if (noblock) {
> >  				ret = -EAGAIN;
> >  				break;
> >  			}
> > @@ -813,6 +813,21 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
> >  	remove_wait_queue(&tun->socket.wait, &wait);
> >  
> >  out:
> > +	return ret;
> > +}
> > +
> > +static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
> > +			    unsigned long count, loff_t pos)
> > +{
> > +	struct file *file = iocb->ki_filp;
> > +	struct tun_file *tfile = file->private_data;
> > +	struct tun_struct *tun = __tun_get(tfile);
> > +	ssize_t ret;
> > +
> > +	if (!tun)
> > +		return -EBADFD;
> > +	ret = tun_do_read(tun, iocb, iv, count, file->f_flags & O_NONBLOCK);
> > +	ret = min_t(ssize_t, ret, count);
> >  	tun_put(tun);
> >  	return ret;
> >  }
> > @@ -865,6 +880,37 @@ static void tun_sock_destruct(struct sock *sk)
> >  	free_netdev(container_of(sk, struct tun_sock, sk)->tun->dev);
> >  }
> >  
> > +static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
> > +		       struct msghdr *m, size_t total_len)
> > +{
> > +	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
> > +	return tun_get_user(tun, m->msg_iov, total_len,
> > +			    m->msg_flags & MSG_DONTWAIT);
> > +}
> > +
> > +static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
> > +		       struct msghdr *m, size_t total_len,
> > +		       int flags)
> > +{
> > +	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
> > +	int ret;
> > +	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
> > +		return -EINVAL;
> > +	ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
> > +			  flags & MSG_DONTWAIT);
> > +	if (ret > total_len) {
> > +		m->msg_flags |= MSG_TRUNC;
> > +		ret = flags & MSG_TRUNC ? ret : total_len;
> > +	}
> > +	return ret;
> > +}
> > +
> > +/* Ops structure to mimic raw sockets with tun */
> > +static const struct proto_ops tun_socket_ops = {
> > +	.sendmsg = tun_sendmsg,
> > +	.recvmsg = tun_recvmsg,
> > +};
> > +
> >  static struct proto tun_proto = {
> >  	.name		= "tun",
> >  	.owner		= THIS_MODULE,
> > @@ -982,6 +1028,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
> >  			goto err_free_dev;
> >  
> >  		init_waitqueue_head(&tun->socket.wait);
> > +		tun->socket.ops = &tun_socket_ops;
> >  		sock_init_data(&tun->socket, sk);
> >  		sk->sk_write_space = tun_sock_write_space;
> >  		sk->sk_sndbuf = INT_MAX;
> > @@ -1483,6 +1530,23 @@ static void tun_cleanup(void)
> >  	rtnl_link_unregister(&tun_link_ops);
> >  }
> >  
> > +/* Get an underlying socket object from tun file.  Returns error unless file is
> > + * attached to a device.  The returned object works like a packet socket, it
> > + * can be used for sock_sendmsg/sock_recvmsg.  The caller is responsible for
> > + * holding a reference to the file for as long as the socket is in use. */
> > +struct socket *tun_get_socket(struct file *file)
> > +{
> > +	struct tun_struct *tun;
> > +	if (file->f_op != &tun_fops)
> > +		return ERR_PTR(-EINVAL);
> > +	tun = tun_get(file);
> > +	if (!tun)
> > +		return ERR_PTR(-EBADFD);
> > +	tun_put(tun);
> > +	return &tun->socket;
> > +}
> > +EXPORT_SYMBOL_GPL(tun_get_socket);
> > +
> >  module_init(tun_init);
> >  module_exit(tun_cleanup);
> >  MODULE_DESCRIPTION(DRV_DESCRIPTION);
> > diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
> > index 3f5fd52..404abe0 100644
> > --- a/include/linux/if_tun.h
> > +++ b/include/linux/if_tun.h
> > @@ -86,4 +86,18 @@ struct tun_filter {
> >  	__u8   addr[0][ETH_ALEN];
> >  };
> >  
> > +#ifdef __KERNEL__
> > +#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
> > +struct socket *tun_get_socket(struct file *);
> > +#else
> > +#include <linux/err.h>
> > +#include <linux/errno.h>
> > +struct file;
> > +struct socket;
> > +static inline struct socket *tun_get_socket(struct file *f)
> > +{
> > +	return ERR_PTR(-EINVAL);
> > +}
> > +#endif /* CONFIG_TUN */
> > +#endif /* __KERNEL__ */
> >  #endif /* __IF_TUN_H */

^ permalink raw reply

* Re: [PATCH RFC] tun: export underlying socket
From: Eric Dumazet @ 2009-09-10 13:19 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: David Miller, m.s.tsirkin, netdev, herbert
In-Reply-To: <20090910125929.GA32593@redhat.com>

Michael S. Tsirkin a écrit :
> Tun device looks similar to a packet socket
> in that both pass complete frames from/to userspace.
> 
> This patch fills in enough fields in the socket underlying tun driver
> to support sendmsg/recvmsg operations, and exports access to this socket
> to modules.
> 
> This way, code using raw sockets to inject packets
> into a physical device, can support injecting
> packets into host network stack almost without modification.
> 
> First user of this interface will be vhost virtualization
> accelerator.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> 
> This patch is on top of net-next master.
> An alternative approach would be to add an ioctl to tun, to export the
> underlying socket to userspace: a uniform way to work with a network
> device and the host stack might be useful there, as well.
> Kernel users could then do sockfd_lookup to get the socket.
> I decided against it for now as it requires more code.
> Please comment.
> 
>  drivers/net/tun.c      |   78 +++++++++++++++++++++++++++++++++++++++++++----
>  include/linux/if_tun.h |   14 ++++++++
>  2 files changed, 85 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 589a44a..76f5faa 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -151,6 +151,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
>  	err = 0;
>  	tfile->tun = tun;
>  	tun->tfile = tfile;
> +	tun->socket.file = file;
>  	dev_hold(tun->dev);
>  	sock_hold(tun->socket.sk);
>  	atomic_inc(&tfile->count);
> @@ -165,6 +166,7 @@ static void __tun_detach(struct tun_struct *tun)
>  	/* Detach from net device */
>  	netif_tx_lock_bh(tun->dev);
>  	tun->tfile = NULL;
> +	tun->socket.file = NULL;
>  	netif_tx_unlock_bh(tun->dev);
>  
>  	/* Drop read queue */
> @@ -750,7 +752,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
>  	len = min_t(int, skb->len, len);
>  
>  	skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
> -	total += len;
> +	total += skb->len;

Why are you changing this ? This is very strange that read() can return
a bigger length than what was asked by user...

>  
>  	tun->dev->stats.tx_packets++;
>  	tun->dev->stats.tx_bytes += len;
> @@ -758,12 +760,10 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
>  	return total;
>  }
>  
> -static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
> -			    unsigned long count, loff_t pos)
> +static ssize_t tun_do_read(struct tun_struct *tun,
> +			   struct kiocb *iocb, const struct iovec *iv,
> +			   unsigned long count, int noblock)
>  {
> -	struct file *file = iocb->ki_filp;
> -	struct tun_file *tfile = file->private_data;
> -	struct tun_struct *tun = __tun_get(tfile);
>  	DECLARE_WAITQUEUE(wait, current);
>  	struct sk_buff *skb;
>  	ssize_t len, ret = 0;
> @@ -785,7 +785,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
>  
>  		/* Read frames from the queue */
>  		if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
> -			if (file->f_flags & O_NONBLOCK) {
> +			if (noblock) {
>  				ret = -EAGAIN;
>  				break;
>  			}
> @@ -813,6 +813,21 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
>  	remove_wait_queue(&tun->socket.wait, &wait);
>  
>  out:
> +	return ret;
> +}
> +
> +static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
> +			    unsigned long count, loff_t pos)
> +{
> +	struct file *file = iocb->ki_filp;
> +	struct tun_file *tfile = file->private_data;
> +	struct tun_struct *tun = __tun_get(tfile);
> +	ssize_t ret;
> +
> +	if (!tun)
> +		return -EBADFD;
> +	ret = tun_do_read(tun, iocb, iv, count, file->f_flags & O_NONBLOCK);
> +	ret = min_t(ssize_t, ret, count);
>  	tun_put(tun);
>  	return ret;
>  }
> @@ -865,6 +880,37 @@ static void tun_sock_destruct(struct sock *sk)
>  	free_netdev(container_of(sk, struct tun_sock, sk)->tun->dev);
>  }
>  
> +static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
> +		       struct msghdr *m, size_t total_len)
> +{
> +	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
> +	return tun_get_user(tun, m->msg_iov, total_len,
> +			    m->msg_flags & MSG_DONTWAIT);
> +}
> +
> +static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
> +		       struct msghdr *m, size_t total_len,
> +		       int flags)
> +{
> +	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
> +	int ret;
> +	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
> +		return -EINVAL;
> +	ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
> +			  flags & MSG_DONTWAIT);
> +	if (ret > total_len) {
> +		m->msg_flags |= MSG_TRUNC;
> +		ret = flags & MSG_TRUNC ? ret : total_len;
> +	}
> +	return ret;
> +}
> +
> +/* Ops structure to mimic raw sockets with tun */
> +static const struct proto_ops tun_socket_ops = {
> +	.sendmsg = tun_sendmsg,
> +	.recvmsg = tun_recvmsg,
> +};
> +
>  static struct proto tun_proto = {
>  	.name		= "tun",
>  	.owner		= THIS_MODULE,
> @@ -982,6 +1028,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  			goto err_free_dev;
>  
>  		init_waitqueue_head(&tun->socket.wait);
> +		tun->socket.ops = &tun_socket_ops;
>  		sock_init_data(&tun->socket, sk);
>  		sk->sk_write_space = tun_sock_write_space;
>  		sk->sk_sndbuf = INT_MAX;
> @@ -1483,6 +1530,23 @@ static void tun_cleanup(void)
>  	rtnl_link_unregister(&tun_link_ops);
>  }
>  
> +/* Get an underlying socket object from tun file.  Returns error unless file is
> + * attached to a device.  The returned object works like a packet socket, it
> + * can be used for sock_sendmsg/sock_recvmsg.  The caller is responsible for
> + * holding a reference to the file for as long as the socket is in use. */
> +struct socket *tun_get_socket(struct file *file)
> +{
> +	struct tun_struct *tun;
> +	if (file->f_op != &tun_fops)
> +		return ERR_PTR(-EINVAL);
> +	tun = tun_get(file);
> +	if (!tun)
> +		return ERR_PTR(-EBADFD);
> +	tun_put(tun);
> +	return &tun->socket;
> +}
> +EXPORT_SYMBOL_GPL(tun_get_socket);
> +
>  module_init(tun_init);
>  module_exit(tun_cleanup);
>  MODULE_DESCRIPTION(DRV_DESCRIPTION);
> diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
> index 3f5fd52..404abe0 100644
> --- a/include/linux/if_tun.h
> +++ b/include/linux/if_tun.h
> @@ -86,4 +86,18 @@ struct tun_filter {
>  	__u8   addr[0][ETH_ALEN];
>  };
>  
> +#ifdef __KERNEL__
> +#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
> +struct socket *tun_get_socket(struct file *);
> +#else
> +#include <linux/err.h>
> +#include <linux/errno.h>
> +struct file;
> +struct socket;
> +static inline struct socket *tun_get_socket(struct file *f)
> +{
> +	return ERR_PTR(-EINVAL);
> +}
> +#endif /* CONFIG_TUN */
> +#endif /* __KERNEL__ */
>  #endif /* __IF_TUN_H */


^ permalink raw reply

* [PATCH] dca: registering requesters in multiple dca domains
From: Maciej Sosnowski @ 2009-09-10 13:05 UTC (permalink / raw)
  To: dan.j.williams; +Cc: linux-kernel, netdev

This patch enables DCA support on multiple-IOH/multiple-IIO architectures.
It modifies dca module by replacing single dca_providers list
with dca_domains list, each domain containing separate list of providers.
This approach lets dca driver manage multiple domains, i.e. sets of providers
and requesters mapped back to the same PCI root complex device.
The driver takes care to register each requester to a provider
from the same domain.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
---

 drivers/dca/dca-core.c |  122 +++++++++++++++++++++++++++++++++++++++++++-----
 drivers/dma/ioat/pci.c |    2 -
 include/linux/dca.h    |   11 ++++
 3 files changed, 120 insertions(+), 15 deletions(-)

diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c
index 25b743a..7e318de 100644
--- a/drivers/dca/dca-core.c
+++ b/drivers/dca/dca-core.c
@@ -28,7 +28,7 @@ #include <linux/notifier.h>
 #include <linux/device.h>
 #include <linux/dca.h>
 
-#define DCA_VERSION "1.8"
+#define DCA_VERSION "1.12.1"
 
 MODULE_VERSION(DCA_VERSION);
 MODULE_LICENSE("GPL");
@@ -36,20 +36,92 @@ MODULE_AUTHOR("Intel Corporation");
 
 static DEFINE_SPINLOCK(dca_lock);
 
-static LIST_HEAD(dca_providers);
+static LIST_HEAD(dca_domains);
 
-static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+static struct pci_bus *dca_pci_rc_from_dev(struct device *dev)
 {
-	struct dca_provider *dca, *ret = NULL;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_bus *bus = pdev->bus;
 
-	list_for_each_entry(dca, &dca_providers, node) {
-		if ((!dev) || (dca->ops->dev_managed(dca, dev))) {
-			ret = dca;
-			break;
-		}
+	while (bus->parent)
+		bus = bus->parent;
+
+	return bus;
+}
+
+static struct dca_domain *dca_allocate_domain(struct pci_bus *rc)
+{
+	struct dca_domain *domain;
+
+	domain = kzalloc(sizeof(*domain), GFP_NOWAIT);
+	if (!domain)
+		return NULL;
+
+	INIT_LIST_HEAD(&domain->dca_providers);
+	domain->pci_rc = rc;
+
+	return domain;
+}
+
+static void dca_free_domain(struct dca_domain *domain)
+{
+	list_del(&domain->node);
+	kfree(domain);
+}
+
+static struct dca_domain *dca_find_domain(struct pci_bus *rc)
+{
+	struct dca_domain *domain;
+
+	list_for_each_entry(domain, &dca_domains, node)
+		if (domain->pci_rc == rc)
+			return domain;
+
+	return NULL;
+}
+
+static struct dca_domain *dca_get_domain(struct device *dev)
+{
+	struct pci_bus *rc;
+	struct dca_domain *domain;
+
+	rc = dca_pci_rc_from_dev(dev);
+	domain = dca_find_domain(rc);
+
+	if (!domain) {
+		domain = dca_allocate_domain(rc);
+		if (domain)
+			list_add(&domain->node, &dca_domains);
+	}
+
+	return domain;
+}
+
+static struct dca_provider *dca_find_provider_by_dev(struct device *dev)
+{
+	struct dca_provider *dca;
+	struct pci_bus *rc;
+	struct dca_domain *domain;
+
+	if (dev) {
+		rc = dca_pci_rc_from_dev(dev);
+		domain = dca_find_domain(rc);
+		if (!domain)
+			return NULL;
+	} else {
+		if (!list_empty(&dca_domains))
+			domain = list_first_entry(&dca_domains,
+						  struct dca_domain,
+						  node);
+		else
+			return NULL;
 	}
 
-	return ret;
+	list_for_each_entry(dca, &domain->dca_providers, node)
+		if ((!dev) || (dca->ops->dev_managed(dca, dev)))
+			return dca;
+
+	return NULL;
 }
 
 /**
@@ -61,6 +133,8 @@ int dca_add_requester(struct device *dev
 	struct dca_provider *dca;
 	int err, slot = -ENODEV;
 	unsigned long flags;
+	struct pci_bus *pci_rc;
+	struct dca_domain *domain;
 
 	if (!dev)
 		return -EFAULT;
@@ -74,7 +148,14 @@ int dca_add_requester(struct device *dev
 		return -EEXIST;
 	}
 
-	list_for_each_entry(dca, &dca_providers, node) {
+	pci_rc = dca_pci_rc_from_dev(dev);
+	domain = dca_find_domain(pci_rc);
+	if (!domain) {
+		spin_unlock_irqrestore(&dca_lock, flags);
+		return -ENODEV;
+	}
+
+	list_for_each_entry(dca, &domain->dca_providers, node) {
 		slot = dca->ops->add_requester(dca, dev);
 		if (slot >= 0)
 			break;
@@ -222,13 +303,19 @@ int register_dca_provider(struct dca_pro
 {
 	int err;
 	unsigned long flags;
+	struct dca_domain *domain;
 
 	err = dca_sysfs_add_provider(dca, dev);
 	if (err)
 		return err;
 
 	spin_lock_irqsave(&dca_lock, flags);
-	list_add(&dca->node, &dca_providers);
+	domain = dca_get_domain(dev);
+	if (!domain) {
+		spin_unlock_irqrestore(&dca_lock, flags);
+		return -ENODEV;
+	}
+	list_add(&dca->node, &domain->dca_providers);
 	spin_unlock_irqrestore(&dca_lock, flags);
 
 	blocking_notifier_call_chain(&dca_provider_chain,
@@ -241,15 +328,24 @@ EXPORT_SYMBOL_GPL(register_dca_provider)
  * unregister_dca_provider - remove a dca provider
  * @dca - struct created by alloc_dca_provider()
  */
-void unregister_dca_provider(struct dca_provider *dca)
+void unregister_dca_provider(struct dca_provider *dca, struct device *dev)
 {
 	unsigned long flags;
+	struct pci_bus *pci_rc;
+	struct dca_domain *domain;
 
 	blocking_notifier_call_chain(&dca_provider_chain,
 				     DCA_PROVIDER_REMOVE, NULL);
 
 	spin_lock_irqsave(&dca_lock, flags);
+
 	list_del(&dca->node);
+
+	pci_rc = dca_pci_rc_from_dev(dev);
+	domain = dca_find_domain(pci_rc);
+	if (list_empty(&domain->dca_providers))
+		dca_free_domain(domain);
+
 	spin_unlock_irqrestore(&dca_lock, flags);
 
 	dca_sysfs_remove_provider(dca);
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
index c788fa2..d545fae 100644
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -175,7 +175,7 @@ static void __devexit ioat_remove(struct
 
 	dev_err(&pdev->dev, "Removing dma and dca services\n");
 	if (device->dca) {
-		unregister_dca_provider(device->dca);
+		unregister_dca_provider(device->dca, &pdev->dev);
 		free_dca_provider(device->dca);
 		device->dca = NULL;
 	}
diff --git a/include/linux/dca.h b/include/linux/dca.h
index 9c20c7e..d27a7a0 100644
--- a/include/linux/dca.h
+++ b/include/linux/dca.h
@@ -20,6 +20,9 @@
  */
 #ifndef DCA_H
 #define DCA_H
+
+#include <linux/pci.h>
+
 /* DCA Provider API */
 
 /* DCA Notifier Interface */
@@ -36,6 +39,12 @@ struct dca_provider {
 	int			 id;
 };
 
+struct dca_domain {
+	struct list_head	node;
+	struct list_head	dca_providers;
+	struct pci_bus		*pci_rc;
+};
+
 struct dca_ops {
 	int	(*add_requester)    (struct dca_provider *, struct device *);
 	int	(*remove_requester) (struct dca_provider *, struct device *);
@@ -47,7 +56,7 @@ struct dca_ops {
 struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size);
 void free_dca_provider(struct dca_provider *dca);
 int register_dca_provider(struct dca_provider *dca, struct device *dev);
-void unregister_dca_provider(struct dca_provider *dca);
+void unregister_dca_provider(struct dca_provider *dca, struct device *dev);
 
 static inline void *dca_priv(struct dca_provider *dca)
 {


^ permalink raw reply related

* [PATCH RFC] tun: export underlying socket
From: Michael S. Tsirkin @ 2009-09-10 12:59 UTC (permalink / raw)
  To: David Miller, m.s.tsirkin; +Cc: mst, netdev, herbert

Tun device looks similar to a packet socket
in that both pass complete frames from/to userspace.

This patch fills in enough fields in the socket underlying tun driver
to support sendmsg/recvmsg operations, and exports access to this socket
to modules.

This way, code using raw sockets to inject packets
into a physical device, can support injecting
packets into host network stack almost without modification.

First user of this interface will be vhost virtualization
accelerator.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

This patch is on top of net-next master.
An alternative approach would be to add an ioctl to tun, to export the
underlying socket to userspace: a uniform way to work with a network
device and the host stack might be useful there, as well.
Kernel users could then do sockfd_lookup to get the socket.
I decided against it for now as it requires more code.
Please comment.

 drivers/net/tun.c      |   78 +++++++++++++++++++++++++++++++++++++++++++----
 include/linux/if_tun.h |   14 ++++++++
 2 files changed, 85 insertions(+), 7 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 589a44a..76f5faa 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -151,6 +151,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
 	err = 0;
 	tfile->tun = tun;
 	tun->tfile = tfile;
+	tun->socket.file = file;
 	dev_hold(tun->dev);
 	sock_hold(tun->socket.sk);
 	atomic_inc(&tfile->count);
@@ -165,6 +166,7 @@ static void __tun_detach(struct tun_struct *tun)
 	/* Detach from net device */
 	netif_tx_lock_bh(tun->dev);
 	tun->tfile = NULL;
+	tun->socket.file = NULL;
 	netif_tx_unlock_bh(tun->dev);
 
 	/* Drop read queue */
@@ -750,7 +752,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
 	len = min_t(int, skb->len, len);
 
 	skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
-	total += len;
+	total += skb->len;
 
 	tun->dev->stats.tx_packets++;
 	tun->dev->stats.tx_bytes += len;
@@ -758,12 +760,10 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
 	return total;
 }
 
-static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
-			    unsigned long count, loff_t pos)
+static ssize_t tun_do_read(struct tun_struct *tun,
+			   struct kiocb *iocb, const struct iovec *iv,
+			   unsigned long count, int noblock)
 {
-	struct file *file = iocb->ki_filp;
-	struct tun_file *tfile = file->private_data;
-	struct tun_struct *tun = __tun_get(tfile);
 	DECLARE_WAITQUEUE(wait, current);
 	struct sk_buff *skb;
 	ssize_t len, ret = 0;
@@ -785,7 +785,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 
 		/* Read frames from the queue */
 		if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
-			if (file->f_flags & O_NONBLOCK) {
+			if (noblock) {
 				ret = -EAGAIN;
 				break;
 			}
@@ -813,6 +813,21 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 	remove_wait_queue(&tun->socket.wait, &wait);
 
 out:
+	return ret;
+}
+
+static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
+			    unsigned long count, loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+	struct tun_file *tfile = file->private_data;
+	struct tun_struct *tun = __tun_get(tfile);
+	ssize_t ret;
+
+	if (!tun)
+		return -EBADFD;
+	ret = tun_do_read(tun, iocb, iv, count, file->f_flags & O_NONBLOCK);
+	ret = min_t(ssize_t, ret, count);
 	tun_put(tun);
 	return ret;
 }
@@ -865,6 +880,37 @@ static void tun_sock_destruct(struct sock *sk)
 	free_netdev(container_of(sk, struct tun_sock, sk)->tun->dev);
 }
 
+static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *m, size_t total_len)
+{
+	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
+	return tun_get_user(tun, m->msg_iov, total_len,
+			    m->msg_flags & MSG_DONTWAIT);
+}
+
+static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
+		       struct msghdr *m, size_t total_len,
+		       int flags)
+{
+	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
+	int ret;
+	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
+		return -EINVAL;
+	ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
+			  flags & MSG_DONTWAIT);
+	if (ret > total_len) {
+		m->msg_flags |= MSG_TRUNC;
+		ret = flags & MSG_TRUNC ? ret : total_len;
+	}
+	return ret;
+}
+
+/* Ops structure to mimic raw sockets with tun */
+static const struct proto_ops tun_socket_ops = {
+	.sendmsg = tun_sendmsg,
+	.recvmsg = tun_recvmsg,
+};
+
 static struct proto tun_proto = {
 	.name		= "tun",
 	.owner		= THIS_MODULE,
@@ -982,6 +1028,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 			goto err_free_dev;
 
 		init_waitqueue_head(&tun->socket.wait);
+		tun->socket.ops = &tun_socket_ops;
 		sock_init_data(&tun->socket, sk);
 		sk->sk_write_space = tun_sock_write_space;
 		sk->sk_sndbuf = INT_MAX;
@@ -1483,6 +1530,23 @@ static void tun_cleanup(void)
 	rtnl_link_unregister(&tun_link_ops);
 }
 
+/* Get an underlying socket object from tun file.  Returns error unless file is
+ * attached to a device.  The returned object works like a packet socket, it
+ * can be used for sock_sendmsg/sock_recvmsg.  The caller is responsible for
+ * holding a reference to the file for as long as the socket is in use. */
+struct socket *tun_get_socket(struct file *file)
+{
+	struct tun_struct *tun;
+	if (file->f_op != &tun_fops)
+		return ERR_PTR(-EINVAL);
+	tun = tun_get(file);
+	if (!tun)
+		return ERR_PTR(-EBADFD);
+	tun_put(tun);
+	return &tun->socket;
+}
+EXPORT_SYMBOL_GPL(tun_get_socket);
+
 module_init(tun_init);
 module_exit(tun_cleanup);
 MODULE_DESCRIPTION(DRV_DESCRIPTION);
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 3f5fd52..404abe0 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -86,4 +86,18 @@ struct tun_filter {
 	__u8   addr[0][ETH_ALEN];
 };
 
+#ifdef __KERNEL__
+#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
+struct socket *tun_get_socket(struct file *);
+#else
+#include <linux/err.h>
+#include <linux/errno.h>
+struct file;
+struct socket;
+static inline struct socket *tun_get_socket(struct file *f)
+{
+	return ERR_PTR(-EINVAL);
+}
+#endif /* CONFIG_TUN */
+#endif /* __KERNEL__ */
 #endif /* __IF_TUN_H */
-- 
1.6.2.5

^ permalink raw reply related

* Re: [PATCH 2/3] ucc_geth: Rearrange some code to avoid forward declarations
From: Timur Tabi @ 2009-09-10 13:00 UTC (permalink / raw)
  To: Anton Vorontsov
  Cc: David Miller, Andy Fleming, Li Yang, Kumar Gala, netdev,
	linuxppc-dev
In-Reply-To: <20090910020140.GB31083@oksana.dev.rtsoft.ru>

Anton Vorontsov wrote:
> We'll need ugeth_disable() and ugeth_enable() calls earlier in the
> file, so rearrange some code to avoid forward declarations.
> 
> The patch doesn't contain any functional changes.
> 
> Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>

Acked-by: Timur Tabi <timur@freescale.com>

I'm generally not qualified to review ucc_geth patches, but I'm in favor of this one.  I hate forward declarations.

-- 
Timur Tabi
Linux kernel developer at Freescale

^ permalink raw reply

* radvd 1.5 released
From: Pekka Savola @ 2009-09-10 12:00 UTC (permalink / raw)
  To: netdev, radvd-announce-l

Hello,

A new version of radvd has been released.  This fixes two regressions
introduced a couple of years back: radvd might end up segfaulting or
looping infinitely if the cable is plugged in or unplugged, and if the
cable is unplugged when starting and IgnoreIfMissing is configured, the
interface might continue being ignored.

Special thanks to Reuben Hawkins and Teemu Torma for debugging 
these problems and working on patches.

Get it at: http://www.litech.org/radvd/

-- 
Pekka Savola                 "You each name yourselves king, yet the
Netcore Oy                    kingdom bleeds."
Systems. Networks. Security. -- George R.R. Martin: A Clash of Kings


^ permalink raw reply

* Re: igb bandwidth allocation configuration
From: Patrick McHardy @ 2009-09-10 11:55 UTC (permalink / raw)
  To: Simon Horman; +Cc: e1000-devel, netdev
In-Reply-To: <4AA8E2CE.2080707@trash.net>

[-- Attachment #1: Type: text/plain, Size: 1123 bytes --]

Patrick McHardy wrote:
> Simon Horman wrote:
>>
>> I have been looking into adding support for the 82586's per-PF/VF
>> bandwidth allocation to the igb driver. It seems that the trickiest
>> part is working out how to expose things to user-space.
>>
>> ...
>> Internally it seems that the limits are actually applied to HW Tx queues
>> rather than directly to VMs. There are 16 such queues. Accordingly it might
>> be useful to design an interface to set limits per-queue using ethtool.
>> But this would seem to also require exposing which queues are associated
>> with which PF/VF.
> 
> Just an idea since I don't know much about this stuff:
> 
> Since we now have the mq packet scheduler, which exposes the device
> queues as qdisc classes, how about adding driver-specific configuration
> attributes that are passed to the driver by the mq scheduler? This
> would allow configuring per-queue bandwidth limits using regular TC
> commands and also using those limits without VFs for any kind of traffic.
> Drivers not supporting this would refuse unsupported options.

Attached patch demonstrates the idea. Compile-tested only.


[-- Attachment #2: x --]
[-- Type: text/plain, Size: 3012 bytes --]

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a44118b..388841c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -178,6 +178,7 @@ enum {
 struct neighbour;
 struct neigh_parms;
 struct sk_buff;
+struct nlattr;
 
 struct netif_rx_stats
 {
@@ -636,6 +637,12 @@ struct net_device_ops {
 	int			(*ndo_fcoe_ddp_done)(struct net_device *dev,
 						     u16 xid);
 #endif
+	int			(*ndo_queue_config)(struct net_device *dev,
+						    unsigned int qnum,
+						    const struct nlattr *nla[]);
+	int			(*ndo_get_queue_config)(struct net_device *dev,
+							struct sk_buff *skb,
+							unsigned int qnum);
 };
 
 /*
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index d51a2b3..742db43 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -518,4 +518,14 @@ struct tc_drr_stats
 	__u32	deficit;
 };
 
+/* MQ */
+
+enum
+{
+	TCA_MQ_UNSPEC,
+	__TCA_MQ_MAX
+};
+
+#define TCA_MQ_MAX	(__TCA_MQ_MAX - 1)
+
 #endif
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index dd5ee02..13132b9 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -171,15 +171,61 @@ static void mq_put(struct Qdisc *sch, unsigned long cl)
 	return;
 }
 
+static const struct nla_policy mq_policy[TCA_MQ_MAX + 1] = {
+	/* nothing so far */
+};
+
+static int mq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+			   struct nlattr **tca, unsigned long *arg)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct nlattr *tb[TCA_MQ_MAX + 1];
+	unsigned long ntx;
+	int err;
+
+	if (*arg == 0)
+		return -EOPNOTSUPP;
+	if (mq_queue_get(sch, *arg))
+		return -ENOENT;
+	ntx = *arg - 1;
+
+	if (tca == NULL)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_MQ_MAX, tca[TCA_OPTIONS], mq_policy);
+	if (err < 0)
+		return err;
+
+	if (dev->netdev_ops->ndo_queue_config == NULL)
+		return -EOPNOTSUPP;
+	return dev->netdev_ops->ndo_queue_config(dev, ntx, (void *)tb);
+}
+
 static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
 			 struct sk_buff *skb, struct tcmsg *tcm)
 {
 	struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
+	struct net_device *dev = qdisc_dev(sch);
+	struct nlattr *nest;
 
 	tcm->tcm_parent = TC_H_ROOT;
 	tcm->tcm_handle |= TC_H_MIN(cl);
 	tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
-	return 0;
+
+	if (dev->netdev_ops->ndo_get_queue_config) {
+		nest = nla_nest_start(skb, TCA_OPTIONS);
+		if (nest == NULL)
+			goto nla_put_failure;
+		if (dev->netdev_ops->ndo_get_queue_config(dev, skb, cl - 1) < 0)
+			goto nla_put_failure;
+		nla_nest_end(skb, nest);
+	}
+
+	return skb->len;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
 }
 
 static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
@@ -214,6 +260,7 @@ static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 
 static const struct Qdisc_class_ops mq_class_ops = {
 	.select_queue	= mq_select_queue,
+	.change		= mq_change_class,
 	.graft		= mq_graft,
 	.leaf		= mq_leaf,
 	.get		= mq_get,

^ permalink raw reply related

* Re: net_sched 07/07: add classful multiqueue dummy scheduler
From: Patrick McHardy @ 2009-09-10 11:28 UTC (permalink / raw)
  To: Jarek Poplawski; +Cc: Eric Dumazet, netdev
In-Reply-To: <20090909195238.GA3043@ami.dom.local>

Jarek Poplawski wrote:
> On Wed, Sep 09, 2009 at 06:02:59PM +0200, Patrick McHardy wrote:
>>>>>>> +	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
>>>>>>> +		qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
>>>>>>> +		spin_lock_bh(qdisc_lock(qdisc));
>>>>>>> +		sch->q.qlen		+= qdisc->q.qlen;
>>>>>>> +		sch->bstats.bytes	+= qdisc->bstats.bytes;
>>>>>>> +		sch->bstats.packets	+= qdisc->bstats.packets;
>>>>>>> +		sch->qstats.qlen	+= qdisc->qstats.qlen;
>>>>>> Like in Christoph's case, we should probably use q.qlen instead.
>>>>> Its done a few lines above. This simply sums up all members of qstats.
>>>> AFAICS these members are updated only in tc_fill_qdisc, starting from
>>>> the root, so they might be not up-to-date at the moment, unless I miss
>>>> something.
>>> Yes, we might need an q->ops->update_stats(struct Qdisc *sch) method, and
>>> to recursively call it from mq_update_stats()
>> Unless I'm missing something, that shouldn't be necessary since
>> sch->q.qlen contains the correct sum of all child qdiscs and
>> this is used by tc_fill_qdisc to update qstats.qlen.
> 
> You're perfectly right! (And the code is perfectly misleading.;-)

I'll remove the misleading (and unnecessary) line of code, thanks Jarek.

^ permalink raw reply

* Re: igb bandwidth allocation configuration
From: Patrick McHardy @ 2009-09-10 11:28 UTC (permalink / raw)
  To: Simon Horman; +Cc: e1000-devel, netdev
In-Reply-To: <20090910081844.GA5421@verge.net.au>

Simon Horman wrote:
> Hi,
> 
> I have been looking into adding support for the 82576's per-PF/VF
> bandwidth allocation to the igb driver. It seems that the trickiest
> part is working out how to expose things to user-space.
> 
> I was thinking along the lines of an ethtool option as follows:
> 
> 	ethtool --bandwidth ethN LIMIT...
> 
> 	where:
> 		* There is one LIMIT per PF/VF.
> 		  The 82576 can have up to 7 VFs per PF,
> 		  so there would be up to 8 LIMITS
> 		* A keyword (none?) can be used to denote that
> 		  bandwidth allocation should be disabled for the
> 		  corresponding VM
> 		* Otherwise LIMITS are in Megabits/s
> 
> This may get a bit cumbersome if there are a lot of VFs per PF,
> perhaps a better syntax would be:
> 
> 	ethtool --bandwidth ethN M=LIMIT...
> 
> 	where:
> 		* LIMIT is as above
> 		* M is some key to denote which VF/PF is
> 		  having its limit set.
> 
> Internally it seems that actually the limits are applied to HW Tx queues
> rather than directly VMs. There are 16 such queues. Accordingly it might
> be useful to design an interface to set limits per-queue using ethtool.
> But this would seem to also require exposing which queues are associated
> with which PF/VF.

Just an idea since I don't know much about this stuff:

Since we now have the mq packet scheduler, which exposes the device
queues as qdisc classes, how about adding driver-specific configuration
attributes that are passed to the driver by the mq scheduler? This
would allow to configure per-queue bandwidth limits using regular TC
commands and also use those limits without VFs for any kind of traffic.
Drivers not supporting this would refuse unsupported options.


^ permalink raw reply

* Re: TCP kernel tables overflowing after sustained 1000 new connections per second
From: Andi Kleen @ 2009-09-10  9:24 UTC (permalink / raw)
  To: David Miller; +Cc: paulsheer, linux-kernel, roque, netdev
In-Reply-To: <20090909.170824.141343404.davem@davemloft.net>


> On a gigabit local LAN I can set the timeouts very low to encourage
> port reuse. A well known configuration issue with all OS's - just search
> for MyOS+TIMED_WAIT on google. No problems here.

The timeouts are what they are for a reason to detect old packets in
the network and prevent data corruption. That's why the RFCs require
them. 

Unless you never run on WANs or have very strong data integrity checking
in your application (e.g. SSL) it's normally not a good idea to mess
with them.

When you run out of port space you should use more local IP addresses.

Possibly if you don't have problems with firewalls you could
also increase the port space, but that's still limited.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply

* Re: [PATCH RESEND] bonding: remap muticast addresses without using dev_close() and dev_open()
From: Moni Shoua @ 2009-09-10  8:47 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: Jay Vosburgh, David Miller, Jason Gunthorpe, netdev,
	bonding-devel
In-Reply-To: <4AA8B19F.2080704@voltaire.com>

Or Gerlitz wrote:
> Moni Shoua wrote:
>> This patch fixes commit e36b9d16c6a6d0f59803b3ef04ff3c22c3844c10. The
>> approach there is to call dev_close()/dev_open() whenever the device
>> type is changed in order to remap the device IP multicast addresses to
>> HW multicast addresses. This approach suffers from 2 drawbacks [...]
>> The fix here is to directly remap the IP multicast addresses to HW
>> multicast addresses for a bonding device that changes its type, and
>> nothing else.
> 
> Moni,
> 
> The approach and patch look good. First, I think it may be easier
> to review and maintain if you separate this to two patches, the first
> simply reverting e36b9d16c6a6d0f59803b3ef04ff3c22c3844c10 and the second
> the approach suggested by this patch. Second, I think you may be able to
> do well with only one event, see next
> 
I don't need to revert the entire patch. Only the dev_open() and dev_close() functions need to be removed and it is quite easy to review it in one patch.
>> @@ -1460,14 +1460,17 @@ int bond_enslave(struct net_device *bond_dev,
>> struct net_device *slave_dev)
>>       */
>>      if (bond->slave_cnt == 0) {
>>          if (bond_dev->type != slave_dev->type) {
>> -            dev_close(bond_dev);
>>              pr_debug("%s: change device type from %d to %d\n",
>>                  bond_dev->name, bond_dev->type, slave_dev->type);
>> +
>> +            netdev_bonding_change(bond_dev, NETDEV_BONDING_OLDTYPE);
>> +
>>              if (slave_dev->type != ARPHRD_ETHER)
>>                  bond_setup_by_slave(bond_dev, slave_dev);
>>              else
>>                  ether_setup(bond_dev);
>> -            dev_open(bond_dev);
>> +
>> +            netdev_bonding_change(bond_dev, NETDEV_BONDING_NEWTYPE);
>>          }
> can't you achieve the same impact if just calling
> netdev_bonding_change(bond_dev, NETDEV_BONDING_NEWTYPE) after doing the
> setup_by_slave, and have the stack call ip_mc_unmap(...) and then
> ip_mc_map(...) ???
> 
I thought about it, but the function arp_mc_map(), which is called before and after the change in dev->type, relies on the value of dev->type. I could write the patch with one event after the type has changed and pass the old device type somehow (field prev_type in struct net_device?), but the resulting code would look clumsy (at least to me).

> Or.
> 
> 
> -- 
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


^ permalink raw reply

* igb bandwidth allocation configuration
From: Simon Horman @ 2009-09-10  8:18 UTC (permalink / raw)
  To: e1000-devel, netdev

Hi,

I have been looking into adding support for the 82576's per-PF/VF
bandwidth allocation to the igb driver. It seems that the trickiest
part is working out how to expose things to user-space.

I was thinking along the lines of an ethtool option as follows:

	ethtool --bandwidth ethN LIMIT...

	where:
		* There is one LIMIT per PF/VF.
		  The 82576 can have up to 7 VFs per PF,
		  so there would be up to 8 LIMITS
		* A keyword (none?) can be used to denote that
		  bandwidth allocation should be disabled for the
		  corresponding VM
		* Otherwise LIMITS are in Megabits/s

This may get a bit cumbersome if there are a lot of VFs per PF,
perhaps a better syntax would be:

	ethtool --bandwidth ethN M=LIMIT...

	where:
		* LIMIT is as above
		* M is some key to denote which VF/PF is
		  having its limit set.

Internally it seems that actually the limits are applied to HW Tx queues
rather than directly VMs. There are 16 such queues. Accordingly it might
be useful to design an interface to set limits per-queue using ethtool.
But this would seem to also require exposing which queues are associated
with which PF/VF.


------------------------------------------------------------------------------
Let Crystal Reports handle the reporting - Free Crystal Reports 2008 30-Day 
trial. Simplify your report design, integration and deployment - and focus on 
what you do best, core application coding. Discover what's new with 
Crystal Reports now.  http://p.sf.net/sfu/bobj-july

^ permalink raw reply

* Re: L2 switching in igb
From: Or Gerlitz @ 2009-09-10  8:04 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: Kirsher, Jeffrey T, Fischer, Anna, netdev, David Miller,
	Stephen Hemminger
In-Reply-To: <5f2db9d90909032135l26cfdba6n52329f6be75c16a5@mail.gmail.com>

Alexander Duyck wrote:
> The suggestion I received from Dave and Stephen was to consider an rtnl_link_ops for
> configuring the VFs, but I still have issues trying to visualize how that would work since I don't want the VFs spawning in the host/hypervisor OS as network devices.
Note that VEPA mode is a characteristic of the PF, correct? and the PF 
resides in the host kernel. Also, as I wrote you earlier, I do see many 
schemes where a VF spawned in the host kernel IS very useful, and as 
such I'd be happy to continue the discussion on the approach suggested 
by Dave and Stephen, can you provide a pointer? (thanks).

Or.


^ permalink raw reply

* Re: [PATCH RESEND] bonding: remap muticast addresses without using dev_close() and dev_open()
From: Or Gerlitz @ 2009-09-10  7:58 UTC (permalink / raw)
  To: Moni Shoua
  Cc: Jay Vosburgh, David Miller, Jason Gunthorpe, netdev,
	bonding-devel
In-Reply-To: <4AA39E42.9070702@Voltaire.COM>

Moni Shoua wrote:
> This patch fixes commit e36b9d16c6a6d0f59803b3ef04ff3c22c3844c10. The approach there is to call dev_close()/dev_open() whenever the device type is changed in order to remap the device IP multicast addresses to HW multicast addresses. This approach suffers from 2 drawbacks [...] The fix here is to directly remap the IP multicast addresses to HW multicast addresses for a bonding device that changes its type, and nothing else.

Moni,

The approach and patch look good. First, I think it may be easier 
to review and maintain if you separate this to two patches, the first 
simply reverting e36b9d16c6a6d0f59803b3ef04ff3c22c3844c10 and the second 
the approach suggested by this patch. Second, I think you may be able to 
do well with only one event, see next

> @@ -1460,14 +1460,17 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
>  	 */
>  	if (bond->slave_cnt == 0) {
>  		if (bond_dev->type != slave_dev->type) {
> -			dev_close(bond_dev);
>  			pr_debug("%s: change device type from %d to %d\n",
>  				bond_dev->name, bond_dev->type, slave_dev->type);
> +
> +			netdev_bonding_change(bond_dev, NETDEV_BONDING_OLDTYPE);
> +
>  			if (slave_dev->type != ARPHRD_ETHER)
>  				bond_setup_by_slave(bond_dev, slave_dev);
>  			else
>  				ether_setup(bond_dev);
> -			dev_open(bond_dev);
> +
> +			netdev_bonding_change(bond_dev, NETDEV_BONDING_NEWTYPE);
>  		}
can't you achieve the same impact if just calling 
netdev_bonding_change(bond_dev, NETDEV_BONDING_NEWTYPE) after doing the 
setup_by_slave, and have the stack call ip_mc_unmap(...) and then 
ip_mc_map(...) ???

Or.



^ permalink raw reply

* Re: r8169 ethernet hangs after a pm-suspend (and resume)
From: Alex Bennee @ 2009-09-10  6:49 UTC (permalink / raw)
  To: Francois Romieu; +Cc: lkml, netdev
In-Reply-To: <20090909092822.GA18355@electric-eye.fr.zoreil.com>

2009/9/9 Francois Romieu <romieu@fr.zoreil.com>:
> Alex Bennee <kernel-hacker@bennee.com> :
> [...]
>> I've just recently gotten suspend working on my system. Unfortunately
>> after the resume event I lose access to the network.
>> As far as the system is concerned the network is configured properly
>> but every attempt to ping local nodes fails with "Host not reachable".
>
> Can the problem be described as "gigabit link setting does not survive
> suspend/resume" ?

Further experimentation shows the failure is intermittent. The
following dmesg shows a successful resume with working 'net:

[  475.800017] ACPI: Waking up from system sleep state S3
[  475.800726] HDA Intel 0000:00:1b.0: restoring config space at
offset 0x1 (was 0x100006, writing 0x100002)
[  475.800747] pcieport-driver 0000:00:1c.0: restoring config space at
offset 0xf (was 0x60100, writing 0x6010a)
[  475.800762] pcieport-driver 0000:00:1c.0: restoring config space at
offset 0x1 (was 0x100107, writing 0x100507)
[  475.800799] pci 0000:00:1d.0: restoring config space at offset 0x1
(was 0x2800005, writing 0x2800001)
[  475.800819] pci 0000:00:1d.1: restoring config space at offset 0x1
(was 0x2800005, writing 0x2800001)
[  475.800840] pci 0000:00:1d.2: restoring config space at offset 0x1
(was 0x2800005, writing 0x2800001)
[  475.800861] pci 0000:00:1d.3: restoring config space at offset 0x1
(was 0x2800005, writing 0x2800001)
[  475.800889] pci 0000:00:1d.7: restoring config space at offset 0x1
(was 0x2900006, writing 0x2900002)
[  475.800967] PIIX_IDE 0000:00:1f.1: restoring config space at offset
0x1 (was 0x2880005, writing 0x2800005)
[  475.801050] r8169 0000:02:00.0: restoring config space at offset
0x3 (was 0x4, writing 0x8)
[  475.801056] r8169 0000:02:00.0: restoring config space at offset
0x1 (was 0x100007, writing 0x100407)
[  475.803466] i915 0000:00:02.0: PCI INT A -> GSI 16 (level, low) -> IRQ 16
[  475.803470] i915 0000:00:02.0: setting latency timer to 64
[  475.864097] [drm] DAC-6: set mode 1440x900 2a
[  475.936922] [drm] TMDS-8: set mode 1680x1050 2b
[  476.108887] HDA Intel 0000:00:1b.0: PCI INT A -> GSI 19 (level,
low) -> IRQ 19
[  476.108892] HDA Intel 0000:00:1b.0: setting latency timer to 64
[  476.548200] pci 0000:00:1d.7: PME# disabled
[  476.548207] pci 0000:00:1e.0: setting latency timer to 64
[  476.548216] PIIX_IDE 0000:00:1f.1: PCI INT A -> GSI 18 (level, low) -> IRQ 18
[  476.548223] PIIX_IDE 0000:00:1f.1: setting latency timer to 64
[  476.548235] ata_piix 0000:00:1f.2: PCI INT B -> GSI 17 (level, low) -> IRQ 17
[  476.548248] ata_piix 0000:00:1f.2: setting latency timer to 64
[  476.548352] r8169 0000:02:00.0: PME# disabled
[  476.564404] r8169: eth0: link up

And now compare with a return from suspend that failed:

[12397.816024] ACPI: Waking up from system sleep state S3
[12397.816693] agpgart-intel 0000:00:00.0: restoring config space at
offset 0x1 (was 0x30900006, writing 0x20900006)
[12397.816737] HDA Intel 0000:00:1b.0: restoring config space at
offset 0x1 (was 0x100006, writing 0x100002)
[12397.816757] pcieport-driver 0000:00:1c.0: restoring config space at
offset 0xf (was 0x60100, writing 0x6010a)
[12397.816768] pcieport-driver 0000:00:1c.0: restoring config space at
offset 0x7 (was 0x2000e0e0, writing 0xe0e0)
[12397.816776] pcieport-driver 0000:00:1c.0: restoring config space at
offset 0x1 (was 0x100107, writing 0x100507)
[12397.816813] uhci_hcd 0000:00:1d.0: restoring config space at offset
0x1 (was 0x2800005, writing 0x2800001)
[12397.816835] uhci_hcd 0000:00:1d.1: restoring config space at offset
0x1 (was 0x2800005, writing 0x2800001)
[12397.816856] uhci_hcd 0000:00:1d.2: restoring config space at offset
0x1 (was 0x2800005, writing 0x2800001)
[12397.816877] uhci_hcd 0000:00:1d.3: restoring config space at offset
0x1 (was 0x2800005, writing 0x2800001)
[12397.816906] pci 0000:00:1d.7: restoring config space at offset 0x1
(was 0x2900006, writing 0x2900002)
[12397.816929] pci 0000:00:1e.0: restoring config space at offset 0x7
(was 0x2280d0d0, writing 0xa280d0d0)
[12397.816987] PIIX_IDE 0000:00:1f.1: restoring config space at offset
0x1 (was 0x2880005, writing 0x2800005)
[12397.832040] r8169 0000:02:00.0: restoring config space at offset
0xf (was 0xffffffff, writing 0x10a)
[12397.832045] r8169 0000:02:00.0: restoring config space at offset
0xe (was 0xffffffff, writing 0x0)
[12397.832050] r8169 0000:02:00.0: restoring config space at offset
0xd (was 0xffffffff, writing 0x40)
[12397.832055] r8169 0000:02:00.0: restoring config space at offset
0xc (was 0xffffffff, writing 0xdffc0000)
[12397.832061] r8169 0000:02:00.0: restoring config space at offset
0xb (was 0xffffffff, writing 0x81aa1043)
[12397.832066] r8169 0000:02:00.0: restoring config space at offset
0xa (was 0xffffffff, writing 0x0)
[12397.832071] r8169 0000:02:00.0: restoring config space at offset
0x9 (was 0xffffffff, writing 0x0)
[12397.832076] r8169 0000:02:00.0: restoring config space at offset
0x8 (was 0xffffffff, writing 0xdeff000c)
[12397.832081] r8169 0000:02:00.0: restoring config space at offset
0x7 (was 0xffffffff, writing 0x0)
[12397.832086] r8169 0000:02:00.0: restoring config space at offset
0x6 (was 0xffffffff, writing 0xdffff004)
[12397.832091] r8169 0000:02:00.0: restoring config space at offset
0x5 (was 0xffffffff, writing 0x0)
[12397.832096] r8169 0000:02:00.0: restoring config space at offset
0x4 (was 0xffffffff, writing 0xe801)
[12397.832101] r8169 0000:02:00.0: restoring config space at offset
0x3 (was 0xffffffff, writing 0x8)
[12397.832106] r8169 0000:02:00.0: restoring config space at offset
0x2 (was 0xffffffff, writing 0x2000002)
[12397.832111] r8169 0000:02:00.0: restoring config space at offset
0x1 (was 0xffffffff, writing 0x100407)
[12397.832117] r8169 0000:02:00.0: restoring config space at offset
0x0 (was 0xffffffff, writing 0x816810ec)
[12397.834527] i915 0000:00:02.0: PCI INT A -> GSI 16 (level, low) -> IRQ 16
[12397.834531] i915 0000:00:02.0: setting latency timer to 64
[12397.895209] [drm] DAC-6: set mode 1440x900 2a
[12397.968038] [drm] TMDS-8: set mode 1680x1050 2b
[12398.140006] HDA Intel 0000:00:1b.0: PCI INT A -> GSI 19 (level,
low) -> IRQ 19
[12398.140011] HDA Intel 0000:00:1b.0: setting latency timer to 64
[12398.580194] uhci_hcd 0000:00:1d.0: PCI INT A -> GSI 20 (level, low) -> IRQ 20
[12398.580200] uhci_hcd 0000:00:1d.0: setting latency timer to 64
[12398.580224] usb usb2: root hub lost power or was reset
[12398.580250] uhci_hcd 0000:00:1d.1: PCI INT B -> GSI 17 (level, low) -> IRQ 17
[12398.580255] uhci_hcd 0000:00:1d.1: setting latency timer to 64
[12398.580273] usb usb3: root hub lost power or was reset
[12398.580291] uhci_hcd 0000:00:1d.2: PCI INT C -> GSI 18 (level, low) -> IRQ 18
[12398.580296] uhci_hcd 0000:00:1d.2: setting latency timer to 64
[12398.580314] usb usb4: root hub lost power or was reset
[12398.580332] uhci_hcd 0000:00:1d.3: PCI INT D -> GSI 19 (level, low) -> IRQ 19
[12398.580337] uhci_hcd 0000:00:1d.3: setting latency timer to 64
[12398.580355] usb usb5: root hub lost power or was reset
[12398.580374] pci 0000:00:1d.7: PME# disabled
[12398.580380] pci 0000:00:1e.0: setting latency timer to 64
[12398.580387] PIIX_IDE 0000:00:1f.1: PCI INT A -> GSI 18 (level, low) -> IRQ 18
[12398.580394] PIIX_IDE 0000:00:1f.1: setting latency timer to 64
[12398.580403] ata_piix 0000:00:1f.2: PCI INT B -> GSI 17 (level, low) -> IRQ 17
[12398.580407] ata_piix 0000:00:1f.2: setting latency timer to 64
[12398.580512] r8169 0000:02:00.0: PME# disabled
[12398.660050] firewire_core: skipped bus generations, destroying all nodes
[12398.664833] hda: host max PIO4 wanted PIO255(auto-tune) selected PIO4
[12398.665625] hda: skipping word 93 validity check
[12398.665627] hda: UDMA/66 mode selected
[12398.687404] sd 0:0:0:0: [sda] Starting disk
[12399.419164] r8169: eth0: link up

which has an oops further on:

[12434.816100] ------------[ cut here ]------------
[12434.816111] WARNING: at net/sched/sch_generic.c:246
dev_watchdog+0x132/0x1da()
[12434.816114] Hardware name: System Product Name
[12434.816117] NETDEV WATCHDOG: eth0 (r8169): transmit queue 0 timed out
[12434.816120] Modules linked in: bridge stp llc bnep rfcomm l2cap
bluetooth ipv6 snd_pcm_oss snd_mixer_oss snd_seq_oss
snd_seq_midi_event snd_seq snd_seq_device kvm_intel kvm acpi_cpufreq
snd_hda_codec_analog snd_hda_intel uhci_hcd snd_hda_codec snd_hwdep
snd_pcm snd_timer ide_cd_mod firewire_ohci firewire_core snd soundcore
usbcore r8169 cdrom processor crc_itu_t nls_base snd_page_alloc mii
evdev thermal pcspkr unix [last unloaded: ehci_hcd]
[12434.816164] Pid: 0, comm: swapper Not tainted
2.6.31-rc9-ajb-00012-g3ff323f-dirty #86
[12434.816167] Call Trace:
[12434.816169]  <IRQ>  [<ffffffff812aa117>] ? dev_watchdog+0x132/0x1da
[12434.816180]  [<ffffffff8103eb72>] warn_slowpath_common+0x7c/0xa9
[12434.816185]  [<ffffffff8103ec1e>] warn_slowpath_fmt+0x69/0x6b
[12434.816190]  [<ffffffff81039e47>] ? default_wake_function+0x12/0x14
[12434.816195]  [<ffffffff8102c24c>] ? __wake_up_common+0x4b/0x7b
[12434.816200]  [<ffffffff8102f793>] ? __wake_up+0x48/0x54
[12434.816205]  [<ffffffff81298b7d>] ? netdev_drivername+0x48/0x4f
[12434.816209]  [<ffffffff812aa117>] dev_watchdog+0x132/0x1da
[12434.816214]  [<ffffffff810510f2>] ? __queue_work+0x3a/0x43
[12434.816218]  [<ffffffff812a9fe5>] ? dev_watchdog+0x0/0x1da
[12434.816223]  [<ffffffff81048d76>] run_timer_softirq+0x198/0x20d
[12434.816229]  [<ffffffff8101d0c6>] ? lapic_next_event+0x1d/0x21
[12434.816234]  [<ffffffff8104464f>] __do_softirq+0xd6/0x19a
[12434.816239]  [<ffffffff8100c19c>] call_softirq+0x1c/0x28
[12434.816242]  [<ffffffff8100d51d>] do_softirq+0x39/0x77
[12434.816246]  [<ffffffff8104430c>] irq_exit+0x44/0x7e
[12434.816252]  [<ffffffff81305914>] smp_apic_timer_interrupt+0x8d/0x9b
[12434.816258]  [<ffffffff8100bb73>] apic_timer_interrupt+0x13/0x20
[12434.816260]  <EOI>  [<ffffffff810117ac>] ? mwait_idle+0xb9/0xf0
[12434.816269]  [<ffffffff81303df5>] ? atomic_notifier_call_chain+0x13/0x15
[12434.816273]  [<ffffffff8100a30a>] ? cpu_idle+0x57/0x98
[12434.816278]  [<ffffffff812f0612>] ? rest_init+0x66/0x68
[12434.816283]  [<ffffffff815299da>] ? start_kernel+0x343/0x34e
[12434.816288]  [<ffffffff8152903a>] ? x86_64_start_reservations+0xaa/0xae
[12434.816292]  [<ffffffff8152911f>] ? x86_64_start_kernel+0xe1/0xe8
[12434.816295] ---[ end trace 1353478188007667 ]---
[12435.635167] r8169: eth0: link up

At this point even unloading and reloading the r8169 module couldn't
bring the network back. I even tried unloading the module, doing a
pm-hibernate and restore reload and still nothing which was odd as I
thought the power cycle should have un-wedged any hardware.

A couple of questions:

1. It seems the failure case has a lot more "restoring config space"
going on. Is this a wider range problem that just happens to hit r8169
harder?

2. Is the oops a red herring or could the failure to resume be because
the shutdown occurs before the hardware has flushed all in flight
packets?


-- 
Alex, homepage: http://www.bennee.com/~alex/
http://www.half-llama.co.uk

^ permalink raw reply

* Re: [iproute2] tc action mirred    question
From: Xiaofei Wu @ 2009-09-10  6:06 UTC (permalink / raw)
  To: hadi; +Cc: linux netdev
In-Reply-To: <1252534266.4119.5.camel@dogo.mojatatu.com>



>> After run 'tcpdump -i wlan1 -e', I can not capture any packets.

>Could it be related to the wireless driver?
Maybe. I will check it.

>Here's something i tried on my laptop
....
>

I tried your example.

-on window1  'ping 127.0.0.2'
....
2616 packets transmitted, 0 received, 100% packet loss

-on window2  'tcpdump -n -i eth0 -e' , i see
....
10:15:06.314420 00:23:cd:af:d0:74 > 00:23:cd:af:ec:da, ethertype IPv4 (0x0800), length 98: 127.0.0.2 > 127.0.0.2: ICMP echo request, id 17419, seq 234, length 64
....

-on window3  'tcpdump -i lo -e'
....
10:15:37.332527 00:23:cd:af:d0:74 (oui Unknown) > 00:23:cd:af:ec:da (oui Unknown), ethertype IPv4 (0x0800), length 98: 127.0.0.2 > 127.0.0.2: ICMP echo request, id 17419, seq 265, length 64
....

It seems that I modify the dst MAC, src MAC of the packets,  then transmit to 'lo'  and  mirror the packets to 'eth0'.  (On 'lo',  '2616 packets transmitted, 0 received, 100% packet loss' .)  How to let 'lo' receive the packets?

But I want to only modify the dst MAC, src MAC of the mirroring packets, transmit them to next hop. (not modify the dst,src MAC of the packets to 'lo').  What should I do?

When I change 'lo' to 'eth1' (or wlan1 ...), node A will have two paths (A-B-C, A-D-C) to transmit the "same"(IP header, data)  packets to node C simultaneously.


regards,
wu



      


^ permalink raw reply

* Re: [PATCH 00/12] Gigaset driver patches for 2.6.32
From: David Miller @ 2009-09-10  3:51 UTC (permalink / raw)
  To: dwalker; +Cc: tilman, linux-kernel, netdev, i4ldeveloper, hjlipp
In-Reply-To: <1252554477.30578.167.camel@desktop>

From: Daniel Walker <dwalker@fifo99.com>
Date: Wed, 09 Sep 2009 20:47:57 -0700

> On Thu, 2009-09-10 at 00:32 +0200, Tilman Schmidt wrote:
>> Daniel Walker wrote 07.09.09 16:30:
>> > Yeah, it looks like the whole file needs a checkpatch clean up..
>> Sounds
>> like you're not willing to do that?
>> 
>> It's not a question of willingness. You may notice I did a lot of
>> cleanup work already. But it's very time consuming work, and there has
>> been more important work to attend to first.
>> 
>> > Usually if a checkpatch cleanup comes
>> first prior to all your other changes , it doesn't usually cloud the
>> rest of the changes..
>> 
>> Sure. But that would mean postponing the merging of bugfixes until
>> someone finds the time to do a complete checkpatch cleanup of the
>> affected code. I don't think that's a sensible approach.
> 
> You shouldn't be adding any new checkpatch errors, but you currently
> are .. Just clean up the individual patches w/o the entire gigaset
> driver, that should be do-able (it's even a basic submission
> requirement). The other issue is that you're adding new files which aren't
> clean, those can certainly be cleaned up.

Right, this is a very reasonable request.

^ permalink raw reply

* Re: [PATCH 00/12] Gigaset driver patches for 2.6.32
From: Daniel Walker @ 2009-09-10  3:47 UTC (permalink / raw)
  To: Tilman Schmidt; +Cc: davem, linux-kernel, netdev, i4ldeveloper, Hansjoerg Lipp
In-Reply-To: <20090909223205.E9D632269516@fifo99.com>

On Thu, 2009-09-10 at 00:32 +0200, Tilman Schmidt wrote:
> Daniel Walker wrote 07.09.09 16:30:
> > Yeah, it looks like the whole file needs a checkpatch clean up..
> Sounds
> like you're not willing to do that?
> 
> It's not a question of willingness. You may notice I did a lot of
> cleanup work already. But it's very time consuming work, and there has
> been more important work to attend to first.
> 
> > Usually if a checkpatch cleanup comes
> first prior to all your other changes , it doesn't usually cloud the
> rest of the changes..
> 
> Sure. But that would mean postponing the merging of bugfixes until
> someone finds the time to do a complete checkpatch cleanup of the
> affected code. I don't think that's a sensible approach.

You shouldn't be adding any new checkpatch errors, but you currently
are .. Just clean up the individual patches w/o the entire gigaset
driver, that should be do-able (it's even a basic submission
requirement). The other issue is that you're adding new files which aren't
clean, those can certainly be cleaned up.

Daniel

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox