Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH] vlan_dev: VLAN 0 should be treated as "no vlan tag" (802.1p packet)
From: David Miller @ 2010-07-18 22:39 UTC (permalink / raw)
  To: pedro.netdev; +Cc: netdev, kaber, bhutchings, eric.dumazet
In-Reply-To: <957a5becb6e742b6dc3255b68bef3ba8@dondevamos.com>

From: Pedro Garcia <pedro.netdev@dondevamos.com>
Date: Sun, 18 Jul 2010 18:43:25 +0200

> - Without the 8021q module loaded in the kernel, all 802.1p packets 
> (VLAN 0 but QoS tagging) are silently discarded (as expected, as 
> the protocol is not loaded).
>  
> - Without this patch in 8021q module, these packets are forwarded to 
> the module, but they are discarded also if VLAN 0 is not configured,
> which should not be the default behaviour, as VLAN 0 is not really
> a VLANed packet but a 802.1p packet. Defining VLAN 0 makes it almost
> impossible to communicate with mixed 802.1p and non 802.1p devices on
> the same network due to arp table issues.
> 
> - Changed logic to skip vlan specific code in vlan_skb_recv if VLAN 
> is 0 and we have not defined a VLAN with ID 0, but we accept the 
> packet with the encapsulated proto and pass it later to netif_rx.
> 
> - In the vlan device event handler, added some logic to add VLAN 0 
> to HW filter in devices that support it (this prevented any traffic
> in VLAN 0 to reach the stack in e1000e with HW filter under 2.6.35,
> and probably also with other HW filtered cards, so we fix it here).
> 
> - In the vlan unregister logic, prevent the elimination of VLAN 0 
> in devices with HW filter.
> 
> - The default behaviour is to ignore the VLAN 0 tagging and accept
> the packet as if it was not tagged, but we can still define a 
> VLAN 0 if desired (so it is backwards compatible).
> 
> Signed-off-by: Pedro Garcia <pedro.netdev@dondevamos.com>

Applied, thanks Pedro.

^ permalink raw reply

* Re: [PATCH 1/2] Remove REDWOOD_[456] config options and conditional code
From: Benjamin Herrenschmidt @ 2010-07-19  0:00 UTC (permalink / raw)
  To: Christian Dietrich
  Cc: Randy Dunlap, linuxppc-dev, Alexander Kurz, Paul Mackerras,
	John Linn, David Brown, Ladislav Michl, Solomon Peachy, vamos-dev,
	Mike Frysinger, Florian Fainelli, Artem Bityutskiy, Nicolas Pitre,
	Jiri Kosina, linux-kernel, Milton Miller, netdev, Joe Perches,
	linux-mtd, David Woodhouse, David S. Miller
In-Reply-To: <ca1bb25d203618c3548748f5efb6f125a96c89e0.1279282865.git.qy03fugy@stud.informatik.uni-erlangen.de>

On Fri, 2010-07-16 at 14:29 +0200, Christian Dietrich wrote:
> The config options for REDWOOD_[456] were commented out in the powerpc
> Kconfig. The ifdefs referencing these options therefore are dead and all
> references to this can be removed (Also dependencies in other KConfig
> files).
> 
> Signed-off-by: Christian Dietrich <qy03fugy@stud.informatik.uni-erlangen.de>
> Signed-off-by: Christoph Egger <siccegge@cs.fau.de>

Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

> ---
>  arch/powerpc/platforms/40x/Kconfig |   16 -------------
>  drivers/mtd/maps/Kconfig           |    2 +-
>  drivers/mtd/maps/redwood.c         |   43 ------------------------------------
>  drivers/net/Kconfig                |    2 +-
>  drivers/net/smc91x.h               |   37 -------------------------------
>  5 files changed, 2 insertions(+), 98 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
> index ec64264..b721764 100644
> --- a/arch/powerpc/platforms/40x/Kconfig
> +++ b/arch/powerpc/platforms/40x/Kconfig
> @@ -71,22 +71,6 @@ config MAKALU
>  	help
>  	  This option enables support for the AMCC PPC405EX board.
>  
> -#config REDWOOD_5
> -#	bool "Redwood-5"
> -#	depends on 40x
> -#	default n
> -#	select STB03xxx
> -#	help
> -#	  This option enables support for the IBM STB04 evaluation board.
> -
> -#config REDWOOD_6
> -#	bool "Redwood-6"
> -#	depends on 40x
> -#	default n
> -#	select STB03xxx
> -#	help
> -#	  This option enables support for the IBM STBx25xx evaluation board.
> -
>  #config SYCAMORE
>  #	bool "Sycamore"
>  #	depends on 40x
> diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
> index f22bc9f..6629d09 100644
> --- a/drivers/mtd/maps/Kconfig
> +++ b/drivers/mtd/maps/Kconfig
> @@ -321,7 +321,7 @@ config MTD_CFI_FLAGADM
>  
>  config MTD_REDWOOD
>  	tristate "CFI Flash devices mapped on IBM Redwood"
> -	depends on MTD_CFI && ( REDWOOD_4 || REDWOOD_5 || REDWOOD_6 )
> +	depends on MTD_CFI
>  	help
>  	  This enables access routines for the flash chips on the IBM
>  	  Redwood board. If you have one of these boards and would like to
> diff --git a/drivers/mtd/maps/redwood.c b/drivers/mtd/maps/redwood.c
> index 933c0b6..d2c9db0 100644
> --- a/drivers/mtd/maps/redwood.c
> +++ b/drivers/mtd/maps/redwood.c
> @@ -22,8 +22,6 @@
>  
>  #include <asm/io.h>
>  
> -#if !defined (CONFIG_REDWOOD_6)
> -
>  #define WINDOW_ADDR 0xffc00000
>  #define WINDOW_SIZE 0x00400000
>  
> @@ -69,47 +67,6 @@ static struct mtd_partition redwood_flash_partitions[] = {
>  	}
>  };
>  
> -#else /* CONFIG_REDWOOD_6 */
> -/* FIXME: the window is bigger - armin */
> -#define WINDOW_ADDR 0xff800000
> -#define WINDOW_SIZE 0x00800000
> -
> -#define RW_PART0_OF	0
> -#define RW_PART0_SZ	0x400000	/* 4 MiB data */
> -#define RW_PART1_OF	RW_PART0_OF + RW_PART0_SZ
> -#define RW_PART1_SZ	0x10000		/* 64K VPD */
> -#define RW_PART2_OF	RW_PART1_OF + RW_PART1_SZ
> -#define RW_PART2_SZ	0x400000 - (0x10000 + 0x20000)
> -#define RW_PART3_OF	RW_PART2_OF + RW_PART2_SZ
> -#define RW_PART3_SZ	0x20000
> -
> -static struct mtd_partition redwood_flash_partitions[] = {
> -	{
> -		.name = "Redwood filesystem",
> -		.offset = RW_PART0_OF,
> -		.size = RW_PART0_SZ
> -	},
> -	{
> -		.name = "Redwood OpenBIOS Vital Product Data",
> -		.offset = RW_PART1_OF,
> -		.size = RW_PART1_SZ,
> -		.mask_flags = MTD_WRITEABLE	/* force read-only */
> -	},
> -	{
> -		.name = "Redwood kernel",
> -		.offset = RW_PART2_OF,
> -		.size = RW_PART2_SZ
> -	},
> -	{
> -		.name = "Redwood OpenBIOS",
> -		.offset = RW_PART3_OF,
> -		.size = RW_PART3_SZ,
> -		.mask_flags = MTD_WRITEABLE	/* force read-only */
> -	}
> -};
> -
> -#endif /* CONFIG_REDWOOD_6 */
> -
>  struct map_info redwood_flash_map = {
>  	.name = "IBM Redwood",
>  	.size = WINDOW_SIZE,
> diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> index ce2fcdd..313d306 100644
> --- a/drivers/net/Kconfig
> +++ b/drivers/net/Kconfig
> @@ -913,7 +913,7 @@ config SMC91X
>  	tristate "SMC 91C9x/91C1xxx support"
>  	select CRC32
>  	select MII
> -	depends on ARM || REDWOOD_5 || REDWOOD_6 || M32R || SUPERH || \
> +	depends on ARM || M32R || SUPERH || \
>  		MIPS || BLACKFIN || MN10300 || COLDFIRE
>  	help
>  	  This is a driver for SMC's 91x series of Ethernet chipsets,
> diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h
> index 8d2772c..ee74791 100644
> --- a/drivers/net/smc91x.h
> +++ b/drivers/net/smc91x.h
> @@ -83,43 +83,6 @@ static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg)
>  	}
>  }
>  
> -#elif defined(CONFIG_REDWOOD_5) || defined(CONFIG_REDWOOD_6)
> -
> -/* We can only do 16-bit reads and writes in the static memory space. */
> -#define SMC_CAN_USE_8BIT	0
> -#define SMC_CAN_USE_16BIT	1
> -#define SMC_CAN_USE_32BIT	0
> -#define SMC_NOWAIT		1
> -
> -#define SMC_IO_SHIFT		0
> -
> -#define SMC_inw(a, r)		in_be16((volatile u16 *)((a) + (r)))
> -#define SMC_outw(v, a, r)	out_be16((volatile u16 *)((a) + (r)), v)
> -#define SMC_insw(a, r, p, l) 						\
> -	do {								\
> -		unsigned long __port = (a) + (r);			\
> -		u16 *__p = (u16 *)(p);					\
> -		int __l = (l);						\
> -		insw(__port, __p, __l);					\
> -		while (__l > 0) {					\
> -			*__p = swab16(*__p);				\
> -			__p++;						\
> -			__l--;						\
> -		}							\
> -	} while (0)
> -#define SMC_outsw(a, r, p, l) 						\
> -	do {								\
> -		unsigned long __port = (a) + (r);			\
> -		u16 *__p = (u16 *)(p);					\
> -		int __l = (l);						\
> -		while (__l > 0) {					\
> -			/* Believe it or not, the swab isn't needed. */	\
> -			outw( /* swab16 */ (*__p++), __port);		\
> -			__l--;						\
> -		}							\
> -	} while (0)
> -#define SMC_IRQ_FLAGS		(0)
> -
>  #elif defined(CONFIG_SA1100_PLEB)
>  /* We can only do 16-bit reads and writes in the static memory space. */
>  #define SMC_CAN_USE_8BIT	1

^ permalink raw reply

* Re: [PATCH v3 0/4] Extend Time Stamping
From: David Miller @ 2010-07-19  2:24 UTC (permalink / raw)
  To: richardcochran; +Cc: netdev
In-Reply-To: <cover.1279391885.git.richard.cochran@omicron.at>

From: Richard Cochran <richardcochran@gmail.com>
Date: Sat, 17 Jul 2010 20:48:02 +0200

> This patch set extends the packet time stamping capabilities of the
> network stack in two ways.
> 
> 1. The first patch presents a work-around for the TX software time
>    stamping fallback problem cited in cd4d8fdad1f1. The idea is to add
>    one inline function into each MAC driver. This function will act
>    as hooks for current (and possible future) time stamping needs,
>    once they are placed correctly within each MAC driver.
> 
> 2. The other patches prepare the way for PHY drivers to offer time
>    stamping.
> 
> I am preparing a new round of patches for PTP support, but it will
> require the changes in this patch set in order to function. Thus I
> would like to have this patch set reviewed (and hopefully merged) in
> order to go forward.

Ok this looks good enough to me to toss into net-next-2.6

All applied, thanks!

^ permalink raw reply

* [PATCH] sch_atm: Convert to use standard list_head facilities.
From: David Miller @ 2010-07-19  2:53 UTC (permalink / raw)
  To: netdev


Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_atm.c |   98 +++++++++++++++++++++-----------------------------
 1 files changed, 41 insertions(+), 57 deletions(-)

diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index fcbb86a..e114f23 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -52,7 +52,7 @@ struct atm_flow_data {
 	int			ref;		/* reference count */
 	struct gnet_stats_basic_packed	bstats;
 	struct gnet_stats_queue	qstats;
-	struct atm_flow_data	*next;
+	struct list_head	list;
 	struct atm_flow_data	*excess;	/* flow for excess traffic;
 						   NULL to set CLP instead */
 	int			hdr_len;
@@ -61,34 +61,23 @@ struct atm_flow_data {
 
 struct atm_qdisc_data {
 	struct atm_flow_data	link;		/* unclassified skbs go here */
-	struct atm_flow_data	*flows;		/* NB: "link" is also on this
+	struct list_head	flows;		/* NB: "link" is also on this
 						   list */
 	struct tasklet_struct	task;		/* dequeue tasklet */
 };
 
 /* ------------------------- Class/flow operations ------------------------- */
 
-static int find_flow(struct atm_qdisc_data *qdisc, struct atm_flow_data *flow)
-{
-	struct atm_flow_data *walk;
-
-	pr_debug("find_flow(qdisc %p,flow %p)\n", qdisc, flow);
-	for (walk = qdisc->flows; walk; walk = walk->next)
-		if (walk == flow)
-			return 1;
-	pr_debug("find_flow: not found\n");
-	return 0;
-}
-
 static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow;
 
-	for (flow = p->flows; flow; flow = flow->next)
+	list_for_each_entry(flow, &p->flows, list) {
 		if (flow->classid == classid)
-			break;
-	return flow;
+			return flow;
+	}
+	return NULL;
 }
 
 static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
@@ -99,7 +88,7 @@ static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
 
 	pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
 		sch, p, flow, new, old);
-	if (!find_flow(p, flow))
+	if (list_empty(&flow->list))
 		return -EINVAL;
 	if (!new)
 		new = &noop_qdisc;
@@ -146,20 +135,12 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-	struct atm_flow_data **prev;
 
 	pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
 	if (--flow->ref)
 		return;
 	pr_debug("atm_tc_put: destroying\n");
-	for (prev = &p->flows; *prev; prev = &(*prev)->next)
-		if (*prev == flow)
-			break;
-	if (!*prev) {
-		printk(KERN_CRIT "atm_tc_put: class %p not found\n", flow);
-		return;
-	}
-	*prev = flow->next;
+	list_del_init(&flow->list);
 	pr_debug("atm_tc_put: qdisc %p\n", flow->q);
 	qdisc_destroy(flow->q);
 	tcf_destroy_chain(&flow->filter_list);
@@ -274,7 +255,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 			error = -EINVAL;
 			goto err_out;
 		}
-		if (find_flow(p, flow)) {
+		if (!list_empty(&flow->list)) {
 			error = -EEXIST;
 			goto err_out;
 		}
@@ -313,8 +294,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 	flow->classid = classid;
 	flow->ref = 1;
 	flow->excess = excess;
-	flow->next = p->link.next;
-	p->link.next = flow;
+	list_add(&flow->list, &p->link.list);
 	flow->hdr_len = hdr_len;
 	if (hdr)
 		memcpy(flow->hdr, hdr, hdr_len);
@@ -335,7 +315,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
 	struct atm_flow_data *flow = (struct atm_flow_data *)arg;
 
 	pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
-	if (!find_flow(qdisc_priv(sch), flow))
+	if (list_empty(&flow->list))
 		return -EINVAL;
 	if (flow->filter_list || flow == &p->link)
 		return -EBUSY;
@@ -361,12 +341,12 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 	pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
 	if (walker->stop)
 		return;
-	for (flow = p->flows; flow; flow = flow->next) {
-		if (walker->count >= walker->skip)
-			if (walker->fn(sch, (unsigned long)flow, walker) < 0) {
-				walker->stop = 1;
-				break;
-			}
+	list_for_each_entry(flow, &p->flows, list) {
+		if (walker->count >= walker->skip &&
+		    walker->fn(sch, (unsigned long)flow, walker) < 0) {
+			walker->stop = 1;
+			break;
+		}
 		walker->count++;
 	}
 }
@@ -385,16 +365,17 @@ static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl)
 static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow = NULL;	/* @@@ */
+	struct atm_flow_data *flow;
 	struct tcf_result res;
 	int result;
 	int ret = NET_XMIT_POLICED;
 
 	pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
 	result = TC_POLICE_OK;	/* be nice to gcc */
+	flow = NULL;
 	if (TC_H_MAJ(skb->priority) != sch->handle ||
-	    !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority)))
-		for (flow = p->flows; flow; flow = flow->next)
+	    !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) {
+		list_for_each_entry(flow, &p->flows, list) {
 			if (flow->filter_list) {
 				result = tc_classify_compat(skb,
 							    flow->filter_list,
@@ -404,8 +385,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 				flow = (struct atm_flow_data *)res.class;
 				if (!flow)
 					flow = lookup_flow(sch, res.classid);
-				break;
+				goto done;
 			}
+		}
+		flow = NULL;
+	done:
+		;		
+	}
 	if (!flow)
 		flow = &p->link;
 	else {
@@ -477,7 +463,9 @@ static void sch_atm_dequeue(unsigned long data)
 	struct sk_buff *skb;
 
 	pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
-	for (flow = p->link.next; flow; flow = flow->next)
+	list_for_each_entry(flow, &p->flows, list) {
+		if (flow == &p->link)
+			continue;
 		/*
 		 * If traffic is properly shaped, this won't generate nasty
 		 * little bursts. Otherwise, it may ... (but that's okay)
@@ -512,6 +500,7 @@ static void sch_atm_dequeue(unsigned long data)
 			/* atm.atm_options are already set by atm_tc_enqueue */
 			flow->vcc->send(flow->vcc, skb);
 		}
+	}
 }
 
 static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
@@ -543,9 +532,10 @@ static unsigned int atm_tc_drop(struct Qdisc *sch)
 	unsigned int len;
 
 	pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p);
-	for (flow = p->flows; flow; flow = flow->next)
+	list_for_each_entry(flow, &p->flows, list) {
 		if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q)))
 			return len;
+	}
 	return 0;
 }
 
@@ -554,7 +544,9 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
 	struct atm_qdisc_data *p = qdisc_priv(sch);
 
 	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
-	p->flows = &p->link;
+	INIT_LIST_HEAD(&p->flows);
+	INIT_LIST_HEAD(&p->link.list);
+	list_add(&p->link.list, &p->flows);
 	p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 				      &pfifo_qdisc_ops, sch->handle);
 	if (!p->link.q)
@@ -565,7 +557,6 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
 	p->link.sock = NULL;
 	p->link.classid = sch->handle;
 	p->link.ref = 1;
-	p->link.next = NULL;
 	tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch);
 	return 0;
 }
@@ -576,7 +567,7 @@ static void atm_tc_reset(struct Qdisc *sch)
 	struct atm_flow_data *flow;
 
 	pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
-	for (flow = p->flows; flow; flow = flow->next)
+	list_for_each_entry(flow, &p->flows, list)
 		qdisc_reset(flow->q);
 	sch->q.qlen = 0;
 }
@@ -584,24 +575,17 @@ static void atm_tc_reset(struct Qdisc *sch)
 static void atm_tc_destroy(struct Qdisc *sch)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow;
+	struct atm_flow_data *flow, *tmp;
 
 	pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
-	for (flow = p->flows; flow; flow = flow->next)
+	list_for_each_entry(flow, &p->flows, list)
 		tcf_destroy_chain(&flow->filter_list);
 
-	/* races ? */
-	while ((flow = p->flows)) {
+	list_for_each_entry_safe(flow, tmp, &p->flows, list) {
 		if (flow->ref > 1)
 			printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
 			       flow->ref);
 		atm_tc_put(sch, (unsigned long)flow);
-		if (p->flows == flow) {
-			printk(KERN_ERR "atm_destroy: putting flow %p didn't "
-			       "kill it\n", flow);
-			p->flows = flow->next;	/* brute force */
-			break;
-		}
 	}
 	tasklet_kill(&p->task);
 }
@@ -615,7 +599,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 
 	pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
 		sch, p, flow, skb, tcm);
-	if (!find_flow(p, flow))
+	if (list_empty(&flow->list))
 		return -EINVAL;
 	tcm->tcm_handle = flow->classid;
 	tcm->tcm_info = flow->q->handle;
-- 
1.7.1.1


^ permalink raw reply related

* [PATCH] LSM: Add post accept() hook.
From: Tetsuo Handa @ 2010-07-19  4:25 UTC (permalink / raw)
  To: davem, eric.dumazet, jmorris, paul.moore, sam, serge
  Cc: netdev, linux-security-module
In-Reply-To: <20100718.142550.25115105.davem@davemloft.net>

David Miller wrote:
> > Eric Dumazet wrote:
> >> I read this patch and could not find out if an SNMP counter was
> >> increased in the case a frame was not delivered but dropped in kernel
> >> land.
> > 
> > UDP_MIB_INDATAGRAMS and UDP_MIB_INERRORS will not be increased
> > if dropped by security_socket_post_recvmsg()'s decision.
> > Should we increment UDP_MIB_INDATAGRAMS and/or UDP_MIB_INERRORS?
> 
> This decision should be guided by what we do in the case
> of the other existing security hooks.
> 
> I don't think it makes any sense to make the post recvmsg() hook
> behave any differently from the existing hooks in this regard.

I see. Thank you.

I was misunderstanding the assumption about the select() -> recvmsg() sequence.
I was thinking that:

  If select() said "read operation will not block", the caller of recvmsg() can
  assume that recvmsg() which is preceded by select() will not be blocked.
  (The caller cannot assume that subsequent recvmsg() preceded by previous
  recvmsg() will not be blocked.) Therefore, the kernel must not wait for next
  message if current message was discarded by post recvmsg LSM hook. (And I
  thought that returning error code to the caller is the only way because the
  caller might be assuming that recvmsg() preceded by select() will not be
  blocked.)

But I understood that:

  Even if select() said "read operation will not block", the caller of recvmsg()
  can't assume that recvmsg() which is preceded by select() will not be blocked
  unless MSG_DONTWAIT or O_NONBLOCK was set.
  Therefore, the kernel is allowed to wait for next message if current message
  was discarded by post recvmsg LSM hook unless MSG_DONTWAIT or O_NONBLOCK was
  set.

Now, I'm thinking the same thing for select() -> accept() sequence:

  Even if select() said "read operation will not block", the caller of accept()
  can't assume that accept() which is preceded by select() will not be blocked
  unless MSG_DONTWAIT or O_NONBLOCK was set.
  Therefore, the kernel is allowed to wait for next connection if current
  connection was discarded by post accept LSM hook unless MSG_DONTWAIT or
  O_NONBLOCK was set.

Although "security_socket_post_accept() without retry loop" was proposed
in the past ( http://lkml.org/lkml/2010/3/2/297 ), I think I can propose
"security_socket_post_accept() with retry loop" (patch attached below)
if select() -> accept() case I wrote above is correct.

I can live with "security_socket_post_accept() without retry loop" by assigning
magic value to SOCK_INODE("struct socket *")->i_security field
( tomoyo_dead_sock() in http://lkml.org/lkml/2009/10/4/56 ) but below patch is
better for me because TOMOYO will not require the i_security field (which will
make it easier to realize LSM stacking/chaining) and will not need to implement
all LSM hooks for socket operations only for checking the i_security field.

May I have your opinion for below version?

Regards.
----------------------------------------
>From 54bc4ffee7998423e8c2d3a5cc9dfc647d5a892b Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sat, 17 Jul 2010 12:04:18 +0900
Subject: [PATCH] LSM: Add post accept() hook.

Current pre accept hook (i.e. security_socket_accept()) has two problems.

One is that it will cause eating 100% of CPU time if the caller does not
close() the socket when accept() failed due to security_socket_accept(), for
subsequent select() notifies the caller of readiness for accept() since the
connection which would have been already picked up if security_socket_accept()
did not return error is remaining in the queue.

The other is that it is racy if LSM module wants to do filtering based on
"which process can pick up connections from which source" because the process
which picks up the connection is not known until sock->ops->accept() and lock
is not held between security_socket_accept() and sock->ops->accept.

This patch introduces post accept hook (i.e. security_socket_post_accept()) in
order to solve above problems at the cost of ability to pick up the connection
which would have been picked up if preceding security_socket_post_accept() did
not return error.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
 include/linux/security.h |   21 +++++++++++++++++++++
 net/socket.c             |    7 +++++++
 security/capability.c    |    6 ++++++
 security/security.c      |    5 +++++
 4 files changed, 39 insertions(+), 0 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index 409c44d..2ed73c1 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -866,6 +866,19 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@sock contains the listening socket structure.
  *	@newsock contains the newly created server socket for connection.
  *	Return 0 if permission is granted.
+ * @socket_post_accept:
+ *	Check permission after accepting a new connection.
+ *	The connection is discarded if permission is not granted.
+ *	Return 0 after updating security information on the socket if you want
+ *	to restrict some of socket syscalls on the connection (e.g. forbid only
+ *	sending data). But you can't use this hook for updating security
+ *	information of the socket for preventing the connection from receiving
+ *	incoming data, for the kernel already started receiving incoming data
+ *	before accept() syscall. Return error if updating security information
+ *	failed or you want to forbid all of socket syscalls on the connection.
+ *	@sock contains the listening socket structure.
+ *	@newsock contains the accepted socket structure.
+ *	Return 0 if permission is granted.
  * @socket_sendmsg:
  *	Check permission before transmitting a message to another socket.
  *	@sock contains the socket structure.
@@ -1577,6 +1590,7 @@ struct security_operations {
 			       struct sockaddr *address, int addrlen);
 	int (*socket_listen) (struct socket *sock, int backlog);
 	int (*socket_accept) (struct socket *sock, struct socket *newsock);
+	int (*socket_post_accept) (struct socket *sock, struct socket *newsock);
 	int (*socket_sendmsg) (struct socket *sock,
 			       struct msghdr *msg, int size);
 	int (*socket_recvmsg) (struct socket *sock,
@@ -2530,6 +2544,7 @@ int security_socket_bind(struct socket *sock, struct sockaddr *address, int addr
 int security_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen);
 int security_socket_listen(struct socket *sock, int backlog);
 int security_socket_accept(struct socket *sock, struct socket *newsock);
+int security_socket_post_accept(struct socket *sock, struct socket *newsock);
 int security_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size);
 int security_socket_recvmsg(struct socket *sock, struct msghdr *msg,
 			    int size, int flags);
@@ -2612,6 +2627,12 @@ static inline int security_socket_accept(struct socket *sock,
 	return 0;
 }
 
+static inline int security_socket_post_accept(struct socket *sock,
+					      struct socket *newsock)
+{
+	return 0;
+}
+
 static inline int security_socket_sendmsg(struct socket *sock,
 					  struct msghdr *msg, int size)
 {
diff --git a/net/socket.c b/net/socket.c
index 367d547..97d644c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1473,6 +1473,7 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
 	if (!sock)
 		goto out;
 
+ retry:
 	err = -ENFILE;
 	if (!(newsock = sock_alloc()))
 		goto out_put;
@@ -1500,6 +1501,12 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
 	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
 	if (err < 0)
 		goto out_fd;
+	err = security_socket_post_accept(sock, newsock);
+	if (unlikely(err)) {
+		fput(newfile);
+		put_unused_fd(newfd);
+		goto retry;
+	}
 
 	if (upeer_sockaddr) {
 		if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
diff --git a/security/capability.c b/security/capability.c
index 709aea3..1fb88f5 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -586,6 +586,11 @@ static int cap_socket_accept(struct socket *sock, struct socket *newsock)
 	return 0;
 }
 
+static int cap_socket_post_accept(struct socket *sock, struct socket *newsock)
+{
+	return 0;
+}
+
 static int cap_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size)
 {
 	return 0;
@@ -1004,6 +1009,7 @@ void __init security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, socket_connect);
 	set_to_cap_if_null(ops, socket_listen);
 	set_to_cap_if_null(ops, socket_accept);
+	set_to_cap_if_null(ops, socket_post_accept);
 	set_to_cap_if_null(ops, socket_sendmsg);
 	set_to_cap_if_null(ops, socket_recvmsg);
 	set_to_cap_if_null(ops, socket_post_recvmsg);
diff --git a/security/security.c b/security/security.c
index 4291bd7..5c9ab0a 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1026,6 +1026,11 @@ int security_socket_accept(struct socket *sock, struct socket *newsock)
 	return security_ops->socket_accept(sock, newsock);
 }
 
+int security_socket_post_accept(struct socket *sock, struct socket *newsock)
+{
+	return security_ops->socket_post_accept(sock, newsock);
+}
+
 int security_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size)
 {
 	return security_ops->socket_sendmsg(sock, msg, size);
-- 
1.6.1

^ permalink raw reply related

* Problem with passing ptype_base list as a function argument
From: Abhishek @ 2010-07-19  5:01 UTC (permalink / raw)
  To: netdev

Hi all

I want to pass ptype_base list as an argument to a function defined in dev.c. 
The problem that I am facing once this is done is a kernel crash after execution 
of the kernel with the patch. Please let me know why there is a crash. What is 
going wrong over here? Do we require any other lock apart from rcu_read_lock() 
which is already part of dev.c?

The relevant details are as follows:

Kernel version: 2.6.27-17.46
Defined Function name and definition:
 
struct sk_buff * foo(struct sk_buff*, struct list_head *)
{
         struct packet_type *ptype, *pt_prev;
         struct net_device *orig_dev;
         struct net_device *null_or_orig;
         int ret = NET_RX_DROP;
         __be16 type;

       pt_prev = NULL;
       null_or_orig = NULL;
       orig_dev = skb->dev;
       type = skb->protocol;

        list_for_each_entry_rcu(ptype, head, list) {
                 if (ptype->type == type && (ptype->dev == null_or_orig ||
                      ptype->dev == skb->dev || ptype->dev == orig_dev ||
                      ptype->dev == null_or_bond)) {
                         if (pt_prev)
                                 ret = deliver_skb(skb, pt_prev, orig_dev);
                         pt_prev = ptype;
                 }
         }
 
         if (pt_prev) {
                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
         } else {
                 kfree_skb(skb);
                 /* Jamal, now you will not able to escape explaining
                  * me how you were going to use this. :-)
                  */
                 ret = NET_RX_DROP;
         }
    if (ret)
      return skb;
 return NULL;
}

net/core/dev.c Patch:

 75 #include <asm/uaccess.h>
 76 #include <asm/system.h>
 77 #include <linux/bitops.h>
 78 #include <linux/capability.h>
 79 #include <linux/cpu.h>
 80 #include <linux/types.h>
 81 #include <linux/kernel.h>
 82 #include <linux/hash.h>
 83 #include <linux/slab.h>
 84 #include <linux/sched.h>
 85 #include <linux/mutex.h>
 86 #include <linux/string.h>
 87 #include <linux/mm.h>
 88 #include <linux/socket.h>
 89 #include <linux/sockios.h>
 90 #include <linux/errno.h>
 91 #include <linux/interrupt.h>
 92 #include <linux/if_ether.h>
 93 #include <linux/netdevice.h>
 94 #include <linux/etherdevice.h>
 95 #include <linux/ethtool.h>
 96 #include <linux/notifier.h>
 97 #include <linux/skbuff.h>
 98 #include <net/net_namespace.h>
 99 #include <net/sock.h>
100 #include <linux/rtnetlink.h>
101 #include <linux/proc_fs.h>
102 #include <linux/seq_file.h>
103 #include <linux/stat.h>
104 #include <linux/if_bridge.h>
105 #include <linux/if_macvlan.h>
106 #include <net/dst.h>
107 #include <net/pkt_sched.h>
108 #include <net/checksum.h>
109 #include <net/xfrm.h>
110 #include <linux/highmem.h>
111 #include <linux/init.h>
112 #include <linux/kmod.h>
113 #include <linux/module.h>
114 #include <linux/netpoll.h>
115 #include <linux/rcupdate.h>
116 #include <linux/delay.h>
117 #include <net/wext.h>
118 #include <net/iw_handler.h>
119 #include <asm/current.h>
120 #include <linux/audit.h>
121 #include <linux/dmaengine.h>
122 #include <linux/err.h>
123 #include <linux/ctype.h>
124 #include <linux/if_arp.h>
125 #include <linux/if_vlan.h>
126 #include <linux/ip.h>
127 #include <net/ip.h>
128 #include <linux/ipv6.h>
129 #include <linux/in.h>
130 #include <linux/jhash.h>
131 #include <linux/random.h>
132 #include <trace/events/napi.h>
133 
134 #include "net-sysfs.h"
135 
136 /* Instead of increasing this, you should create a hash table. */
137 #define MAX_GRO_SKBS 8
138 
139 /* This should be increased if a protocol with a bigger head is added. */
140 #define GRO_MAX_HEAD (MAX_HEADER + 128)
141 
142 /*
143  *      The list of packet types we will receive (as opposed to discard)
144  *      and the routines to invoke.
145  *
146  *      Why 16. Because with 16 the only overlap we get on a hash of the
147  *      low nibble of the protocol value is RARP/SNAP/X.25.
148  *
149  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
150  *             sure which should go first, but I bet it won't make much
151  *             difference if we are running VLANs.  The good news is that
152  *             this protocol won't be in the list unless compiled in, so
153  *             the average user (w/out VLANs) will not be adversely 
affected.
154  *             --BLG
155  *
156  *              0800    IP
157  *              8100    802.1Q VLAN
158  *              0001    802.3
159  *              0002    AX.25
160  *              0004    802.2
161  *              8035    RARP
162  *              0005    SNAP
163  *              0805    X.25
164  *              0806    ARP
165  *              8137    IPX
166  *              0009    Localtalk
167  *              86DD    IPv6
168  */
169 
170 #define PTYPE_HASH_SIZE (16)
171 #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
172 
173 static DEFINE_SPINLOCK(ptype_lock);
174 static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
175 static struct list_head ptype_all __read_mostly;        /* Taps */
 
+ struct sk_buff* foo(struct sk_buff*, struct list_head *);

             .
             .
             .
             .

int netif_receive_skb(struct sk_buff *skb)
2487 {
2488         struct packet_type *ptype, *pt_prev;
2489         struct net_device *orig_dev;
2490         struct net_device *master;
2491         struct net_device *null_or_orig;
2492         struct net_device *null_or_bond;
2493         int ret = NET_RX_DROP;
2494         __be16 type;
2495 
2496         if (!skb->tstamp.tv64)
2497                 net_timestamp(skb);
2498 
2499         if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2500                 return NET_RX_SUCCESS;
2501 
2502         /* if we've gotten here through NAPI, check netpoll */
2503         if (netpoll_receive_skb(skb))
2504                 return NET_RX_DROP;
2505 
2506         if (!skb->skb_iif)
2507                 skb->skb_iif = skb->dev->ifindex;
2508 
2509         null_or_orig = NULL;
2510         orig_dev = skb->dev;
2511         master = ACCESS_ONCE(orig_dev->master);
2512         if (master) {
2513                 if (skb_bond_should_drop(skb, master))
2514                         null_or_orig = orig_dev; /* deliver only exact 
match */
2515                 else
2516                         skb->dev = master;
2517         }
2518 
2519         __get_cpu_var(netdev_rx_stat).total++;
2520 
2521         skb_reset_network_header(skb);
2522         skb_reset_transport_header(skb);
2523         skb->mac_len = skb->network_header - skb->mac_header;
2524 
2525         pt_prev = NULL;
2526 
2527         rcu_read_lock();
2528 
2529 #ifdef CONFIG_NET_CLS_ACT
2530         if (skb->tc_verd & TC_NCLS) {
2531                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2532                 goto ncls;
2533         }
2534 #endif
2535 
2536         list_for_each_entry_rcu(ptype, &ptype_all, list) {
2537                 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2538                     ptype->dev == orig_dev) {
2539                         if (pt_prev)
2540                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2541                         pt_prev = ptype;
2542                 }
2543         }
2544 
2545 #ifdef CONFIG_NET_CLS_ACT
2546         skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2547         if (!skb)
2548                 goto out;
2549 ncls:
2550 #endif

+ skb = foo(skb, &ptype_base[ntohs(skb-<protocol) & PTYPE_HASK_MASK]);
 
2552         skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2553         if (!skb)
2554                 goto out;
2555         skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2556         if (!skb)
2557                 goto out;
2558 
2559         /*
2560          * Make sure frames received on VLAN interfaces stacked on
2561          * bonding interfaces still make their way to any base bonding
2562          * device that may have registered for a specific ptype.  The
2563          * handler may have to adjust skb->dev and orig_dev.
2564          */
2565         null_or_bond = NULL;
2566         if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
2567             (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
2568                 null_or_bond = vlan_dev_real_dev(skb->dev);
2569         }
2570 
2571         type = skb->protocol;
2572         list_for_each_entry_rcu(ptype,
2573                         &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2574                 if (ptype->type == type && (ptype->dev == null_or_orig ||
2575                      ptype->dev == skb->dev || ptype->dev == orig_dev ||
2576                      ptype->dev == null_or_bond)) {
2577                         if (pt_prev)
2578                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2579                         pt_prev = ptype;
2580                 }
2581         }
2582 
2583         if (pt_prev) {
2584                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2585         } else {
2586                 kfree_skb(skb);
2587                 /* Jamal, now you will not able to escape explaining
2588                  * me how you were going to use this. :-)
2589                  */
2590                 ret = NET_RX_DROP;
2591         }
2592 
2593 out:
2594         rcu_read_unlock();
2595         return ret;
2596 }
2597 EXPORT_SYMBOL(netif_receive_skb);


^ permalink raw reply

* Re: oops in tcp_xmit_retransmit_queue() w/ v2.6.32.15
From: Lennart Schulte @ 2010-07-19  8:06 UTC (permalink / raw)
  To: Ilpo Järvinen
  Cc: David S. Miller, Eric Dumazet, Tejun Heo, lkml,
	netdev@vger.kernel.org, Fehrmann, Henning, Carsten Aulbert
In-Reply-To: <alpine.DEB.2.00.1007161602510.13946@melkinpaasi.cs.helsinki.fi>

I ran tests for about 2 hours with this patch and I got no output from 
the debug patch. This seems to have solved at least my problem :)

Thanks!
> [PATCH] tcp: fix crash in tcp_xmit_retransmit_queue
>
> It can happen that there are no packets in queue while calling
> tcp_xmit_retransmit_queue(). tcp_write_queue_head() then returns
> NULL and that gets deref'ed to get sacked into a local var.
>
> There is no work to do if no packets are outstanding so we just
> exit early.
>
> There may still be another bug affecting this same function.
>
> Signed-off-by: Ilpo Järvinen<ilpo.jarvinen@helsinki.fi>
> Reported-by: Lennart Schulte<lennart.schulte@nets.rwth-aachen.de>
> ---
>   net/ipv4/tcp_output.c |    3 +++
>   1 files changed, 3 insertions(+), 0 deletions(-)
>
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index b4ed957..7ed9dc1 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -2208,6 +2208,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
>   	int mib_idx;
>   	int fwd_rexmitting = 0;
>
> +	if (!tp->packets_out)
> +		return;
> +
>   	if (!tp->lost_out)
>   		tp->retransmit_high = tp->snd_una;
>
>    

^ permalink raw reply

* RTL8180 : improper usage of dma_addr_t
From: August @ 2010-07-19  8:23 UTC (permalink / raw)
  To: netdev

linux-kernel-version : 2.6.34
drivers/net/wireless/rtl818x/rtl8180_dev.c
__LINE__ :  500
cpu_to_le32((u32)dma + ((i + 1) % entries) * sizeof(*ring));

Although on the x86 architecture the value of dma_addr == line addr,
it is improper usage here.

suggestion:
cpu_to_le32((u32)(&priv[0]) + ((i + 1) % entries) * sizeof(*ring));

^ permalink raw reply

* Re: Problem with passing ptype_base list as a function argument
From: Abhishek @ 2010-07-19  8:53 UTC (permalink / raw)
  To: netdev
In-Reply-To: <loom.20100719T063120-919@post.gmane.org>

In addition to the aforementioned information, I would like to add one more thing: 
the function foo() belongs to a separate module and is not part of the dev.c 
file. Hence, dev.c also includes the function exporting line: 
EXPORT_SYMBOL(foo);



^ permalink raw reply

* [PATCH] s2io: Remove unnecessary memset of netdev private data
From: Tobias Klauser @ 2010-07-19  8:55 UTC (permalink / raw)
  To: netdev, David S. Miller; +Cc: kernel-janitors

The memory for the private data is allocated using kzalloc in
alloc_etherdev (or alloc_netdev_mq respectively) so there is no need to
set it to 0 again.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
---
 drivers/net/s2io.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index d0af924..aa6cbb0 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -7886,7 +7886,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 
 	/*  Private member variable initialized to s2io NIC structure */
 	sp = netdev_priv(dev);
-	memset(sp, 0, sizeof(struct s2io_nic));
 	sp->dev = dev;
 	sp->pdev = pdev;
 	sp->high_dma_flag = dma_flag;
-- 
1.7.0.4


^ permalink raw reply related

* Re: [0/8] netpoll/bridge fixes
From: Michael S. Tsirkin @ 2010-07-19 10:19 UTC (permalink / raw)
  To: Herbert Xu
  Cc: Stephen Hemminger, Qianfeng Zhang, David S. Miller, netdev,
	WANG Cong, Matt Mackall
In-Reply-To: <20100611021142.GA24490@gondor.apana.org.au>

On Fri, Jun 11, 2010 at 12:11:42PM +1000, Herbert Xu wrote:
> On Fri, Jun 11, 2010 at 08:48:39AM +1000, Herbert Xu wrote:
> > On Thu, Jun 10, 2010 at 02:59:15PM -0700, Stephen Hemminger wrote:
> > >
> > > Okay, then add a comment where in_irq is used?
> > 
> > Actually let me put it into a wrapper.  I'll respin the patches.
> 
> OK here is a repost.  And this time it really is 8 patches :)
> I've tested it lightly.
> 
> Cheers,

Meanwhile, should we just disable netpoll for bridge in 2.6.35 and -stable?
We are getting crash reports in virtualization which I suspect are
related to this:
https://bugzilla.kernel.org/show_bug.cgi?id=16413

> -- 
> Visit Openswan at http://www.openswan.org/
> Email: Herbert Xu 许志壬 <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [0/8] netpoll/bridge fixes
From: Herbert Xu @ 2010-07-19 10:53 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Stephen Hemminger, Qianfeng Zhang, David S. Miller, netdev,
	WANG Cong, Matt Mackall
In-Reply-To: <20100719101904.GA31743@redhat.com>

On Mon, Jul 19, 2010 at 01:19:04PM +0300, Michael S. Tsirkin wrote:
> On Fri, Jun 11, 2010 at 12:11:42PM +1000, Herbert Xu wrote:
> > On Fri, Jun 11, 2010 at 08:48:39AM +1000, Herbert Xu wrote:
> > > On Thu, Jun 10, 2010 at 02:59:15PM -0700, Stephen Hemminger wrote:
> > > >
> > > > Okay, then add a comment where in_irq is used?
> > > 
> > > Actually let me put it into a wrapper.  I'll respin the patches.
> > 
> > OK here is a repost.  And this time it really is 8 patches :)
> > I've tested it lightly.
> > 
> > Cheers,
> 
> Meanwhile, should we just disable netpoll for bridge in 2.6.35 and -stable?
> We are getting crash reports in virtualization which I suspect are
> related to this:
> https://bugzilla.kernel.org/show_bug.cgi?id=16413

I think that's probably the best solution, Dave?

Thanks,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [PATCHv2] tcp: fix crash in tcp_xmit_retransmit_queue
From: Ilpo Järvinen @ 2010-07-19 11:16 UTC (permalink / raw)
  To: Lennart Schulte, David Miller
  Cc: Eric Dumazet, Tejun Heo, lkml, netdev@vger.kernel.org,
	Fehrmann, Henning, Carsten Aulbert
In-Reply-To: <4C440771.7080107@nets.rwth-aachen.de>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1951 bytes --]

On Mon, 19 Jul 2010, Lennart Schulte wrote:

> I ran tests for about 2 hours with this patch and I got no output from the
> debug patch. This seems to have solved at least my problem :)
> 
> Thanks!
> > [PATCH] tcp: fix crash in tcp_xmit_retransmit_queue
> > 
> > It can happen that there are no packets in queue while calling
> > tcp_xmit_retransmit_queue(). tcp_write_queue_head() then returns
> > NULL and that gets deref'ed to get sacked into a local var.
> > 
> > There is no work to do if no packets are outstanding so we just
> > exit early.
> > 
> > There may still be another bug affecting this same function.

Thanks for testing.

DaveM, I think this oops was introduced for 2.6.28 (in 
08ebd1721ab8fd362e90ae17b461c07b23fa2824 it seems, to be exact) so to 
stables it should go too please. I've only tweaked the message (so no need 
for Lennart to retest v2 :-)).

--
[PATCHv2] tcp: fix crash in tcp_xmit_retransmit_queue

It can happen that there are no packets in queue while calling
tcp_xmit_retransmit_queue(). tcp_write_queue_head() then returns
NULL and that gets deref'ed to get sacked into a local var.

There is no work to do if no packets are outstanding so we just
exit early.

This oops was introduced by 08ebd1721ab8fd (tcp: remove tp->lost_out
guard to make joining diff nicer).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Reported-by: Lennart Schulte <lennart.schulte@nets.rwth-aachen.de>
Tested-by: Lennart Schulte <lennart.schulte@nets.rwth-aachen.de>
---
 net/ipv4/tcp_output.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4ed957..7ed9dc1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2208,6 +2208,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	int mib_idx;
 	int fwd_rexmitting = 0;
 
+	if (!tp->packets_out)
+		return;
+
 	if (!tp->lost_out)
 		tp->retransmit_high = tp->snd_una;
 
-- 
1.5.6.5

^ permalink raw reply related

* Re: [0/8] netpoll/bridge fixes
From: Herbert Xu @ 2010-07-19 11:54 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Stephen Hemminger, Qianfeng Zhang, David S. Miller, netdev,
	WANG Cong, Matt Mackall
In-Reply-To: <20100719105300.GA22179@gondor.apana.org.au>

On Mon, Jul 19, 2010 at 06:53:00PM +0800, Herbert Xu wrote:
>
> > Meanwhile, should we just disable netpoll for bridge in 2.6.35 and -stable?
> > We are getting crash reports in virtualization which I suspect are
> > related to this:
> > https://bugzilla.kernel.org/show_bug.cgi?id=16413
> 
> I think that's probably the best solution, Dave?

I take that back :)

It turns out that 16413 has nothing to do with bridge netpoll
(which was not merged until after 2.6.34) since he's running
2.6.34.

Still, it might be a good idea to disable bridge netpoll in
2.6.35.

Thanks,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [PATCH] smsc911x: Add spinlocks around registers access
From: Catalin Marinas @ 2010-07-19 12:42 UTC (permalink / raw)
  To: netdev; +Cc: Steve Glendinning, stable

On SMP systems, the SMSC911x registers may be accessed by multiple CPUs
and this seems to put the chip in an inconsistent state. The patch adds
spinlocks to the smsc911x_reg_read, smsc911x_reg_write,
smsc911x_rx_readfifo and smsc911x_tx_writefifo functions.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Steve Glendinning <steve.glendinning@smsc.com>
Cc: stable@kernel.org
---

I've had this patch in my tree for some time. It's the only way I can
get the ARM SMP systems using this controller to work reliably. I
haven't got an ack from Steve yet but the only alternative is to mark
this driver BROKEN_ON_SMP and revert the ARM boards to use the old
smc911x.c driver.

Thanks.


 drivers/net/smsc911x.c |   92 +++++++++++++++++++++++++++---------------------
 1 files changed, 52 insertions(+), 40 deletions(-)

diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index cc55974..7a7b01a 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -84,8 +84,7 @@ struct smsc911x_data {
 	 */
 	spinlock_t mac_lock;
 
-	/* spinlock to ensure 16-bit accesses are serialised.
-	 * unused with a 32-bit bus */
+	/* spinlock to ensure register accesses are serialised */
 	spinlock_t dev_lock;
 
 	struct phy_device *phy_dev;
@@ -118,37 +117,33 @@ struct smsc911x_data {
 	unsigned int hashlo;
 };
 
-/* The 16-bit access functions are significantly slower, due to the locking
- * necessary.  If your bus hardware can be configured to do this for you
- * (in response to a single 32-bit operation from software), you should use
- * the 32-bit access functions instead. */
-
-static inline u32 smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg)
+static inline u32 __smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg)
 {
 	if (pdata->config.flags & SMSC911X_USE_32BIT)
 		return readl(pdata->ioaddr + reg);
 
-	if (pdata->config.flags & SMSC911X_USE_16BIT) {
-		u32 data;
-		unsigned long flags;
-
-		/* these two 16-bit reads must be performed consecutively, so
-		 * must not be interrupted by our own ISR (which would start
-		 * another read operation) */
-		spin_lock_irqsave(&pdata->dev_lock, flags);
-		data = ((readw(pdata->ioaddr + reg) & 0xFFFF) |
+	if (pdata->config.flags & SMSC911X_USE_16BIT)
+		return ((readw(pdata->ioaddr + reg) & 0xFFFF) |
 			((readw(pdata->ioaddr + reg + 2) & 0xFFFF) << 16));
-		spin_unlock_irqrestore(&pdata->dev_lock, flags);
-
-		return data;
-	}
 
 	BUG();
 	return 0;
 }
 
-static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg,
-				      u32 val)
+static inline u32 smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg)
+{
+	u32 data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pdata->dev_lock, flags);
+	data = __smsc911x_reg_read(pdata, reg);
+	spin_unlock_irqrestore(&pdata->dev_lock, flags);
+
+	return data;
+}
+
+static inline void __smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg,
+					u32 val)
 {
 	if (pdata->config.flags & SMSC911X_USE_32BIT) {
 		writel(val, pdata->ioaddr + reg);
@@ -156,44 +151,54 @@ static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg,
 	}
 
 	if (pdata->config.flags & SMSC911X_USE_16BIT) {
-		unsigned long flags;
-
-		/* these two 16-bit writes must be performed consecutively, so
-		 * must not be interrupted by our own ISR (which would start
-		 * another read operation) */
-		spin_lock_irqsave(&pdata->dev_lock, flags);
 		writew(val & 0xFFFF, pdata->ioaddr + reg);
 		writew((val >> 16) & 0xFFFF, pdata->ioaddr + reg + 2);
-		spin_unlock_irqrestore(&pdata->dev_lock, flags);
 		return;
 	}
 
 	BUG();
 }
 
+static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg,
+				      u32 val)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&pdata->dev_lock, flags);
+	__smsc911x_reg_write(pdata, reg, val);
+	spin_unlock_irqrestore(&pdata->dev_lock, flags);
+}
+
 /* Writes a packet to the TX_DATA_FIFO */
 static inline void
 smsc911x_tx_writefifo(struct smsc911x_data *pdata, unsigned int *buf,
 		      unsigned int wordcount)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&pdata->dev_lock, flags);
+
 	if (pdata->config.flags & SMSC911X_SWAP_FIFO) {
 		while (wordcount--)
-			smsc911x_reg_write(pdata, TX_DATA_FIFO, swab32(*buf++));
-		return;
+			__smsc911x_reg_write(pdata, TX_DATA_FIFO,
+					     swab32(*buf++));
+		goto out;
 	}
 
 	if (pdata->config.flags & SMSC911X_USE_32BIT) {
 		writesl(pdata->ioaddr + TX_DATA_FIFO, buf, wordcount);
-		return;
+		goto out;
 	}
 
 	if (pdata->config.flags & SMSC911X_USE_16BIT) {
 		while (wordcount--)
-			smsc911x_reg_write(pdata, TX_DATA_FIFO, *buf++);
-		return;
+			__smsc911x_reg_write(pdata, TX_DATA_FIFO, *buf++);
+		goto out;
 	}
 
 	BUG();
+out:
+	spin_unlock_irqrestore(&pdata->dev_lock, flags);
 }
 
 /* Reads a packet out of the RX_DATA_FIFO */
@@ -201,24 +206,31 @@ static inline void
 smsc911x_rx_readfifo(struct smsc911x_data *pdata, unsigned int *buf,
 		     unsigned int wordcount)
 {
+	unsigned long flags;
+
+	spin_lock_irqsave(&pdata->dev_lock, flags);
+
 	if (pdata->config.flags & SMSC911X_SWAP_FIFO) {
 		while (wordcount--)
-			*buf++ = swab32(smsc911x_reg_read(pdata, RX_DATA_FIFO));
-		return;
+			*buf++ = swab32(__smsc911x_reg_read(pdata,
+							    RX_DATA_FIFO));
+		goto out;
 	}
 
 	if (pdata->config.flags & SMSC911X_USE_32BIT) {
 		readsl(pdata->ioaddr + RX_DATA_FIFO, buf, wordcount);
-		return;
+		goto out;
 	}
 
 	if (pdata->config.flags & SMSC911X_USE_16BIT) {
 		while (wordcount--)
-			*buf++ = smsc911x_reg_read(pdata, RX_DATA_FIFO);
-		return;
+			*buf++ = __smsc911x_reg_read(pdata, RX_DATA_FIFO);
+		goto out;
 	}
 
 	BUG();
+out:
+	spin_unlock_irqrestore(&pdata->dev_lock, flags);
 }
 
 /* waits for MAC not busy, with timeout.  Only called by smsc911x_mac_read


^ permalink raw reply related

* [BUG net-next-2.6] vlan, bonding, bnx2 problems
From: Eric Dumazet @ 2010-07-19 13:24 UTC (permalink / raw)
  To: David Miller, Michael Chan; +Cc: pedro.netdev, netdev, kaber, bhutchings
In-Reply-To: <20100718.153910.67919508.davem@davemloft.net>

Le dimanche 18 juillet 2010 à 15:39 -0700, David Miller a écrit :
> From: Pedro Garcia <pedro.netdev@dondevamos.com>
> Date: Sun, 18 Jul 2010 18:43:25 +0200
> 
> > - Without the 8021q module loaded in the kernel, all 802.1p packets 
> > (VLAN 0 but QoS tagging) are silently discarded (as expected, as 
> > the protocol is not loaded).
> >  
> > - Without this patch in 8021q module, these packets are forwarded to 
> > the module, but they are discarded also if VLAN 0 is not configured,
> > which should not be the default behaviour, as VLAN 0 is not really
> > a VLANed packet but a 802.1p packet. Defining VLAN 0 makes it almost
> > impossible to communicate with mixed 802.1p and non 802.1p devices on
> > the same network due to arp table issues.
> > 
> > - Changed logic to skip vlan specific code in vlan_skb_recv if VLAN 
> > is 0 and we have not defined a VLAN with ID 0, but we accept the 
> > packet with the encapsulated proto and pass it later to netif_rx.
> > 
> > - In the vlan device event handler, added some logic to add VLAN 0 
> > to HW filter in devices that support it (this prevented any traffic
> > in VLAN 0 to reach the stack in e1000e with HW filter under 2.6.35,
> > and probably also with other HW filtered cards, so we fix it here).
> > 
> > - In the vlan unregister logic, prevent the elimination of VLAN 0 
> > in devices with HW filter.
> > 
> > - The default behaviour is to ignore the VLAN 0 tagging and accept
> > the packet as if it was not tagged, but we can still define a 
> > VLAN 0 if desired (so it is backwards compatible).
> > 
> > Signed-off-by: Pedro Garcia <pedro.netdev@dondevamos.com>
> 
> Applied, thanks Pedro.

Hmm, current net-next-2.6 is not working with bonding and bnx2.

I got some fatal oops.

modprobe bond0
ifconfig bond0 down
echo 100 >/sys/class/net/bond0/bonding/miimon
echo 1 >/sys/class/net/bond0/bonding/mode
ifconfig bond0 up
ifenslave bond0 eth1 eth2
ip link set eth1 up
ip link set eth2 up

After some debugging to avoid crashes, I get :


[   31.784308] bonding: bond0: Setting MII monitoring interval to 100.
[   31.784391] bonding: bond0: setting mode to active-backup (1).
[   31.784900] 8021q: adding VLAN 0 to HW filter on device bond0
[   31.784903] ADDRCONF(NETDEV_UP): bond0: link is not ready
[   31.904440] ------------[ cut here ]------------
[   31.904500] WARNING: at drivers/net/bonding/bond_ipv6.c:185 bond_inet6addr_event+0x179/0x240 [bonding]()
[   31.904576] Hardware name: ProLiant BL460c G1
[   31.904629] Modules linked in: ipmi_si ipmi_msghandler hpilo bonding ipv6
[   31.904873] Pid: 4586, comm: ifenslave Tainted: G        W   2.6.35-rc1-01453-g3e12451-dirty #836
[   31.904948] Call Trace:
[   31.905002]  [<c13421c4>] ? printk+0x18/0x1c
[   31.905057]  [<c103c8fd>] warn_slowpath_common+0x6d/0xa0
[   31.905114]  [<f8cf5fd9>] ? bond_inet6addr_event+0x179/0x240 [bonding]
[   31.905172]  [<f8cf5fd9>] ? bond_inet6addr_event+0x179/0x240 [bonding]
[   31.905236]  [<c103c94d>] warn_slowpath_null+0x1d/0x20
[   31.905296]  [<f8cf5fd9>] bond_inet6addr_event+0x179/0x240 [bonding]
[   31.905354]  [<c105b061>] notifier_call_chain+0x41/0x60
[   31.905409]  [<c105b0cd>] atomic_notifier_call_chain+0x1d/0x20
[   31.905471]  [<f8b88b31>] addrconf_ifdown+0x211/0x320 [ipv6]
[   31.905529]  [<f8b897ae>] addrconf_notify+0x6e/0x870 [ipv6]
[   31.905586]  [<c1344912>] ? _raw_write_unlock_bh+0x12/0x20
[   31.905642]  [<c1344912>] ? _raw_write_unlock_bh+0x12/0x20
[   31.905701]  [<f8b8f1f0>] ? fib6_clean_all+0x70/0x80 [ipv6]
[   31.905770]  [<f8b8dda0>] ? fib6_age+0x0/0x90 [ipv6]
[   31.905830]  [<c104a106>] ? lock_timer_base+0x26/0x50
[   31.905884]  [<c104a279>] ? del_timer+0x69/0xb0
[   31.905938]  [<c134493d>] ? _raw_spin_unlock_bh+0xd/0x10
[   31.905997]  [<f8b8f267>] ? fib6_run_gc+0x67/0xe0 [ipv6]
[   31.906052]  [<c105b061>] notifier_call_chain+0x41/0x60
[   31.906107]  [<c105b19a>] raw_notifier_call_chain+0x1a/0x20
[   31.906165]  [<c129fe37>] call_netdevice_notifiers+0x27/0x60
[   31.906221]  [<c12ac0cd>] ? rtmsg_ifinfo+0xbd/0xf0
[   31.906276]  [<c12a183c>] __dev_notify_flags+0x5c/0x80
[   31.906333]  [<c12a1897>] dev_change_flags+0x37/0x60
[   31.906390]  [<c12f6291>] devinet_ioctl+0x591/0x6f0
[   31.906445]  [<c11726be>] ? copy_to_user+0x2e/0x40
[   31.906500]  [<c12f7212>] inet_ioctl+0xa2/0xd0
[   31.906555]  [<c128f65e>] sock_ioctl+0x4e/0x240
[   31.906610]  [<c10d3a44>] vfs_ioctl+0x34/0xa0
[   31.906664]  [<c10c7cab>] ? alloc_file+0x1b/0xa0
[   31.906718]  [<c128f610>] ? sock_ioctl+0x0/0x240
[   31.906771]  [<c10d4186>] do_vfs_ioctl+0x66/0x550
[   31.906827]  [<c1022ca0>] ? do_page_fault+0x0/0x350
[   31.906881]  [<c1022e41>] ? do_page_fault+0x1a1/0x350
[   31.906936]  [<c129098c>] ? sys_socket+0x5c/0x70
[   31.906990]  [<c1291860>] ? sys_socketcall+0x60/0x270
[   31.907045]  [<c10d46a9>] sys_ioctl+0x39/0x60
[   31.907099]  [<c1002bd0>] sysenter_do_call+0x12/0x26
[   31.907153] ---[ end trace 5c4638450a77a22f ]---
[   32.046479] BUG: scheduling while atomic: ifenslave/4586/0x00000100
[   32.046540] Modules linked in: ipmi_si ipmi_msghandler hpilo bonding ipv6
[   32.046784] Pid: 4586, comm: ifenslave Tainted: G        W   2.6.35-rc1-01453-g3e12451-dirty #836
[   32.046860] Call Trace:
[   32.046910]  [<c13421c4>] ? printk+0x18/0x1c
[   32.046965]  [<c10315c9>] __schedule_bug+0x59/0x60
[   32.047019]  [<c1342a2c>] schedule+0x57c/0x850
[   32.047074]  [<c104a106>] ? lock_timer_base+0x26/0x50
[   32.047128]  [<c1342f78>] schedule_timeout+0x118/0x250
[   32.047183]  [<c104a2c0>] ? process_timeout+0x0/0x10
[   32.047238]  [<c13430c5>] schedule_timeout_uninterruptible+0x15/0x20
[   32.047295]  [<c104a345>] msleep+0x15/0x20
[   32.047350]  [<c1227082>] bnx2_napi_disable+0x52/0x80
[   32.047405]  [<c122b56f>] bnx2_netif_stop+0x3f/0xa0
[   32.047460]  [<c122b62a>] bnx2_vlan_rx_register+0x5a/0x80
[   32.047516]  [<f8ced776>] bond_enslave+0x526/0xa90 [bonding]
[   32.047576]  [<f8b8f0d0>] ? fib6_clean_node+0x0/0xb0 [ipv6]
[   32.047634]  [<f8b8dda0>] ? fib6_age+0x0/0x90 [ipv6]
[   32.047689]  [<c129d2d3>] ? netdev_set_master+0x3/0xc0
[   32.047746]  [<f8cee4cb>] bond_do_ioctl+0x31b/0x430 [bonding]
[   32.047804]  [<c105b19a>] ? raw_notifier_call_chain+0x1a/0x20
[   32.047861]  [<c12abd5d>] ? __rtnl_unlock+0xd/0x10
[   32.047915]  [<c129f8cd>] ? __dev_get_by_name+0x7d/0xa0
[   32.047970]  [<c12a19b0>] dev_ifsioc+0xf0/0x290
[   32.048025]  [<f8cee1b0>] ? bond_do_ioctl+0x0/0x430 [bonding]
[   32.048081]  [<c12a1ce1>] dev_ioctl+0x191/0x610
[   32.048136]  [<c12eeb20>] ? udp_ioctl+0x0/0x70
[   32.048189]  [<c128f67c>] sock_ioctl+0x6c/0x240
[   32.048243]  [<c10d3a44>] vfs_ioctl+0x34/0xa0
[   32.048297]  [<c10c7cab>] ? alloc_file+0x1b/0xa0
[   32.048351]  [<c128f610>] ? sock_ioctl+0x0/0x240
[   32.048404]  [<c10d4186>] do_vfs_ioctl+0x66/0x550
[   32.048459]  [<c1022ca0>] ? do_page_fault+0x0/0x350
[   32.048513]  [<c1022e41>] ? do_page_fault+0x1a1/0x350
[   32.048568]  [<c129098c>] ? sys_socket+0x5c/0x70
[   32.048622]  [<c1291860>] ? sys_socketcall+0x60/0x270
[   32.048677]  [<c10d46a9>] sys_ioctl+0x39/0x60
[   32.048730]  [<c1002bd0>] sysenter_do_call+0x12/0x26
[   32.052025] bonding: bond0: enslaving eth1 as a backup interface with a down link.
[   32.100207] tg3 0000:14:04.0: PME# enabled
[   32.100222]  pci0000:00: wake-up capability enabled by ACPI
[   32.224488]  pci0000:00: wake-up capability disabled by ACPI
[   32.224492] tg3 0000:14:04.0: PME# disabled
[   32.348516] tg3 0000:14:04.0: BAR 0: set to [mem 0xfdff0000-0xfdffffff 64bit] (PCI address [0xfdff0000-0xfdffffff]
[   32.348524] tg3 0000:14:04.0: BAR 2: set to [mem 0xfdfe0000-0xfdfeffff 64bit] (PCI address [0xfdfe0000-0xfdfeffff]
[   32.363711] bonding: bond0: enslaving eth2 as a backup interface with a down link.



For bnx2, it seems commit 212f9934afccf9c9739921
was not sufficient to correct the "scheduling while atomic" bug...
enslaving a bnx2 on a bond device with one vlan already set :
 bond_enslave -> bnx2_vlan_rx_register -> bnx2_netif_stop -> bnx2_napi_disable -> msleep()

For the first oops, following patch cures it, but I am not pleased
with it. This zero-vid registration seems wrong at the beginning.

Thanks

[RFC net-next-2.6] bonding: fix bond_inet6addr_event() 

After commit ad1afb0039391 (vlan_dev: VLAN 0 should be treated
as "no vlan tag" (802.1p packet)),
bond_inet6addr_event() might be called with a NULL bond->vlgrp pointer, and
a non-empty bond->vlan_list. vlan_group_get_device() is dereferencing a NULL pointer.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---

diff --git a/drivers/net/bonding/bond_ipv6.c b/drivers/net/bonding/bond_ipv6.c
index 969ffed..121b073 100644
--- a/drivers/net/bonding/bond_ipv6.c
+++ b/drivers/net/bonding/bond_ipv6.c
@@ -178,6 +178,8 @@ static int bond_inet6addr_event(struct notifier_block *this,
 		}
 
 		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+			if (!bond->vlgrp)
+				continue;
 			vlan_dev = vlan_group_get_device(bond->vlgrp,
 							 vlan->vlan_id);
 			if (vlan_dev == event_dev) {




^ permalink raw reply related

* [PATCH net-next-2.6] net: 64bit stats for netdev_queue
From: Eric Dumazet @ 2010-07-19 13:33 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Since struct netdev_queue tx_bytes/tx_packets/tx_dropped are already
protected by _xmit_lock, it's easy to convert these fields to u64 instead
of unsigned long.
This completes 64bit stats for devices using them (vlan, macvlan, ...)

Strictly, we could avoid the locking in dev_txq_stats_fold() on 64bit
arches, but it's a slow path and we prefer to keep it simple.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/linux/netdevice.h |    6 +++---
 net/core/dev.c            |    4 +++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fdc3f29..b626289 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -501,9 +501,9 @@ struct netdev_queue {
 	 * please use this field instead of dev->trans_start
 	 */
 	unsigned long		trans_start;
-	unsigned long		tx_bytes;
-	unsigned long		tx_packets;
-	unsigned long		tx_dropped;
+	u64			tx_bytes;
+	u64			tx_packets;
+	u64			tx_dropped;
 } ____cacheline_aligned_in_smp;
 
 #ifdef CONFIG_RPS
diff --git a/net/core/dev.c b/net/core/dev.c
index 1c002c7..9de75cd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5282,15 +5282,17 @@ void netdev_run_todo(void)
 void dev_txq_stats_fold(const struct net_device *dev,
 			struct rtnl_link_stats64 *stats)
 {
-	unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+	u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
 	unsigned int i;
 	struct netdev_queue *txq;
 
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		txq = netdev_get_tx_queue(dev, i);
+		spin_lock_bh(&txq->_xmit_lock);
 		tx_bytes   += txq->tx_bytes;
 		tx_packets += txq->tx_packets;
 		tx_dropped += txq->tx_dropped;
+		spin_unlock_bh(&txq->_xmit_lock);
 	}
 	if (tx_bytes || tx_packets || tx_dropped) {
 		stats->tx_bytes   = tx_bytes;



^ permalink raw reply related

* Re: [PATCHv2] tcp: fix crash in tcp_xmit_retransmit_queue
From: Eric Dumazet @ 2010-07-19 14:09 UTC (permalink / raw)
  To: Ilpo Järvinen
  Cc: Lennart Schulte, David Miller, Tejun Heo, lkml,
	netdev@vger.kernel.org, Fehrmann, Henning, Carsten Aulbert
In-Reply-To: <alpine.DEB.2.00.1007191319010.13002@wel-95.cs.helsinki.fi>

Le lundi 19 juillet 2010 à 14:16 +0300, Ilpo Järvinen a écrit :

> Thanks for testing.
> 
> DaveM, I think this oops was introduced for 2.6.28 (in 
> 08ebd1721ab8fd362e90ae17b461c07b23fa2824 it seems, to be exact) so to 
> stables it should go too please. I've only tweaked the message (so no need 
> for Lennart to retest v2 :-)).
> 
> --
> [PATCHv2] tcp: fix crash in tcp_xmit_retransmit_queue
> 
> It can happen that there are no packets in queue while calling
> tcp_xmit_retransmit_queue(). tcp_write_queue_head() then returns
> NULL and that gets deref'ed to get sacked into a local var.
> 
> There is no work to do if no packets are outstanding so we just
> exit early.
> 
> This oops was introduced by 08ebd1721ab8fd (tcp: remove tp->lost_out
> guard to make joining diff nicer).
> 

But prior to commit 08ebd1721ab8fd3, we were not testing
tp->packets_out, but tp->lost_out

if it was 0, we were not doing the tcp_for_write_queue_from() loop.

Not sure it makes a difference ?

> Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
> Reported-by: Lennart Schulte <lennart.schulte@nets.rwth-aachen.de>
> Tested-by: Lennart Schulte <lennart.schulte@nets.rwth-aachen.de>
> ---
>  net/ipv4/tcp_output.c |    3 +++
>  1 files changed, 3 insertions(+), 0 deletions(-)
> 
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index b4ed957..7ed9dc1 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -2208,6 +2208,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
>  	int mib_idx;
>  	int fwd_rexmitting = 0;
>  
> +	if (!tp->packets_out)
> +		return;
> +
>  	if (!tp->lost_out)
>  		tp->retransmit_high = tp->snd_una;
>  

^ permalink raw reply

* [RFC 1/2] netfilter: xt_condition: export list management code
From: Luciano Coelho @ 2010-07-19 14:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, kaber, jengelh, sameo
In-Reply-To: <1279548947-10470-1-git-send-email-luciano.coelho@nokia.com>

From: Luciano Coelho <coelho@testbed>

This patch isolates and exports the condition list management code, in
preparation for the CONDITION target to use it.  No functional change,
just reorganization of the code.

Signed-off-by: Luciano Coelho <luciano.coelho@nokia.com>
---
 include/linux/netfilter/xt_condition.h |   17 ++++++-
 net/netfilter/xt_condition.c           |   82 ++++++++++++++++++-------------
 2 files changed, 64 insertions(+), 35 deletions(-)

diff --git a/include/linux/netfilter/xt_condition.h b/include/linux/netfilter/xt_condition.h
index 4faf3ca..eebf41a 100644
--- a/include/linux/netfilter/xt_condition.h
+++ b/include/linux/netfilter/xt_condition.h
@@ -3,12 +3,27 @@
 
 #include <linux/types.h>
 
+#define XT_CONDITION_MAX_NAME_SIZE 30
+
 struct xt_condition_mtinfo {
-	char name[31];
+	char name[XT_CONDITION_MAX_NAME_SIZE + 1];
 	__u8 invert;
 
 	/* Used internally by the kernel */
 	void *condvar __attribute__((aligned(8)));
 };
 
+#ifdef __KERNEL__
+struct condition_variable {
+	struct list_head list;
+	struct proc_dir_entry *status_proc;
+	unsigned int refcount;
+	bool enabled;
+};
+
+struct condition_variable *xt_condition_insert(const char *name);
+void xt_condition_put(struct condition_variable *var);
+void xt_condition_set(struct condition_variable *var, bool enabled);
+#endif /* __KERNEL__ */
+
 #endif /* _XT_CONDITION_H */
diff --git a/net/netfilter/xt_condition.c b/net/netfilter/xt_condition.c
index a7ccea3..dec97fe 100644
--- a/net/netfilter/xt_condition.c
+++ b/net/netfilter/xt_condition.c
@@ -43,13 +43,6 @@ MODULE_PARM_DESC(condition_gid_perms, "default group owner of /proc/net/nf_condi
 MODULE_ALIAS("ipt_condition");
 MODULE_ALIAS("ip6t_condition");
 
-struct condition_variable {
-	struct list_head list;
-	struct proc_dir_entry *status_proc;
-	unsigned int refcount;
-	bool enabled;
-};
-
 /* proc_lock is a user context only semaphore used for write access */
 /*           to the conditions' list.                               */
 static DEFINE_MUTEX(proc_lock);
@@ -100,47 +93,34 @@ condition_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	return var->enabled ^ info->invert;
 }
 
-static int condition_mt_check(const struct xt_mtchk_param *par)
+struct condition_variable *xt_condition_insert(const char *name)
 {
-	struct xt_condition_mtinfo *info = par->matchinfo;
 	struct condition_variable *var;
 
-	/* Forbid certain names */
-	if (*info->name == '\0' || *info->name == '.' ||
-	    info->name[sizeof(info->name)-1] != '\0' ||
-	    memchr(info->name, '/', sizeof(info->name)) != NULL) {
-		pr_info("name not allowed or too long: \"%.*s\"\n",
-			(unsigned int)sizeof(info->name), info->name);
-		return -EINVAL;
-	}
 	/*
 	 * Let's acquire the lock, check for the condition and add it
 	 * or increase the reference counter.
 	 */
 	mutex_lock(&proc_lock);
 	list_for_each_entry(var, &conditions_list, list) {
-		if (strcmp(info->name, var->status_proc->name) == 0) {
+		if (strcmp(name, var->status_proc->name) == 0) {
 			++var->refcount;
-			mutex_unlock(&proc_lock);
-			info->condvar = var;
-			return 0;
+			goto out;
 		}
 	}
 
 	/* At this point, we need to allocate a new condition variable. */
 	var = kmalloc(sizeof(struct condition_variable), GFP_KERNEL);
-	if (var == NULL) {
-		mutex_unlock(&proc_lock);
-		return -ENOMEM;
-	}
+	if (var == NULL)
+		goto out;
 
 	/* Create the condition variable's proc file entry. */
-	var->status_proc = create_proc_entry(info->name, condition_list_perms,
+	var->status_proc = create_proc_entry(name, condition_list_perms,
 			   proc_net_condition);
 	if (var->status_proc == NULL) {
 		kfree(var);
-		mutex_unlock(&proc_lock);
-		return -ENOMEM;
+		var = NULL;
+		goto out;
 	}
 
 	var->refcount = 1;
@@ -151,16 +131,14 @@ static int condition_mt_check(const struct xt_mtchk_param *par)
 	var->status_proc->uid        = condition_uid_perms;
 	var->status_proc->gid        = condition_gid_perms;
 	list_add(&var->list, &conditions_list);
+out:
 	mutex_unlock(&proc_lock);
-	info->condvar = var;
-	return 0;
+	return var;
 }
+EXPORT_SYMBOL_GPL(xt_condition_insert);
 
-static void condition_mt_destroy(const struct xt_mtdtor_param *par)
+void xt_condition_put(struct condition_variable *var)
 {
-	const struct xt_condition_mtinfo *info = par->matchinfo;
-	struct condition_variable *var = info->condvar;
-
 	mutex_lock(&proc_lock);
 	if (--var->refcount == 0) {
 		list_del(&var->list);
@@ -171,6 +149,42 @@ static void condition_mt_destroy(const struct xt_mtdtor_param *par)
 	}
 	mutex_unlock(&proc_lock);
 }
+EXPORT_SYMBOL_GPL(xt_condition_put);
+
+void xt_condition_set(struct condition_variable *var, bool enabled)
+{
+	var->enabled = enabled;
+}
+EXPORT_SYMBOL_GPL(xt_condition_set);
+
+static int condition_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_condition_mtinfo *info = par->matchinfo;
+	struct condition_variable *var;
+
+	/* Forbid certain names */
+	if (*info->name == '\0' || *info->name == '.' ||
+	    info->name[sizeof(info->name)-1] != '\0' ||
+	    memchr(info->name, '/', sizeof(info->name)) != NULL) {
+		pr_info("name not allowed or too long: \"%.*s\"\n",
+			(unsigned int)sizeof(info->name), info->name);
+		return -EINVAL;
+	}
+
+	var = xt_condition_insert(info->name);
+	if (var == NULL)
+		return -ENOMEM;
+
+	info->condvar = var;
+	return 0;
+}
+
+static void condition_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	const struct xt_condition_mtinfo *info = par->matchinfo;
+
+	xt_condition_put(info->condvar);
+}
 
 static struct xt_match condition_mt_reg __read_mostly = {
 	.name       = "condition",
-- 
1.7.0.4


^ permalink raw reply related

* [RFC 2/2] netfilter: xtables: implement CONDITION target
From: Luciano Coelho @ 2010-07-19 14:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, kaber, jengelh, sameo
In-Reply-To: <1279548947-10470-1-git-send-email-luciano.coelho@nokia.com>

From: Luciano Coelho <coelho@testbed>

This patch implements a condition target, which lets the user set
netfilter rules that enable/disable the conditions used by the
condition match.  Originally, the condition match only allowed the
variable to be changed via procfs.  This new target makes it easy to
enable or disable the condition depending on the rules set.

Signed-off-by: Luciano Coelho <luciano.coelho@nokia.com>
---
 include/linux/netfilter/Kbuild         |    1 +
 include/linux/netfilter/xt_CONDITION.h |   39 +++++++++++
 net/netfilter/Kconfig                  |   12 ++++
 net/netfilter/Makefile                 |    1 +
 net/netfilter/xt_CONDITION.c           |  112 ++++++++++++++++++++++++++++++++
 5 files changed, 165 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netfilter/xt_CONDITION.h
 create mode 100644 net/netfilter/xt_CONDITION.c

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index c57e099..72eff3a 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -4,6 +4,7 @@ header-y += nfnetlink_conntrack.h
 header-y += nfnetlink_log.h
 header-y += nfnetlink_queue.h
 header-y += xt_CLASSIFY.h
+header-y += xt_CONDITION.h
 header-y += xt_CONNMARK.h
 header-y += xt_CONNSECMARK.h
 header-y += xt_CT.h
diff --git a/include/linux/netfilter/xt_CONDITION.h b/include/linux/netfilter/xt_CONDITION.h
new file mode 100644
index 0000000..cbffe3f
--- /dev/null
+++ b/include/linux/netfilter/xt_CONDITION.h
@@ -0,0 +1,39 @@
+/*
+ * linux/include/linux/netfilter/xt_CONDITION.h
+ *
+ * Header file for Xtables condition target module.
+ *
+ * Copyright (C) 2010 Nokia Corporation
+ *
+ * Contact: Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef _XT_CONDITION_TG_H
+#define _XT_CONDITION_TG_H
+
+#include <linux/types.h>
+#include <linux/netfilter/xt_condition.h>
+
+struct condition_tg_info {
+	char name[XT_CONDITION_MAX_NAME_SIZE + 1];
+	__u8 enabled;
+
+	/* Used internally by the kernel */
+	void *condvar __attribute__((aligned(8)));
+};
+
+#endif
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e54e216..1877c6a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -310,6 +310,18 @@ config NETFILTER_XT_MARK
 	"Use netfilter MARK value as routing key") and can also be used by
 	other subsystems to change their behavior.
 
+config NETFILTER_XT_TARGET_CONDITION
+	tristate  "'CONDITION' target support"
+	depends on NETFILTER_ADVANCED
+	select NETFILTER_XT_MATCH_CONDITION
+	help
+
+	  Allows changing the condition match value in procfs from the
+	  netfilter tables, without requiring userspace to change the
+	  condition value.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_CONNMARK
 	tristate 'ctmark target and match support'
 	depends on NF_CONNTRACK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 474dd06..9237a67 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
 
 # targets
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_CONDITION) += xt_CONDITION.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
diff --git a/net/netfilter/xt_CONDITION.c b/net/netfilter/xt_CONDITION.c
new file mode 100644
index 0000000..8150352
--- /dev/null
+++ b/net/netfilter/xt_CONDITION.c
@@ -0,0 +1,112 @@
+/*
+ * linux/net/netfilter/xt_CONDITION.c
+ *
+ * Netfilter module to enable or disable a condition variable when a
+ * packet matches; the variable is shared with the condition match.
+ *
+ * Copyright (C) 2010 Nokia Corporation
+ *
+ * Contact: Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_CONDITION.h>
+
+static unsigned int condition_tg_target(struct sk_buff *skb,
+					 const struct xt_action_param *par)
+{
+	const struct condition_tg_info *info = par->targinfo;
+
+	pr_debug("setting condition %s, enabled %d\n",
+		 info->name, info->enabled);
+
+	xt_condition_set(info->condvar, info->enabled);
+
+	return XT_CONTINUE;
+}
+
+static int condition_tg_checkentry(const struct xt_tgchk_param *par)
+{
+	struct condition_tg_info *info = par->targinfo;
+	struct condition_variable *var;
+
+	pr_debug("checkentry %s\n", info->name);
+
+	/* Forbid certain names */
+	if (*info->name == '\0' || *info->name == '.' ||
+	    info->name[sizeof(info->name)-1] != '\0' ||
+	    memchr(info->name, '/', sizeof(info->name)) != NULL) {
+		pr_info("name not allowed or too long: \"%.*s\"\n",
+			(unsigned int)sizeof(info->name), info->name);
+		return -EINVAL;
+	}
+
+	var = xt_condition_insert(info->name);
+	if (var == NULL)
+		return -ENOMEM;
+
+	info->condvar = var;
+	return 0;
+}
+
+static void condition_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	const struct condition_tg_info *info = par->targinfo;
+
+	pr_debug("destroy %s\n", info->name);
+
+	xt_condition_put(info->condvar);
+}
+
+static struct xt_target condition_tg __read_mostly = {
+	.name		= "CONDITION",
+	.family		= NFPROTO_UNSPEC,
+	.target		= condition_tg_target,
+	.targetsize     = sizeof(struct condition_tg_info),
+	.checkentry	= condition_tg_checkentry,
+	.destroy        = condition_tg_destroy,
+	.me		= THIS_MODULE,
+};
+
+static int __init condition_tg_init(void)
+{
+	int err;
+
+	err =  xt_register_target(&condition_tg);
+	if (err < 0) {
+		pr_debug("couldn't register xt target\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static void __exit condition_tg_exit(void)
+{
+	xt_unregister_target(&condition_tg);
+}
+
+module_init(condition_tg_init);
+module_exit(condition_tg_exit);
+
+MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
+MODULE_DESCRIPTION("Xtables: condition target");
+MODULE_LICENSE("GPL v2");
-- 
1.7.0.4


^ permalink raw reply related

* [RFC 0/2] netfilter: xtables: CONDITION target implementation
From: Luciano Coelho @ 2010-07-19 14:15 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev, kaber, jengelh, sameo

From: Luciano Coelho <coelho@testbed.(none)>

Hi all,

As discussed earlier, I've been looking for a way to enable and disable the
condition match automatically, in the netfilter tables themselves (ie. without
the need to use procfs).

This is my initial implementation.  Please let me know how it looks.  The first
patch is based on the xt_condition patch that Jan sent to the list (but which
has not been finalized for inclusion yet).  Once the condition match gets
applied, I'll forward port my patch and submit it again.

Cheers,
Luca.

Luciano Coelho (2):
  netfilter: xt_condition: export list management code
  netfilter: xtables: implement CONDITION target

 include/linux/netfilter/Kbuild         |    1 +
 include/linux/netfilter/xt_CONDITION.h |   39 +++++++++++
 include/linux/netfilter/xt_condition.h |   17 +++++-
 net/netfilter/Kconfig                  |   12 ++++
 net/netfilter/Makefile                 |    1 +
 net/netfilter/xt_CONDITION.c           |  112 ++++++++++++++++++++++++++++++++
 net/netfilter/xt_condition.c           |   82 ++++++++++++++----------
 7 files changed, 229 insertions(+), 35 deletions(-)
 create mode 100644 include/linux/netfilter/xt_CONDITION.h
 create mode 100644 net/netfilter/xt_CONDITION.c


^ permalink raw reply

* Are concurrent calls to tc action ipt safe?
From: Gerd v. Egidy @ 2010-07-19 14:23 UTC (permalink / raw)
  To: netfilter-devel; +Cc: netdev

Hi,

AFAIK, current iptables has a short race condition when two rules within the 
same table are changed at once.

E.g. when two users simultaneously call something like this
iptables -t filter -A INPUT -s 192.168.1.1 -j MARK --set-mark 1
and
iptables -t filter -A INPUT -s 192.168.1.2 -j MARK --set-mark 2
one of these entries can get lost.

Jan Engelhardt recently posted his xt2 patchset to overcome problems like this, 
but it seems to still have performance issues.

I have a set of simple rules which need to change often and are subject to 
this problem. I now wonder if I can solve this by using tc and the ipt action:

tc filter add dev eth0 parent ffff: protocol ip prio 1 u32  \
match ip src 192.168.1.1 \
action ipt -j MARK --set-mark 1

Since this call uses the xtables targets I'm currently not sure if the same 
problem regarding concurrent changes exists or not. Can anyone tell me if 
concurrent calls like this are safe?

Thank you very much.

Kind regards,

Gerd

-- 
Address (better: trap) for people I really don't want to get mail from:
jonas@cactusamerica.com

^ permalink raw reply

* Re: [RFC 0/2] netfilter: xtables: CONDITION target implementation
From: Changli Gao @ 2010-07-19 14:27 UTC (permalink / raw)
  To: Luciano Coelho; +Cc: netfilter-devel, netdev, kaber, jengelh, sameo
In-Reply-To: <1279548947-10470-1-git-send-email-luciano.coelho@nokia.com>

On Mon, Jul 19, 2010 at 10:15 PM, Luciano Coelho
<luciano.coelho@nokia.com> wrote:
> From: Luciano Coelho <coelho@testbed.(none)>
>
> Hi all,
>
> As discussed earlier, I've been looking for a way to enable and disable the
> condition match automatically, in the netfilter tables themselves (ie. without
> the need to use procfs).
>
> This is my initial implementation.  Please let me know how it looks.  The first
> patch is based on the xt_condition patch that Jan sent to the list (but which
> has not been finalized for inclusion yet).  Once the condition match gets
> applied, I'll forward port my patch and submit it again.
>
> Cheers,
> Luca.
>
> Luciano Coelho (2):
>  netfilter: xt_condition: export list management code
>  netfilter: xtables: implement CONDITION target
>
>  include/linux/netfilter/Kbuild         |    1 +
>  include/linux/netfilter/xt_CONDITION.h |   39 +++++++++++
>  include/linux/netfilter/xt_condition.h |   17 +++++-
>  net/netfilter/Kconfig                  |   12 ++++
>  net/netfilter/Makefile                 |    1 +
>  net/netfilter/xt_CONDITION.c           |  112 ++++++++++++++++++++++++++++++++
>  net/netfilter/xt_condition.c           |   82 ++++++++++++++----------

Why not combine xt_CONDITION.c and xt_condition.c into xt_condition.c,
like xt_mark.c?


-- 
Regards,
Changli Gao(xiaosuo@gmail.com)
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC 0/2] netfilter: xtables: CONDITION target implementation
From: Luciano Coelho @ 2010-07-19 14:31 UTC (permalink / raw)
  To: ext Changli Gao
  Cc: netfilter-devel@vger.kernel.org, netdev@vger.kernel.org,
	kaber@trash.net, jengelh@medozas.de, sameo@linux.intel.com
In-Reply-To: <AANLkTilRtCiuAwox2sTsEu73fE89OLEUtIbVQ_njtkqz@mail.gmail.com>

On Mon, 2010-07-19 at 16:27 +0200, ext Changli Gao wrote:
> On Mon, Jul 19, 2010 at 10:15 PM, Luciano Coelho
> <luciano.coelho@nokia.com> wrote:
> > From: Luciano Coelho <coelho@testbed.(none)>
> >
> > Hi all,
> >
> > As discussed earlier, I've been looking for a way to enable and disable the
> > condition match automatically, in the netfilter tables themselves (ie. without
> > the need to use procfs).
> >
> > This is my initial implementation.  Please let me know how it looks.  The first
> > patch is based on the xt_condition patch that Jan sent to the list (but which
> > has not been finalized for inclusion yet).  Once the condition match gets
> > applied, I'll forward port my patch and submit it again.
> >
> > Cheers,
> > Luca.
> >
> > Luciano Coelho (2):
> >  netfilter: xt_condition: export list management code
> >  netfilter: xtables: implement CONDITION target
> >
> >  include/linux/netfilter/Kbuild         |    1 +
> >  include/linux/netfilter/xt_CONDITION.h |   39 +++++++++++
> >  include/linux/netfilter/xt_condition.h |   17 +++++-
> >  net/netfilter/Kconfig                  |   12 ++++
> >  net/netfilter/Makefile                 |    1 +
> >  net/netfilter/xt_CONDITION.c           |  112 ++++++++++++++++++++++++++++++++
> >  net/netfilter/xt_condition.c           |   82 ++++++++++++++----------
> 
> Why not combine xt_CONDITION.c and xt_condition.c into xt_condition.c,
> like xt_mark.c?

I just thought that someone may want to use the condition match without
using the CONDITION target, that's why I've put it in a different
module.

But I don't have a strong opinion about this.  If everybody agrees on
that, I can merge the code into a single module.

Thanks for your comment.

-- 
Cheers,
Luca.


^ permalink raw reply

* Re: [PATCH 07/11] Removing dead ARCH_PNX010X
From: Christoph Egger @ 2010-07-19 14:37 UTC (permalink / raw)
  To: David Miller
  Cc: joe, shemminger, dongdong.deng, jkosina, netdev, linux-kernel,
	vamos-dev
In-Reply-To: <20100714.133916.71109591.davem@davemloft.net>

On Wed, Jul 14, 2010 at 01:39:16PM -0700, David Miller wrote:
> From: Christoph Egger <siccegge@cs.fau.de>
> Date: Wed, 14 Jul 2010 14:41:09 +0200
> 
> > ARCH_PNX010X doesn't exist in Kconfig, therefore removing all
> > references for it from the source code.
> > 
> > Signed-off-by: Christoph Egger <siccegge@cs.fau.de>
> 
> If you are going to kill this off, kill the references in
> driver/net/Kconfig at the same time.
> 
> Please fix this up and resubmit your patch, thanks.

Done, patch below.

Thanks

    Christoph

---
>From ed6ffbfd77e14f17fa7d75ddf70b0d3b0126848c Mon Sep 17 00:00:00 2001
From: Christoph Egger <siccegge@cs.fau.de>
Date: Wed, 14 Jul 2010 14:19:15 +0200
Subject: [PATCH] Removing dead ARCH_PNX010X

ARCH_PNX010X doesn't exist in Kconfig, therefore removing all
references for it from the source code/Kconfig.

Signed-off-by: Christoph Egger <siccegge@cs.fau.de>
---
 drivers/net/Kconfig  |    4 ++--
 drivers/net/cs89x0.c |   45 ---------------------------------------------
 2 files changed, 2 insertions(+), 47 deletions(-)

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index ce2fcdd..ba5b862 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -1463,7 +1463,7 @@ config FORCEDETH
 config CS89x0
 	tristate "CS89x0 support"
 	depends on NET_ETHERNET && (ISA || EISA || MACH_IXDP2351 \
-		|| ARCH_IXDP2X01 || ARCH_PNX010X || MACH_MX31ADS)
+		|| ARCH_IXDP2X01 || MACH_MX31ADS)
 	---help---
 	  Support for CS89x0 chipset based Ethernet cards. If you have a
 	  network (Ethernet) card of this type, say Y and read the
@@ -1477,7 +1477,7 @@ config CS89x0
 config CS89x0_NONISA_IRQ
 	def_bool y
 	depends on CS89x0 != n
-	depends on MACH_IXDP2351 || ARCH_IXDP2X01 || ARCH_PNX010X || MACH_MX31ADS
+	depends on MACH_IXDP2351 || ARCH_IXDP2X01 || MACH_MX31ADS
 
 config TC35815
 	tristate "TOSHIBA TC35815 Ethernet support"
diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c
index 2ccb9f1..7a5d787 100644
--- a/drivers/net/cs89x0.c
+++ b/drivers/net/cs89x0.c
@@ -180,12 +180,6 @@ static unsigned int cs8900_irq_map[] = {IRQ_IXDP2351_CS8900, 0, 0, 0};
 #elif defined(CONFIG_ARCH_IXDP2X01)
 static unsigned int netcard_portlist[] __used __initdata = {IXDP2X01_CS8900_VIRT_BASE, 0};
 static unsigned int cs8900_irq_map[] = {IRQ_IXDP2X01_CS8900, 0, 0, 0};
-#elif defined(CONFIG_ARCH_PNX010X)
-#include <mach/gpio.h>
-#define CIRRUS_DEFAULT_BASE	IO_ADDRESS(EXT_STATIC2_s0_BASE + 0x200000)	/* = Physical address 0x48200000 */
-#define CIRRUS_DEFAULT_IRQ	VH_INTC_INT_NUM_CASCADED_INTERRUPT_1 /* Event inputs bank 1 - ID 35/bit 3 */
-static unsigned int netcard_portlist[] __used __initdata = {CIRRUS_DEFAULT_BASE, 0};
-static unsigned int cs8900_irq_map[] = {CIRRUS_DEFAULT_IRQ, 0, 0, 0};
 #elif defined(CONFIG_MACH_MX31ADS)
 #include <mach/board-mx31ads.h>
 static unsigned int netcard_portlist[] __used __initdata = {
@@ -372,18 +366,6 @@ writeword(unsigned long base_addr, int portno, u16 value)
 {
 	__raw_writel(value, base_addr + (portno << 1));
 }
-#elif defined(CONFIG_ARCH_PNX010X)
-static u16
-readword(unsigned long base_addr, int portno)
-{
-	return inw(base_addr + (portno << 1));
-}
-
-static void
-writeword(unsigned long base_addr, int portno, u16 value)
-{
-	outw(value, base_addr + (portno << 1));
-}
 #else
 static u16
 readword(unsigned long base_addr, int portno)
@@ -546,30 +528,6 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular)
 #endif
         }
 
-#ifdef CONFIG_ARCH_PNX010X
-	initialize_ebi();
-
-	/* Map GPIO registers for the pins connected to the CS8900a. */
-	if (map_cirrus_gpio() < 0)
-		return -ENODEV;
-
-	reset_cirrus();
-
-	/* Map event-router registers. */
-	if (map_event_router() < 0)
-		return -ENODEV;
-
-	enable_cirrus_irq();
-
-	unmap_cirrus_gpio();
-	unmap_event_router();
-
-	dev->base_addr = ioaddr;
-
-	for (i = 0 ; i < 3 ; i++)
-		readreg(dev, 0);
-#endif
-
 	/* Grab the region so we can find another board if autoIRQ fails. */
 	/* WTF is going on here? */
 	if (!request_region(ioaddr & ~3, NETCARD_IO_EXTENT, DRV_NAME)) {
@@ -1391,9 +1349,6 @@ net_open(struct net_device *dev)
 	case A_CNF_MEDIA_10B_2: result = lp->adapter_cnf & A_CNF_10B_2; break;
         default: result = lp->adapter_cnf & (A_CNF_10B_T | A_CNF_AUI | A_CNF_10B_2);
         }
-#ifdef CONFIG_ARCH_PNX010X
-	result = A_CNF_10B_T;
-#endif
         if (!result) {
                 printk(KERN_ERR "%s: EEPROM is configured for unavailable media\n", dev->name);
 release_dma:
-- 
1.7.0.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox