Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next-2.6 v4 2/5] sctp: Add sysctl support for Auto-ASCONF
From: Michio Honda @ 2011-04-20 19:24 UTC (permalink / raw)
  To: netdev; +Cc: lksctp-developers

This patch allows the system administrator to change default Auto-ASCONF on/off behavior via an sysctl value.  

Signed-off-by: Michio Honda <micchie@sfc.wide.ad.jp>
---
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 50cb57f..df39789 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -183,6 +183,13 @@ static ctl_table sctp_table[] = {
 		.proc_handler	= proc_dointvec,
 	},
 	{
+		.procname	= "auto_asconf_enable",
+		.data		= &sctp_auto_asconf_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
 		.procname	= "prsctp_enable",
 		.data		= &sctp_prsctp_enable,
 		.maxlen		= sizeof(int),


^ permalink raw reply related

* [PATCH net-next-2.6 v4 1/5] sctp: Add Auto-ASCONF support
From: Michio Honda @ 2011-04-20 19:24 UTC (permalink / raw)
  To: netdev; +Cc: lksctp-developers

SCTP reconfigure the IP addresses in the association by using ASCONF chunks as mentioned in RFC5061.  
For example, we can start to use the newly configured IP address in the existing association.  
ASCONF operation is invoked in two ways: 
First is done by the application to call sctp_bindx() system call.  
Second is automatic operation in the SCTP stack with address events in the host computer (called auto_asconf) .  
The former is already implemented, and this patch implement the latter.  

Signed-off-by: Michio Honda <micchie@sfc.wide.ad.jp>
---
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 505845d..8678cbd 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -121,6 +121,7 @@ extern int sctp_copy_local_addr_list(struct sctp_bind_addr *,
 				     int flags);
 extern struct sctp_pf *sctp_get_pf_specific(sa_family_t family);
 extern int sctp_register_pf(struct sctp_pf *, sa_family_t);
+void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *, int);
 
 /*
  * sctp/socket.c
@@ -135,6 +136,7 @@ void sctp_sock_rfree(struct sk_buff *skb);
 void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 		    struct sctp_association *asoc);
 extern struct percpu_counter sctp_sockets_allocated;
+int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
 
 /*
  * sctp/primitive.c
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index cc9185c..80974c3 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -205,6 +205,11 @@ extern struct sctp_globals {
 	 * It is a list of sctp_sockaddr_entry.
 	 */
 	struct list_head local_addr_list;
+	int auto_asconf_enable;
+	struct list_head addr_waitq;
+	struct timer_list addr_wq_timer;
+	struct list_head auto_asconf_splist;
+	spinlock_t addr_wq_lock;
 
 	/* Lock that protects the local_addr_list writers */
 	spinlock_t addr_list_lock;
@@ -264,6 +269,11 @@ extern struct sctp_globals {
 #define sctp_port_hashtable		(sctp_globals.port_hashtable)
 #define sctp_local_addr_list		(sctp_globals.local_addr_list)
 #define sctp_local_addr_lock		(sctp_globals.addr_list_lock)
+#define sctp_auto_asconf_splist		(sctp_globals.auto_asconf_splist)
+#define sctp_addr_waitq			(sctp_globals.addr_waitq)
+#define sctp_addr_wq_timer		(sctp_globals.addr_wq_timer)
+#define sctp_addr_wq_lock		(sctp_globals.addr_wq_lock)
+#define sctp_auto_asconf_enable		(sctp_globals.auto_asconf_enable)
 #define sctp_scope_policy		(sctp_globals.ipv4_scope_policy)
 #define sctp_addip_enable		(sctp_globals.addip_enable)
 #define sctp_addip_noauth		(sctp_globals.addip_noauth_enable)
@@ -341,6 +351,8 @@ struct sctp_sock {
 	atomic_t pd_mode;
 	/* Receive to here while partial delivery is in effect. */
 	struct sk_buff_head pd_lobby;
+	struct list_head auto_asconf_list;
+	int do_auto_asconf;
 };
 
 static inline struct sctp_sock *sctp_sk(const struct sock *sk)
@@ -796,6 +808,10 @@ struct sctp_sockaddr_entry {
 	__u8 valid;
 };
 
+#define SCTP_NEWADDR	1
+#define SCTP_DELADDR	2
+#define SCTP_ADDRESS_TICK_DELAY	500
+
 typedef struct sctp_chunk *(sctp_packet_phandler_t)(struct sctp_association *);
 
 /* This structure holds lists of chunks as we are assembling for
@@ -1239,6 +1255,7 @@ sctp_scope_t sctp_scope(const union sctp_addr *);
 int sctp_in_scope(const union sctp_addr *addr, const sctp_scope_t scope);
 int sctp_is_any(struct sock *sk, const union sctp_addr *addr);
 int sctp_addr_is_valid(const union sctp_addr *addr);
+int sctp_is_ep_boundall(struct sock *sk);
 
 
 /* What type of endpoint?  */
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index faf71d1..869267b 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -536,6 +536,21 @@ int sctp_in_scope(const union sctp_addr *addr, sctp_scope_t scope)
 	return 0;
 }
 
+int sctp_is_ep_boundall(struct sock *sk)
+{
+	struct sctp_bind_addr *bp;
+	struct sctp_sockaddr_entry *addr;
+
+	bp = &sctp_sk(sk)->ep->base.bind_addr;
+	if (sctp_list_single_entry(&bp->address_list)) {
+		addr = list_entry(bp->address_list.next,
+				  struct sctp_sockaddr_entry, list);
+		if (sctp_is_any(sk, &addr->a))
+			return 1;
+	}
+	return 0;
+}
+
 /********************************************************************
  * 3rd Level Abstractions
  ********************************************************************/
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 865ce7b..0ba27b9 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -105,6 +105,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
 			addr->valid = 1;
 			spin_lock_bh(&sctp_local_addr_lock);
 			list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
+			sctp_addr_wq_mgmt(addr, SCTP_NEWADDR);
 			spin_unlock_bh(&sctp_local_addr_lock);
 		}
 		break;
@@ -115,6 +116,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
 			if (addr->a.sa.sa_family == AF_INET6 &&
 					ipv6_addr_equal(&addr->a.v6.sin6_addr,
 						&ifa->addr)) {
+				sctp_addr_wq_mgmt(addr, SCTP_DELADDR);
 				found = 1;
 				addr->valid = 0;
 				list_del_rcu(&addr->list);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 152976e..d8442a4 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -636,6 +636,164 @@ static void sctp_v4_ecn_capable(struct sock *sk)
 	INET_ECN_xmit(sk);
 }
 
+void sctp_addr_wq_timeout_handler(unsigned long arg)
+{
+	struct sctp_sockaddr_entry *addrw = NULL;
+	union sctp_addr *addr = NULL;
+	struct sctp_sock *sp = NULL;
+
+	spin_lock_bh(&sctp_addr_wq_lock);
+retry_wq:
+	if (list_empty(&sctp_addr_waitq)) {
+		SCTP_DEBUG_PRINTK("sctp_addrwq_timo_handler: nothing in addr waitq\n");
+		spin_unlock_bh(&sctp_addr_wq_lock);
+		return;
+	}
+	addrw = list_first_entry(&sctp_addr_waitq, struct sctp_sockaddr_entry,
+			list);
+	addr = &addrw->a;
+	SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ",
+	    " for cmd %d at entry %p\n", &sctp_addr_waitq, addr, addrw->state,
+	    addrw);
+
+	/* Now we send an ASCONF for each association */
+	/* Note. we currently don't handle link local IPv6 addressees */
+	if (addr->sa.sa_family == AF_INET6) {
+		struct in6_addr *in6 = (struct in6_addr *)&addr->v6.sin6_addr;
+
+		if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) {
+			SCTP_DEBUG_PRINTK("sctp_timo_handler: link local, hence don't tell sockets\n");
+			list_del(&addrw->list);
+			kfree(addrw);
+			goto retry_wq;
+		}
+		if (ipv6_chk_addr(&init_net, in6, NULL, 0) == 0 &&
+		    addrw->state == SCTP_NEWADDR) {
+			unsigned long timeo_val;
+
+			SCTP_DEBUG_PRINTK("sctp_timo_handler: this is on DAD, trying %d sec later\n",
+			    SCTP_ADDRESS_TICK_DELAY);
+			timeo_val = jiffies;
+			timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY);
+			mod_timer(&sctp_addr_wq_timer, timeo_val);
+			spin_unlock_bh(&sctp_addr_wq_lock);
+			return;
+		}
+	}
+	list_for_each_entry(sp, &sctp_auto_asconf_splist, auto_asconf_list) {
+		struct sock *sk;
+
+		if (sp == NULL) {
+			SCTP_DEBUG_PRINTK("addrwq_timo_handler: no socket\n");
+			continue;
+		}
+		sk = sctp_opt2sk(sp);
+		if (!sctp_is_ep_boundall(sk))
+			/* ignore bound-specific endpoints */
+			continue;
+		sctp_bh_lock_sock(sk);
+		if (sctp_asconf_mgmt(sp, addrw) < 0) {
+			SCTP_DEBUG_PRINTK("sctp_addrwq_timo_handler: sctp_asconf_mgmt failed\n");
+			sctp_bh_unlock_sock(sk);
+			continue;
+		}
+		sctp_bh_unlock_sock(sk);
+	}
+
+	list_del(&addrw->list);
+	kfree(addrw);
+
+	if (!list_empty(&sctp_addr_waitq))
+		goto retry_wq;
+
+	spin_unlock_bh(&sctp_addr_wq_lock);
+}
+
+static void sctp_free_addr_wq()
+{
+	struct sctp_sockaddr_entry *addrw = NULL;
+	struct sctp_sockaddr_entry *temp = NULL;
+
+	spin_lock_bh(&sctp_addr_wq_lock);
+	del_timer(&sctp_addr_wq_timer);
+	list_for_each_entry_safe(addrw, temp, &sctp_addr_waitq, list) {
+		list_del(&addrw->list);
+		kfree(addrw);
+	}
+	spin_unlock_bh(&sctp_addr_wq_lock);
+}
+
+/* lookup the entry for the same address in the addr_waitq
+ * sctp_addr_wq MUST be locked
+ */
+static struct sctp_sockaddr_entry *sctp_addr_wq_lookup(struct sctp_sockaddr_entry *addr)
+{
+	struct sctp_sockaddr_entry *addrw;
+
+	list_for_each_entry(addrw, &sctp_addr_waitq, list) {
+		if (addrw->a.sa.sa_family != addr->a.sa.sa_family)
+			continue;
+		if (addrw->a.sa.sa_family == AF_INET) {
+			if (addrw->a.v4.sin_addr.s_addr ==
+			    addr->a.v4.sin_addr.s_addr)
+				return addrw;
+		} else if (addrw->a.sa.sa_family == AF_INET6) {
+			if (ipv6_addr_equal(&addrw->a.v6.sin6_addr,
+			    &addr->a.v6.sin6_addr))
+				return addrw;
+		}
+	}
+	return NULL;
+}
+
+void sctp_addr_wq_mgmt(struct sctp_sockaddr_entry *addr, int cmd)
+{
+	struct sctp_sockaddr_entry *addrw = NULL;
+	unsigned long timeo_val;
+	union sctp_addr *tmpaddr;
+
+	/* first, we check if an opposite message already exist in the queue.
+	 * If we found such message, it is removed.
+	 * This operation is a bit stupid, but the DHCP client attaches the
+	 * new address after a couple of addition and deletion of that address
+	 */
+
+	spin_lock_bh(&sctp_addr_wq_lock);
+	/* Offsets existing events in addr_wq */
+	addrw = sctp_addr_wq_lookup(addr);
+	tmpaddr = &addrw->a;
+	if (addrw) {
+		if (addrw->state != cmd) {
+			SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ",
+			    " in wq %p\n", addrw->state, tmpaddr,
+			    &sctp_addr_waitq);
+			list_del(&addrw->list);
+			kfree(addrw);
+		}
+		spin_unlock_bh(&sctp_addr_wq_lock);
+		return;
+	}
+
+	/* OK, we have to add the new address to the wait queue */
+	addrw = kmemdup(addr, sizeof(struct sctp_sockaddr_entry), GFP_ATOMIC);
+	if (addrw == NULL) {
+		spin_unlock_bh(&sctp_addr_wq_lock);
+		return;
+	}
+	addrw->state = cmd;
+	list_add_tail(&addrw->list, &sctp_addr_waitq);
+	tmpaddr = &addrw->a;
+	SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ",
+	    " in wq %p\n", addrw->state, tmpaddr, &sctp_addr_waitq);
+
+	if (!timer_pending(&sctp_addr_wq_timer)) {
+		timeo_val = jiffies;
+		timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY);
+		mod_timer(&sctp_addr_wq_timer, timeo_val);
+	}
+	spin_unlock_bh(&sctp_addr_wq_lock);
+}
+
 /* Event handler for inet address addition/deletion events.
  * The sctp_local_addr_list needs to be protocted by a spin lock since
  * multiple notifiers (say IPv4 and IPv6) may be running at the same
@@ -663,6 +821,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
 			addr->valid = 1;
 			spin_lock_bh(&sctp_local_addr_lock);
 			list_add_tail_rcu(&addr->list, &sctp_local_addr_list);
+			sctp_addr_wq_mgmt(addr, SCTP_NEWADDR);
 			spin_unlock_bh(&sctp_local_addr_lock);
 		}
 		break;
@@ -673,6 +832,7 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev,
 			if (addr->a.sa.sa_family == AF_INET &&
 					addr->a.v4.sin_addr.s_addr ==
 					ifa->ifa_local) {
+				sctp_addr_wq_mgmt(addr, SCTP_DELADDR);
 				found = 1;
 				addr->valid = 0;
 				list_del_rcu(&addr->list);
@@ -1256,6 +1416,7 @@ SCTP_STATIC __init int sctp_init(void)
 	/* Disable ADDIP by default. */
 	sctp_addip_enable = 0;
 	sctp_addip_noauth = 0;
+	sctp_auto_asconf_enable = 0;
 
 	/* Enable PR-SCTP by default. */
 	sctp_prsctp_enable = 1;
@@ -1280,6 +1441,13 @@ SCTP_STATIC __init int sctp_init(void)
 	spin_lock_init(&sctp_local_addr_lock);
 	sctp_get_local_addr_list();
 
+	/* Initialize the address event list */
+	INIT_LIST_HEAD(&sctp_addr_waitq);
+	INIT_LIST_HEAD(&sctp_auto_asconf_splist);
+	spin_lock_init(&sctp_addr_wq_lock);
+	sctp_addr_wq_timer.expires = 0;
+	setup_timer(&sctp_addr_wq_timer, sctp_addr_wq_timeout_handler, 0);
+
 	status = sctp_v4_protosw_init();
 
 	if (status)
@@ -1344,6 +1512,7 @@ err_chunk_cachep:
 /* Exit handler for the SCTP protocol.  */
 SCTP_STATIC __exit void sctp_exit(void)
 {
+	sctp_free_addr_wq();
 	/* BUG.  This should probably do something useful like clean
 	 * up all the remaining associations and all that memory.
 	 */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3951a10..29f2b1f 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -807,6 +807,37 @@ out:
 	return retval;
 }
 
+/* set addr events to assocs in the endpoint.  ep and addr_wq must be locked */
+int
+sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw)
+{
+	struct sock *sk = sctp_opt2sk(sp);
+	union sctp_addr *addr = NULL;
+	int cmd;
+	int error = 0;
+
+	addr = &addrw->a;
+	addr->v4.sin_port = htons(sp->ep->base.bind_addr.port);
+	cmd = addrw->state;
+
+	SCTP_DEBUG_PRINTK("sctp_asconf_mgmt sp:%p\n", sp);
+	if (cmd == SCTP_NEWADDR) {
+		error = sctp_send_asconf_add_ip(sk, (struct sockaddr *)addr, 1);
+		if (error) {
+			SCTP_DEBUG_PRINTK("asconf_mgmt: send_asconf_add_ip returns %d\n", error);
+			return error;
+		}
+	} else if (cmd == SCTP_DELADDR) {
+		error = sctp_send_asconf_del_ip(sk, (struct sockaddr *)addr, 1);
+		if (error) {
+			SCTP_DEBUG_PRINTK("asconf_mgmt: send_asconf_del_ip returns %d\n", error);
+			return error;
+		}
+	}
+
+	return 0;
+}
+
 /* Helper for tunneling sctp_bindx() requests through sctp_setsockopt()
  *
  * API 8.1
@@ -3770,6 +3801,13 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	local_bh_disable();
 	percpu_counter_inc(&sctp_sockets_allocated);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+	if (sctp_auto_asconf_enable) {
+		list_add_tail(&sp->auto_asconf_list,
+		    &sctp_auto_asconf_splist);
+		sp->do_auto_asconf = 1;
+	} else
+		sp->do_auto_asconf = 0;
+	SCTP_DEBUG_PRINTK("sctp_init_sk sk:%p ep:%p\n", sk, ep);
 	local_bh_enable();
 
 	return 0;
@@ -3779,11 +3817,17 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 SCTP_STATIC void sctp_destroy_sock(struct sock *sk)
 {
 	struct sctp_endpoint *ep;
+	struct sctp_sock *sp;
 
 	SCTP_DEBUG_PRINTK("sctp_destroy_sock(sk: %p)\n", sk);
 
 	/* Release our hold on the endpoint. */
-	ep = sctp_sk(sk)->ep;
+	sp = sctp_sk(sk);
+	ep = sp->ep;
+	if (sp->do_auto_asconf) {
+		sp->do_auto_asconf = 0;
+		list_del(&sp->auto_asconf_list);
+	}
 	sctp_endpoint_free(ep);
 	local_bh_disable();
 	percpu_counter_dec(&sctp_sockets_allocated);


^ permalink raw reply related

* Re: [PATCH v2] net: r8169: convert to hw_features
From: Francois Romieu @ 2011-04-20 19:13 UTC (permalink / raw)
  To: David Miller; +Cc: dave, netdev, nic_swsd
In-Reply-To: <20110418.225400.189709717.davem@davemloft.net>

David Miller <davem@davemloft.net> :
[...]
> Francois, since TSO is disabled by default I'm applying this to
> net-next-2.6, thanks!

Fine.

> Maybe we can progressively start to try and turn these things on
> by default now that we know more about the TX descriptor layout
> differences.

Sure. I'd welcome a little timeslot to push a few configuration and
cosmetic things though.

-- 
Ueimor

^ permalink raw reply

* Re: [PATCH v2] can: add missing socket check in can/raw release
From: David Miller @ 2011-04-20 19:22 UTC (permalink / raw)
  To: davej; +Cc: socketcan, netdev
In-Reply-To: <20110420170333.GD24930@redhat.com>

From: Dave Jones <davej@redhat.com>
Date: Wed, 20 Apr 2011 13:03:34 -0400

> On Wed, Apr 20, 2011 at 01:57:15PM +0200, Oliver Hartkopp wrote:
>  > v2: added space after 'if' according code style.
>  > 
>  > We can get here with a NULL socket argument passed from userspace,
>  > so we need to handle it accordingly.
>  > 
>  > Thanks to Dave Jones pointing at this issue in net/can/bcm.c
>  > 
>  > Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
> 
> ACK. That was the first thing I hit this morning ;-)

Aha, so this patch fixes the same bug in a different part of CAN,
I missed that :-)

Ok I'll apply this, thanks!

^ permalink raw reply

* Re: r8169 doesn't report link state correctly.
From: Francois Romieu @ 2011-04-20 19:14 UTC (permalink / raw)
  To: Ben Greear; +Cc: netdev
In-Reply-To: <20110413174815.GB17579@electric-eye.fr.zoreil.com>

François Romieu <romieu@fr.zoreil.com> :
> On Mon, Apr 11, 2011 at 01:09:19PM -0700, Ben Greear wrote:
> > I notice that in kernel 2.6.38-wl, the realtek 8169 NIC doesn't
> > report link down when in fact there is no cable connected.  Instead,
[...]
> Thanks for the report. I'll try it tomorrow or friday.

I have not been able to notice it with a current kernel.

I'd welcome the XID of the 8169 NIC (see dmesg) and a short explanation
(no cable from boot ? cable removed after ifconfig up ? brand / ability
of the switch / hub ?).

Thanks.

-- 
Ueimor

^ permalink raw reply

* Re: Add missing socket check in can/bcm release.
From: David Miller @ 2011-04-20 19:21 UTC (permalink / raw)
  To: davej; +Cc: netdev
In-Reply-To: <20110420160350.GA24930@redhat.com>

From: Dave Jones <davej@redhat.com>
Date: Wed, 20 Apr 2011 12:03:50 -0400

> On Tue, Apr 19, 2011 at 08:37:20PM -0700, David Miller wrote:
>  > From: Dave Jones <davej@redhat.com>
>  > Date: Tue, 19 Apr 2011 23:30:01 -0400
>  > 
>  > > We can get here with a NULL socket argument passed from userspace,
>  > > so we need to handle it accordingly.
>  > > 
>  > > Signed-off-by: Dave Jones <davej@redhat.com>
>  > 
>  > Applied and queued up for -stable, thanks Dave.
> 
> Out of curiousity, while I was asleep it occured to me.. is it ever valid
> for a ->release to get passed a NULL socket->sk ?

Yes, it happens all the time.

If accept() fails mid-stream, we'll have an 'sk' that hasn't been
hooked up to ->socket yet, but we still have to release the 'sk'
in the error handling.

See also commit c100c8f4c3c6f2a407bdbaaad2c4f1062e6a473a, which
fixes a bug triggered via the same code path.

^ permalink raw reply

* Re: r8169 :  always copying the rx buffer to new skb
From: Francois Romieu @ 2011-04-20 19:13 UTC (permalink / raw)
  To: John Lumby; +Cc: netdev, Ben Hutchings, nic_swsd
In-Reply-To: <4DACAC7E.4070400@hotmail.com>

John Lumby <johnlumby@hotmail.com> :
[...]
> I've  verified that on my 8168c by simulating an allocation failure
> on 15 out of every 16 rx-Interrupts  (unhooking the current skb and
> then simply not allocating a new skb and not giving the
> corresponding descriptor to the asic) and everything works just
> fine,  with just a slight drop in throughput (down to 987 Mbits/sec,
> still well ahead of the always-copy).

Did your testing account for some memory pressure ?

> So do we really need to be that concerned about occasional
> allocation failure?

See $search_engine +r8169 high order memory allocation failure.

> And if someone is that concerned,   then,   with my proposal,  they
> can leave the rx_copybreak at its default of 16383,   when every
> packet is copied anyway.     (My patch takes a slightly different
> approach if the allocation of the new skb fails  -   current 2.6.39
> drops the packet,   I would propose to unhook and retain the
> descriptor because I can replenish later  -  but that is also
> debatable).     Also that's why I favour making the rx ring size
> configurable.

Why don't you send the patch through the mailing list ?

(hint, hint)

> On 04/18/11 14:21, Francois Romieu wrote:
> >Short answer: it's mostly related to CVE-2009-4537 (see git log).
> 
> I understand the need to make the rx_buf_size 16383 to defeat the
> DOS attacker,   no suggestion to alter that.     I'm just not sure I
> see why that has to imply the always_copy.

Because of high-order memory allocation failure under memory pressure and
memory wastage. Btw several 816x have limited jumbo frames abilities.

-- 
Ueimor

^ permalink raw reply

* Re: Suspend/resume - slow resume
From: Ciprian Docan @ 2011-04-20 19:10 UTC (permalink / raw)
  To: Francois Romieu
  Cc: Linus Torvalds, netdev, Linux Kernel Mailing List, Len Brown,
	Pavel Machek, Rafael, J. Wysocki, Greg KH, nic_swsd
In-Reply-To: <20110420181602.GA18740@electric-eye.fr.zoreil.com>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 201 bytes --]


Hello Francois,

I tried to unload the module, but it crashed. I re-booted the machine and 
tried to unload the module again. I got the output from dmesg; please see 
attached.

Regards,
--
 	Ciprian

[-- Attachment #2: Type: TEXT/PLAIN, Size: 7026 bytes --]

[    7.007361] r8169 Gigabit Ethernet driver 2.3LK-NAPI loaded
[    7.007383] r8169 0000:02:00.0: PCI INT A -> GSI 16 (level, low) -> IRQ 16
[    7.007454] r8169 0000:02:00.0: setting latency timer to 64
[    7.007506] r8169 0000:02:00.0: irq 45 for MSI/MSI-X
[    7.007628] r8169 0000:02:00.0: eth0: RTL8168d/8111d at 0xffffc90011810000, 64:31:50:69:2c:61, XID 083000c0 IRQ 45

...

[  109.216074] r8169 0000:02:00.0: PCI INT A disabled
[  116.748813] EXT4-fs (dm-1): mounted filesystem with ordered data mode. Opts: (null)
[  166.963998] r8169 Gigabit Ethernet driver 2.3LK-NAPI loaded
[  166.964100] r8169 0000:02:00.0: PCI INT A -> GSI 16 (level, low) -> IRQ 16
[  166.964319] r8169 0000:02:00.0: setting latency timer to 64
[  166.964379] r8169 0000:02:00.0: irq 45 for MSI/MSI-X
[  166.964593] r8169 0000:02:00.0: eth0: RTL8168d/8111d at 0xffffc90022fc4000, 64:31:50:69:2c:61, XID 083000c0 IRQ 45
[  174.917306] BUG: unable to handle kernel paging request at fffffffffffffffd
[  174.919573] IP: [<ffffffff8127b267>] firmware_free_data+0xf/0x55
[  174.921828] PGD 1605067 PUD 1606067 PMD 0 
[  174.924088] Oops: 0000 [#1] SMP 
[  174.926323] last sysfs file: /sys/devices/pci0000:00/0000:00:1c.1/0000:03:00.0/ieee80211/phy0/rfkill0/uevent
[  174.928649] CPU 2 
[  174.928670] Modules linked in: r8169(-) aes_x86_64 aes_generic fuse hidp fbcon tileblit font bitblit softcursor rfcomm bnep acpi_cpufreq mperf ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables ipv6 snd_hda_codec_hdmi snd_hda_codec_idt snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm arc4 snd_timer snd soundcore ecb btusb processor thermal uvcvideo hp_wmi psmouse fan videodev i2c_i801 sparse_keymap snd_page_alloc pcspkr brcmsmac(C) hp_accel iTCO_wdt mac80211 iTCO_vendor_support bluetooth lis3lv02d mii input_polldev serio_raw wmi v4l2_compat_ioctl32 cfg80211 rfkill microcode i915 drm_kms_helper drm fb fbdev intel_agp i2c_algo_bit cfbcopyarea intel_gtt agpgart i2c_core video cfbimgblt cfbfillrect [last unloaded: r8169]
[  174.944284] 
[  174.946871] Pid: 2003, comm: rmmod Tainted: G         C  2.6.39-rc3+ #13 Hewlett-Packard HP Pavilion dm4 Notebook PC     /1603
[  174.949618] RIP: 0010:[<ffffffff8127b267>]  [<ffffffff8127b267>] firmware_free_data+0xf/0x55
[  174.952348] RSP: 0018:ffff88023ad45d48  EFLAGS: 00010246
[  174.954988] RAX: ffffffff815be000 RBX: fffffffffffffff5 RCX: ffff88024b1c0398
[  174.957707] RDX: 0000000000000000 RSI: 0000000000000037 RDI: fffffffffffffff5
[  174.960417] RBP: ffff88023ad45d58 R08: 0000000000000100 R09: ffffffff8172f690
[  174.963039] R10: 000000000000000f R11: ffff88024b1c0000 R12: ffff88024f790000
[  174.965662] R13: ffff88024b1c0000 R14: ffff88024f790090 R15: 0000000000000001
[  174.968194] FS:  00007faaa8774720(0000) GS:ffff88025bc80000(0000) knlGS:0000000000000000
[  174.970806] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[  174.973388] CR2: fffffffffffffffd CR3: 000000024b828000 CR4: 00000000000006e0
[  174.973392] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  174.973396] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[  174.973400] Process rmmod (pid: 2003, threadinfo ffff88023ad44000, task ffff88023adb8000)
[  174.973403] Stack:
[  174.973405]  fffffffffffffff5 ffff88024f790000 ffff88023ad45d78 ffffffff8127b2f6
[  174.973410]  ffff88023ad45d78 ffff88024b1c0740 ffff88023ad45db8 ffffffffa046e21e
[  174.973415]  ffff88024f7900f0 ffff88024f790090 ffffffffa0470030 ffff88024f790000
[  174.973420] Call Trace:
[  174.973427]  [<ffffffff8127b2f6>] release_firmware+0x49/0x4f
[  174.973437]  [<ffffffffa046e21e>] rtl8169_remove_one+0x8f/0x118 [r8169]
[  174.973446]  [<ffffffff8120435c>] pci_device_remove+0x3f/0x91
[  174.973453]  [<ffffffff8127209c>] __device_release_driver+0x83/0xd9
[  174.973458]  [<ffffffff8127278c>] driver_detach+0x86/0xae
[  174.973464]  [<ffffffff81271f4e>] bus_remove_driver+0xb9/0xdf
[  174.973469]  [<ffffffff81272af7>] driver_unregister+0x6c/0x74
[  174.973474]  [<ffffffff8120463a>] pci_unregister_driver+0x44/0x89
[  174.973482]  [<ffffffffa046e2bc>] rtl8169_cleanup_module+0x15/0x17 [r8169]
[  174.973491]  [<ffffffff8106e474>] sys_delete_module+0x1ca/0x23b
[  174.973498]  [<ffffffff81109445>] ? path_put+0x22/0x27
[  174.973505]  [<ffffffff813d43eb>] system_call_fastpath+0x16/0x1b
[  174.973508] Code: c4 18 5b 41 5c 41 5d 41 5e 41 5f c9 c3 4c 89 e7 e8 4d 8d e7 ff b8 f2 ff ff ff eb e2 55 48 89 e5 41 54 53 0f 1f 44 00 00 48 89 fb 
[  174.973529]  8b 7f 08 e8 e5 79 e6 ff 48 83 7b 10 00 74 31 45 31 e4 eb 0e 
[  174.973539] RIP  [<ffffffff8127b267>] firmware_free_data+0xf/0x55
[  174.973545]  RSP <ffff88023ad45d48>
[  174.973547] CR2: fffffffffffffffd
[  174.973551] ---[ end trace 4aa4e169865c5c0c ]---
[  174.973557] BUG: sleeping function called from invalid context at kernel/rwsem.c:21
[  174.973561] in_atomic(): 0, irqs_disabled(): 1, pid: 2003, name: rmmod
[  174.973565] Pid: 2003, comm: rmmod Tainted: G      D  C  2.6.39-rc3+ #13
[  174.973569] Call Trace:
[  174.973576]  [<ffffffff8102dd7b>] __might_sleep+0xeb/0xf0
[  174.973583]  [<ffffffff813cd632>] down_read+0x24/0x3b
[  174.973589]  [<ffffffff810768f9>] acct_collect+0x4d/0x181
[  174.973595]  [<ffffffff81041ccd>] do_exit+0x22d/0x70a
[  174.973600]  [<ffffffff813cde9a>] ? _raw_spin_lock_irqsave+0x12/0x2f
[  174.973605]  [<ffffffff8103fc36>] ? kmsg_dump+0x49/0xdb
[  174.973610]  [<ffffffff813ceeea>] oops_end+0xb7/0xbf
[  174.973616]  [<ffffffff81022b3b>] no_context+0x1f9/0x208
[  174.973620]  [<ffffffff813cc815>] ? schedule+0x664/0x67e
[  174.973626]  [<ffffffff81022cd2>] __bad_area_nosemaphore+0x188/0x1ab
[  174.973631]  [<ffffffff81022d08>] bad_area_nosemaphore+0x13/0x15
[  174.973636]  [<ffffffff813d0d9d>] do_page_fault+0x16f/0x342
[  174.973643]  [<ffffffff8101c67b>] ? flat_send_IPI_allbutself+0x73/0x7e
[  174.973649]  [<ffffffff81018300>] ? native_send_call_func_ipi+0x76/0x96
[  174.973655]  [<ffffffff8106b119>] ? smp_call_function_many+0x1de/0x1ed
[  174.973661]  [<ffffffff813ce49f>] page_fault+0x1f/0x30
[  174.973667]  [<ffffffff8127b267>] ? firmware_free_data+0xf/0x55
[  174.973672]  [<ffffffff8127b2f6>] release_firmware+0x49/0x4f
[  174.973679]  [<ffffffffa046e21e>] rtl8169_remove_one+0x8f/0x118 [r8169]
[  174.973685]  [<ffffffff8120435c>] pci_device_remove+0x3f/0x91
[  174.973691]  [<ffffffff8127209c>] __device_release_driver+0x83/0xd9
[  174.973696]  [<ffffffff8127278c>] driver_detach+0x86/0xae
[  174.973701]  [<ffffffff81271f4e>] bus_remove_driver+0xb9/0xdf
[  174.973706]  [<ffffffff81272af7>] driver_unregister+0x6c/0x74
[  174.973711]  [<ffffffff8120463a>] pci_unregister_driver+0x44/0x89
[  174.973719]  [<ffffffffa046e2bc>] rtl8169_cleanup_module+0x15/0x17 [r8169]
[  174.973724]  [<ffffffff8106e474>] sys_delete_module+0x1ca/0x23b
[  174.973729]  [<ffffffff81109445>] ? path_put+0x22/0x27
[  174.973735]  [<ffffffff813d43eb>] system_call_fastpath+0x16/0x1b

^ permalink raw reply

* Re: can: add missing socket check in can/raw release.
From: Dave Jones @ 2011-04-20 19:04 UTC (permalink / raw)
  To: David Miller; +Cc: socketcan, netdev
In-Reply-To: <20110420.120218.15220634.davem@davemloft.net>

On Wed, Apr 20, 2011 at 12:02:18PM -0700, David Miller wrote:
 > From: Oliver Hartkopp <socketcan@hartkopp.net>
 > Date: Wed, 20 Apr 2011 13:46:40 +0200
 > 
 > > We can get here with a NULL socket argument passed from userspace,
 > > so we need to handle it accordingly.
 > > 
 > > Thanks to Dave Jones pointing at this issue in net/can/bcm.c
 > > 
 > > Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
 > 
 > I already applied Dave's patch from last night.

This is in a different path.

	Dave

^ permalink raw reply

* Re: can: add missing socket check in can/raw release.
From: David Miller @ 2011-04-20 19:02 UTC (permalink / raw)
  To: socketcan; +Cc: netdev, davej
In-Reply-To: <4DAEC7A0.1090103@hartkopp.net>

From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Wed, 20 Apr 2011 13:46:40 +0200

> We can get here with a NULL socket argument passed from userspace,
> so we need to handle it accordingly.
> 
> Thanks to Dave Jones pointing at this issue in net/can/bcm.c
> 
> Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>

I already applied Dave's patch from last night.

^ permalink raw reply

* Re: Suspend/resume - slow resume
From: Ciprian Docan @ 2011-04-20 18:51 UTC (permalink / raw)
  To: Francois Romieu
  Cc: Linus Torvalds, netdev, Linux Kernel Mailing List, Len Brown,
	Pavel Machek, Rafael, J. Wysocki, Greg KH, nic_swsd
In-Reply-To: <20110420181602.GA18740@electric-eye.fr.zoreil.com>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 518 bytes --]


Hello Francois,

I tried your patch and my machine resumes properly.

> The firmware is cached during the first successfull call to open() and
> released once the network device is unregistered. The driver uses the
> cached firmware between open() and unregister_netdev().

Here is what I did to test it: I compiled the patched module, unloaded the 
previous module, loaded the new one, and then suspend & resume. Output of 
dmesg attached. Please let me know if you need any other information.

Regards,
--
 	Ciprian

[-- Attachment #2: Type: TEXT/PLAIN, Size: 12206 bytes --]

[74001.664088] r8169 0000:02:00.0: PCI INT A disabled
[74033.821473] r8169 Gigabit Ethernet driver 2.3LK-NAPI loaded
[74033.821497] r8169 0000:02:00.0: PCI INT A -> GSI 16 (level, low) -> IRQ 16
[74033.822310] r8169 0000:02:00.0: setting latency timer to 64
[74033.822364] r8169 0000:02:00.0: irq 45 for MSI/MSI-X
[74033.822572] r8169 0000:02:00.0: eth0: RTL8168d/8111d at 0xffffc90027e2e000, 64:31:50:69:2c:61, XID 083000c0 IRQ 45
[74058.429717] wl_ops_bss_info_changed: qos enabled: false (implement)
[74058.429729] brcmsmac: wl_ops_bss_info_changed: disassociated
[74058.429735] wl_ops_bss_info_changed: use_cts_prot: false (implement)
[74058.429741] wl_ops_bss_info_changed: short preamble: false (implement)
[74058.429754] wl_ops_bss_info_changed: arp filtering: enabled false, count 1 (implement)
[74058.429764] wlan0: deauthenticating from 00:1e:2a:01:3e:bb by local choice (reason=3)
[74058.455026] wl_ops_bss_info_changed: BSS idle: true (implement)
[74058.455046] cfg80211: All devices are disconnected, going to restore regulatory settings
[74058.455055] cfg80211: Restoring regulatory settings
[74058.455064] cfg80211: Calling CRDA to update world regulatory domain
[74058.455069] wl_ops_bss_info_changed: BSS idle: false (implement)
[74058.460281] cfg80211: Updating information on frequency 2412 MHz for a 20 MHz width channel with regulatory rule:
[74058.460291] cfg80211: 2402000 KHz - 2472000 KHz @  KHz), (300 mBi, 2700 mBm)
[74058.460297] cfg80211: Updating information on frequency 2417 MHz for a 20 MHz width channel with regulatory rule:
[74058.460305] cfg80211: 2402000 KHz - 2472000 KHz @  KHz), (300 mBi, 2700 mBm)
[74058.460311] cfg80211: Updating information on frequency 2422 MHz for a 20 MHz width channel with regulatory rule:
[74058.460319] cfg80211: 2402000 KHz - 2472000 KHz @  KHz), (300 mBi, 2700 mBm)
[74058.460325] cfg80211: Updating information on frequency 2427 MHz for a 20 MHz width channel with regulatory rule:
[74058.460330] cfg80211: 2402000 KHz - 2472000 KHz @  KHz), (300 mBi, 2700 mBm)
[74058.460334] cfg80211: Updating information on frequency 2432 MHz for a 20 MHz width channel with regulatory rule:
[74058.460339] cfg80211: 2402000 KHz - 2472000 KHz @  KHz), (300 mBi, 2700 mBm)
[74058.460344] cfg80211: Updating information on frequency 2437 MHz for a 20 MHz width channel with regulatory rule:
[74058.460349] cfg80211: 2402000 KHz - 2472000 KHz @  KHz), (300 mBi, 2700 mBm)
[74058.460353] cfg80211: Updating information on frequency 2442 MHz for a 20 MHz width channel with regulatory rule:
[74058.460358] cfg80211: 2402000 KHz - 2472000 KHz @  KHz), (300 mBi, 2700 mBm)
[74058.460363] cfg80211: Updating information on frequency 2447 MHz for a 20 MHz width channel with regulatory rule:
[74058.460368] cfg80211: 2402000 KHz - 2472000 KHz @  KHz), (300 mBi, 2700 mBm)
[74058.460372] cfg80211: Updating information on frequency 2452 MHz for a 20 MHz width channel with regulatory rule:
[74058.460377] cfg80211: 2402000 KHz - 2472000 KHz @  KHz), (300 mBi, 2700 mBm)
[74058.460382] cfg80211: Updating information on frequency 2457 MHz for a 20 MHz width channel with regulatory rule:
[74058.460387] cfg80211: 2402000 KHz - 2472000 KHz @  KHz), (300 mBi, 2700 mBm)
[74058.460391] cfg80211: Updating information on frequency 2462 MHz for a 20 MHz width channel with regulatory rule:
[74058.460396] cfg80211: 2402000 KHz - 2472000 KHz @  KHz), (300 mBi, 2700 mBm)
[74058.460400] cfg80211: Disabling freq 2467 MHz
[74058.460403] cfg80211: Disabling freq 2472 MHz
[74058.460405] cfg80211: Disabling freq 2484 MHz
[74058.460410] cfg80211: World regulatory domain updated:
[74058.460413] cfg80211:     (start_freq - end_freq @ bandwidth), (max_antenna_gain, max_eirp)
[74058.460418] cfg80211:     (2402000 KHz - 2472000 KHz @ 40000 KHz), (300 mBi, 2000 mBm)
[74058.460423] cfg80211:     (2457000 KHz - 2482000 KHz @ 20000 KHz), (300 mBi, 2000 mBm)
[74058.460427] cfg80211:     (2474000 KHz - 2494000 KHz @ 20000 KHz), (300 mBi, 2000 mBm)
[74058.460432] cfg80211:     (5170000 KHz - 5250000 KHz @ 40000 KHz), (300 mBi, 2000 mBm)
[74058.460437] cfg80211:     (5735000 KHz - 5835000 KHz @ 40000 KHz), (300 mBi, 2000 mBm)
[74058.464926] wl_ops_bss_info_changed: BSS idle: true (implement)
[74059.892010] PM: Syncing filesystems ... done.
[74059.909543] PM: Preparing system for mem sleep
[74060.072757] Freezing user space processes ... (elapsed 0.01 seconds) done.
[74060.084607] Freezing remaining freezable tasks ... (elapsed 0.01 seconds) done.
[74060.095548] PM: Entering mem sleep
[74060.095712] Suspending console(s) (use no_console_suspend to debug)
[74060.107943] sd 0:0:0:0: [sda] Synchronizing SCSI cache
[74060.108024] sd 0:0:0:0: [sda] Stopping disk
[74060.147028] HDA Intel 0000:01:00.1: PCI INT B disabled
[74060.147031] brcmsmac 0000:03:00.0: PCI INT A disabled
[74060.147064] ACPI handle has no context!
[74060.157421] ehci_hcd 0000:00:1d.0: PCI INT A disabled
[74060.157444] ehci_hcd 0000:00:1a.0: PCI INT A disabled
[74060.248878] HDA Intel 0000:00:1b.0: PCI INT A disabled
[74062.612741] PM: suspend of devices complete after 2512.037 msecs
[74062.612949] r8169 0000:02:00.0: PME# enabled
[74062.621666] r8169 0000:02:00.0: wake-up capability enabled by ACPI
[74062.654658] PM: late suspend of devices complete after 42.007 msecs
[74062.654817] ACPI: Preparing to enter system sleep state S3
[74062.700627] PM: Saving platform NVS memory
[74062.704367] Disabling non-boot CPUs ...
[74062.706846] CPU 1 is now offline
[74062.724771] CPU 2 is now offline
[74062.734603] CPU 3 is now offline
[74062.734969] Extended CMOS year: 2000
[74062.735165] ACPI: Low-level resume complete
[74062.735220] PM: Restoring platform NVS memory
[74062.735780] Extended CMOS year: 2000
[74062.735802] Enabling non-boot CPUs ...
[74062.735923] Booting Node 0 Processor 1 APIC 0x1
[74062.735924] smpboot cpu 1: start_ip = 98000
[74062.826473] CPU1 is up
[74062.826637] Booting Node 0 Processor 2 APIC 0x4
[74062.826641] smpboot cpu 2: start_ip = 98000
[74062.826826] Switched to NOHz mode on CPU #1
[74062.917217] CPU2 is up
[74062.917382] Booting Node 0 Processor 3 APIC 0x5
[74062.917386] smpboot cpu 3: start_ip = 98000
[74062.917580] Switched to NOHz mode on CPU #2
[74063.008091] CPU3 is up
[74063.009302] Switched to NOHz mode on CPU #3
[74063.009672] ACPI: Waking up from system sleep state S3
[74063.090894] pcieport 0000:00:01.0: restoring config space at offset 0x1 (was 0x100007, writing 0x100407)
[74063.090915] i915 0000:00:02.0: restoring config space at offset 0x1 (was 0x900007, writing 0x900407)
[74063.090974] ehci_hcd 0000:00:1a.0: restoring config space at offset 0x1 (was 0x2900006, writing 0x2900002)
[74063.091009] HDA Intel 0000:00:1b.0: restoring config space at offset 0x1 (was 0x100006, writing 0x100002)
[74063.091117] ehci_hcd 0000:00:1d.0: restoring config space at offset 0x1 (was 0x2900006, writing 0x2900002)
[74063.091140] pci 0000:00:1e.0: restoring config space at offset 0xa (was 0xffffffff, writing 0x0)
[74063.091213] ahci 0000:00:1f.2: restoring config space at offset 0x1 (was 0x2b00007, writing 0x2b00407)
[74063.091336] HDA Intel 0000:01:00.1: restoring config space at offset 0x1 (was 0x100007, writing 0x100003)
[74063.091389] r8169 0000:02:00.0: restoring config space at offset 0x1 (was 0x100007, writing 0x100407)
[74063.091477] brcmsmac 0000:03:00.0: restoring config space at offset 0xf (was 0x100, writing 0x10a)
[74063.091496] brcmsmac 0000:03:00.0: restoring config space at offset 0x4 (was 0x4, writing 0xc2400004)
[74063.091502] brcmsmac 0000:03:00.0: restoring config space at offset 0x3 (was 0x0, writing 0x10)
[74063.091508] brcmsmac 0000:03:00.0: restoring config space at offset 0x1 (was 0x100000, writing 0x100006)
[74063.091655] PM: early resume of devices complete after 0.826 msecs
[74063.091803] i915 0000:00:02.0: setting latency timer to 64
[74063.091883] HDA Intel 0000:01:00.1: PCI INT B -> GSI 17 (level, low) -> IRQ 17
[74063.091888] ehci_hcd 0000:00:1a.0: PCI INT A -> GSI 16 (level, low) -> IRQ 16
[74063.091890] HDA Intel 0000:01:00.1: setting latency timer to 64
[74063.091895] ehci_hcd 0000:00:1a.0: setting latency timer to 64
[74063.091938] HDA Intel 0000:01:00.1: irq 46 for MSI/MSI-X
[74063.091945] HDA Intel 0000:00:1b.0: PCI INT A -> GSI 22 (level, low) -> IRQ 22
[74063.091953] HDA Intel 0000:00:1b.0: setting latency timer to 64
[74063.091987] HDA Intel 0000:00:1b.0: irq 47 for MSI/MSI-X
[74063.091990] ehci_hcd 0000:00:1d.0: PCI INT A -> GSI 20 (level, low) -> IRQ 20
[74063.091996] ehci_hcd 0000:00:1d.0: setting latency timer to 64
[74063.092023] pci 0000:00:1e.0: setting latency timer to 64
[74063.092088] ahci 0000:00:1f.2: setting latency timer to 64
[74063.092096] brcmsmac 0000:03:00.0: PCI INT A -> GSI 17 (level, low) -> IRQ 17
[74063.092107] brcmsmac 0000:03:00.0: setting latency timer to 64
[74063.102150] sd 0:0:0:0: [sda] Starting disk
[74063.105668] r8169 0000:02:00.0: wake-up capability disabled by ACPI
[74063.105674] r8169 0000:02:00.0: PME# disabled
[74063.322691] usb 1-1.4: reset full speed USB device number 3 using ehci_hcd
[74063.406321] ata2: SATA link up 1.5 Gbps (SStatus 113 SControl 300)
[74063.410296] ata5: SATA link down (SStatus 0 SControl 300)
[74063.440225] ata2.00: configured for UDMA/100
[74063.480281] usb 2-1.3: reset full speed USB device number 3 using ehci_hcd
[74063.566915] btusb 2-1.3:1.0: no reset_resume for driver btusb?
[74063.566921] btusb 2-1.3:1.1: no reset_resume for driver btusb?
[74063.637853] usb 2-1.6: reset high speed USB device number 5 using ehci_hcd
[74064.566218] hci_cmd_timer: hci0 command tx timeout
[74065.849747] ata1: SATA link up 3.0 Gbps (SStatus 123 SControl 300)
[74065.861688] ata1.00: configured for UDMA/133
[74066.466771] PM: resume of devices complete after 3384.208 msecs
[74066.467065] PM: Finishing wakeup.
[74066.467067] Restarting tasks ... done.
[74066.475211] video LNXVIDEO:00: Restoring backlight state
[74066.475339] video LNXVIDEO:01: Restoring backlight state
[74066.740330] wl_ops_bss_info_changed: use_cts_prot: false (implement)
[74066.740334] wl_ops_bss_info_changed: short preamble: false (implement)
[74066.740349] wl_ops_config: change monitor mode: false (implement)
[74066.740350] wl_ops_config: change power-save mode: false (implement)
[74066.742592] wl_ops_bss_info_changed: qos enabled: false (implement)
[74066.743416] wl_ops_bss_info_changed: BSS idle: false (implement)
[74066.743435] ADDRCONF(NETDEV_UP): wlan0: link is not ready
[74066.833025] wl_ops_bss_info_changed: BSS idle: true (implement)
[74066.833049] wl_ops_bss_info_changed: BSS idle: false (implement)
[74066.922738] wl_ops_bss_info_changed: BSS idle: true (implement)
[74067.470305] hci_cmd_timer: hci0 command tx timeout
[74071.913773] wl_ops_bss_info_changed: BSS idle: false (implement)
[74072.813906] wl_ops_bss_info_changed: BSS idle: true (implement)
[74072.813930] wl_ops_bss_info_changed: BSS idle: false (implement)
[74072.813990] wlan0: deauthenticating from 00:1e:2a:00:2d:fe by local choice (reason=3)
[74072.903591] wlan0: authenticate with 00:1e:2a:00:2d:fe (try 1)
[74072.913250] wlan0: authenticated
[74072.913271] wl_ops_bss_info_changed: BSS idle: true (implement)
[74072.913304] wl_ops_bss_info_changed: BSS idle: false (implement)
[74072.913312] wlan0: associate with 00:1e:2a:00:2d:fe (try 1)
[74072.916142] wlan0: RX AssocResp from 00:1e:2a:00:2d:fe (capab=0x411 status=0 aid=1)
[74072.916147] wlan0: associated
[74072.917091] wl_ops_bss_info_changed: qos enabled: true (implement)
[74072.917096] brcmsmac: wl_ops_bss_info_changed: associated
[74072.917100] wl_ops_bss_info_changed: use_cts_prot: true (implement)
[74072.917103] wl_ops_bss_info_changed: short preamble: true (implement)
[74072.917111] wl_ops_bss_info_changed: Need to change Basic Rates: 0xf (implement)
[74072.917118] wl_ops_bss_info_changed: arp filtering: enabled true, count 0 (implement)
[74072.918403] ADDRCONF(NETDEV_CHANGE): wlan0: link becomes ready
[74073.806196] wl_ops_bss_info_changed: arp filtering: enabled true, count 1 (implement)

^ permalink raw reply

* [PATCH 1/6] IPVS: Change of socket usage to enable name space exit.
From: Hans Schillstrom @ 2011-04-20 18:33 UTC (permalink / raw)
  To: horms, ja, ebiederm, lvs-devel, netdev, netfilter-devel
  Cc: hans.schillstrom, Hans Schillstrom
In-Reply-To: <1303324434-14024-1-git-send-email-hans@schillstrom.com>

To work this patch also needs the other patches in this series.

VERSION: 2

DESCRIPTION

If the sync daemons run in a name space while it crashes
or get killed, there is no way to stop them except for a reboot.
When all patches are there, ip_vs_core will handle register_pernet_(),
i.e. ip_vs_sync_init() and ip_vs_sync_cleanup() will be removed.

Kernel threads should not increment the use count of a socket.
By calling sk_change_net() after creating a socket this is avoided.
sock_release cant be used intead sk_release_kernel() should be used.

Thanks Eric W Biederman for your advices.

This patch is based on net-next-2.6  ver 2.6.39-rc2

CHANGES
 Rev 2
   sock_create_kern() used instead of __sock_create()
   return codes of kthread_stop() used.

Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
---
 net/netfilter/ipvs/ip_vs_core.c |    2 +-
 net/netfilter/ipvs/ip_vs_sync.c |   58 +++++++++++++++++++++++++--------------
 2 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 07accf6..a0791dc 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1896,7 +1896,7 @@ static int __net_init __ip_vs_init(struct net *net)

 static void __net_exit __ip_vs_cleanup(struct net *net)
 {
-	IP_VS_DBG(10, "ipvs netns %d released\n", net_ipvs(net)->gen);
+	IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
 }

 static struct pernet_operations ipvs_core_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 3e7961e..c187823 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1303,13 +1303,18 @@ static struct socket *make_send_sock(struct net *net)
 	struct socket *sock;
 	int result;

-	/* First create a socket */
-	result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
+	/* First create a socket move it to right name space later */
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
-
+	/*
+	 * Kernel sockets that are a part of a namespace, should not
+	 * hold a reference to a namespace in order to allow to stop it.
+	 * After sk_change_net should be released using sk_release_kernel.
+	 */
+	sk_change_net(sock->sk, net);
 	result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
 	if (result < 0) {
 		pr_err("Error setting outbound mcast interface\n");
@@ -1334,8 +1339,8 @@ static struct socket *make_send_sock(struct net *net)

 	return sock;

-  error:
-	sock_release(sock);
+error:
+	sk_release_kernel(sock->sk);
 	return ERR_PTR(result);
 }

@@ -1350,12 +1355,17 @@ static struct socket *make_receive_sock(struct net *net)
 	int result;

 	/* First create a socket */
-	result = __sock_create(net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock, 1);
+	result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
 	if (result < 0) {
 		pr_err("Error during creation of socket; terminating\n");
 		return ERR_PTR(result);
 	}
-
+	/*
+	 * Kernel sockets that are a part of a namespace, should not
+	 * hold a reference to a namespace in order to allow to stop it.
+	 * After sk_change_net should be released using sk_release_kernel.
+	 */
+	sk_change_net(sock->sk, net);
 	/* it is equivalent to the REUSEADDR option in user-space */
 	sock->sk->sk_reuse = 1;

@@ -1377,8 +1387,8 @@ static struct socket *make_receive_sock(struct net *net)

 	return sock;

-  error:
-	sock_release(sock);
+error:
+	sk_release_kernel(sock->sk);
 	return ERR_PTR(result);
 }

@@ -1473,7 +1483,7 @@ static int sync_thread_master(void *data)
 		ip_vs_sync_buff_release(sb);

 	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
+	sk_release_kernel(tinfo->sock->sk);
 	kfree(tinfo);

 	return 0;
@@ -1513,7 +1523,7 @@ static int sync_thread_backup(void *data)
 	}

 	/* release the sending multicast socket */
-	sock_release(tinfo->sock);
+	sk_release_kernel(tinfo->sock->sk);
 	kfree(tinfo->buf);
 	kfree(tinfo);

@@ -1601,7 +1611,7 @@ outtinfo:
 outbuf:
 	kfree(buf);
 outsocket:
-	sock_release(sock);
+	sk_release_kernel(sock->sk);
 out:
 	return result;
 }
@@ -1610,6 +1620,7 @@ out:
 int stop_sync_thread(struct net *net, int state)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
+	int retc = -EINVAL;

 	IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));

@@ -1629,7 +1640,7 @@ int stop_sync_thread(struct net *net, int state)
 		spin_lock_bh(&ipvs->sync_lock);
 		ipvs->sync_state &= ~IP_VS_STATE_MASTER;
 		spin_unlock_bh(&ipvs->sync_lock);
-		kthread_stop(ipvs->master_thread);
+		retc = kthread_stop(ipvs->master_thread);
 		ipvs->master_thread = NULL;
 	} else if (state == IP_VS_STATE_BACKUP) {
 		if (!ipvs->backup_thread)
@@ -1639,16 +1650,14 @@ int stop_sync_thread(struct net *net, int state)
 			task_pid_nr(ipvs->backup_thread));

 		ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
-		kthread_stop(ipvs->backup_thread);
+		retc = kthread_stop(ipvs->backup_thread);
 		ipvs->backup_thread = NULL;
-	} else {
-		return -EINVAL;
 	}

 	/* decrease the module use count */
 	ip_vs_use_count_dec();

-	return 0;
+	return retc;
 }

 /*
@@ -1670,8 +1679,15 @@ static int __net_init __ip_vs_sync_init(struct net *net)

 static void __ip_vs_sync_cleanup(struct net *net)
 {
-	stop_sync_thread(net, IP_VS_STATE_MASTER);
-	stop_sync_thread(net, IP_VS_STATE_BACKUP);
+	int retc;
+
+	retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
+	if (retc && retc != -EINVAL)
+		pr_err("Failed to stop Master Daemon\n");
+
+	retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
+	if (retc && retc != -EINVAL)
+		pr_err("Failed to stop Backup Daemon\n");
 }

 static struct pernet_operations ipvs_sync_ops = {
@@ -1682,10 +1698,10 @@ static struct pernet_operations ipvs_sync_ops = {

 int __init ip_vs_sync_init(void)
 {
-	return register_pernet_subsys(&ipvs_sync_ops);
+	return register_pernet_device(&ipvs_sync_ops);
 }

 void ip_vs_sync_cleanup(void)
 {
-	unregister_pernet_subsys(&ipvs_sync_ops);
+	unregister_pernet_device(&ipvs_sync_ops);
 }
--
1.7.2.3


^ permalink raw reply related

* [v2 PATCH 0/6] IPVS: init and cleanup.
From: Hans Schillstrom @ 2011-04-20 18:33 UTC (permalink / raw)
  To: horms, ja, ebiederm, lvs-devel, netdev, netfilter-devel
  Cc: hans.schillstrom, Hans Schillstrom

This patch series handles exit from a network name space.

REVISION

This is version 2

OVERVIEW
Basically there was three faults in the netns implementation.
- Kernel threads hold devices and preventing an exit.
- dst cache holds references to devices.
- Services was not always released.

Patch 1 & 3 contains the functionality
      4 renames funcctions
      5 removes empty functions
      6 Debuging.

IMPLEMENTATION
- Avoid to increment the usage counter for kernel threads.
  this is done in the first patch.
- Patch 3 tries to restore the cleanup order.
  Add NETDEV_UNREGISTER notification for dst_reset

>From Julian -
"For __ip_vs_service_cleanup: it still has to use mutex.
Or we can avoid it by introducing ip_vs_unlink_service_nolock:
ip_vs_flush will look like your __ip_vs_service_cleanup and
will call ip_vs_unlink_service_nolock. ip_vs_unlink_service_nolock
will be called by ip_vs_flush and by ip_vs_unlink_service."

I will give above another try later on see if I can get it to work.
Right now ip_vs_service_net_cleanup() seems to work.


An netns exit could look like this
IPVS: Enter: __ip_vs_dev_cleanup, net/netfilter/ipvs/ip_vs_core.c
IPVS: stopping master sync thread 1286 ...
IPVS: stopping backup sync thread 1294 ...
IPVS: Leave: __ip_vs_dev_cleanup, net/netfilter/ipvs/ip_vs_core.c
IPVS: Enter: ip_vs_dst_event, net/netfilter/ipvs/ip_vs_ctl.c line
IPVS: Leave: ip_vs_dst_event, net/netfilter/ipvs/ip_vs_ctl.c line
...
IPVS: Enter: ip_vs_dst_event, net/netfilter/ipvs/ip_vs_ctl.c line
IPVS: Leave: ip_vs_dst_event, net/netfilter/ipvs/ip_vs_ctl.c line
IPVS: Enter: ip_vs_service_net_cleanup, net/netfilter/ipvs/ip_vs_ctl.c
IPVS: __ip_vs_del_service: enter
IPVS: Moving dest 192.168.1.6:0 into trash, dest->refcnt=43450
...
IPVS: Moving dest 192.168.1.3:0 into trash, dest->refcnt=43449
IPVS: __ip_vs_del_service: enter
IPVS: Removing destination 0/[2003:0000:0000:0000:0000:0002:0000:0006]:80
...
IPVS: Removing destination 0/[2003:0000:0000:0000:0000:0002:0000:0003]:80
IPVS: Removing service 0/[2003:0000:0000:0000:0000:0002:0004:0100]:80 usecnt=0
IPVS: Leave: ip_vs_service_net_cleanup, net/netfilter/ipvs/ip_vs_ctl.c
IPVS: Enter: ip_vs_control_net_cleanup, net/netfilter/ipvs/ip_vs_ctl.c
IPVS: Removing service 80/0.0.0.0:0 usecnt=0
IPVS: Leave: ip_vs_control_net_cleanup, net/netfilter/ipvs/ip_vs_ctl.c
IPVS: ipvs netns 8 released

PATCH SET
This patch set is based upon net-next-2.6 (2.6.39-rc2)

SUMMARY

 include/net/ip_vs.h              |   23 ++++--
 net/netfilter/ipvs/ip_vs_app.c   |   23 +-----
 net/netfilter/ipvs/ip_vs_conn.c  |   14 +---
 net/netfilter/ipvs/ip_vs_core.c  |  118 +++++++++++++++++++--------
 net/netfilter/ipvs/ip_vs_ctl.c   |  168 +++++++++++++++++++++++++++++++-------
 net/netfilter/ipvs/ip_vs_est.c   |   21 +----
 net/netfilter/ipvs/ip_vs_proto.c |   11 +--
 net/netfilter/ipvs/ip_vs_sync.c  |   70 ++++++++--------
 8 files changed, 281 insertions(+), 167 deletions(-)


^ permalink raw reply

* [PATCH 6/6] IPVS: add debug functions
From: Hans Schillstrom @ 2011-04-20 18:33 UTC (permalink / raw)
  To: horms, ja, ebiederm, lvs-devel, netdev, netfilter-devel
  Cc: hans.schillstrom, Hans Schillstrom
In-Reply-To: <1303324434-14024-1-git-send-email-hans@schillstrom.com>

Optional patch, but it is nice to have.

Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
---
 net/netfilter/ipvs/ip_vs_ctl.c |   13 ++++++++++++-
 1 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 34f05db..3b6c411 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -351,8 +351,11 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
 	svc->flags &= ~IP_VS_SVC_F_HASHED;
 	atomic_dec(&svc->refcnt);
 	/* No more services then no need for input */
-	if (atomic_read(&svc->refcnt) == 0)
+	if (!atomic_read(&svc->refcnt)) {
 		atomic_set(&net_ipvs(svc->net)->enable, 0);
+		IP_VS_DBG(2, "Last service removed in net(%d) input disabled\n",
+			     net_ipvs(svc->net)->gen);
+	}
 	return 1;
 }

@@ -1222,6 +1225,10 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 	write_unlock_bh(&__ip_vs_svc_lock);

 	*svc_p = svc;
+	if (atomic_read(&ipvs->enable) == 0)
+		pr_info("netns(%d) enabled first service added\n",
+			 ipvs->gen);
+
 	/* Now there is a service - full throttle */
 	atomic_set(&ipvs->enable, 1);
 	return 0;
@@ -3695,6 +3702,7 @@ int __net_init ip_vs_control_net_init(struct net *net)
 	int idx;
 	struct netns_ipvs *ipvs = net_ipvs(net);

+	EnterFunction(2);
 	ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);

 	/* Initialize rs_table */
@@ -3721,6 +3729,7 @@ int __net_init ip_vs_control_net_init(struct net *net)
 	if (ip_vs_control_net_init_sysctl(net))
 		goto err;

+	LeaveFunction(2);
 	return 0;

 err:
@@ -3732,6 +3741,7 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

+	EnterFunction(2);
 	ip_vs_trash_cleanup(net);
 	ip_vs_stop_estimator(net, &ipvs->tot_stats);
 	ip_vs_control_net_cleanup_sysctl(net);
@@ -3739,6 +3749,7 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
 	proc_net_remove(net, "ip_vs_stats");
 	proc_net_remove(net, "ip_vs");
 	free_percpu(ipvs->tot_stats.cpustats);
+	LeaveFunction(2);
 }

 int __init ip_vs_control_init(void)
--
1.7.2.3


^ permalink raw reply related

* [PATCH 5/6] IPVS: remove unused init and cleanup functions.
From: Hans Schillstrom @ 2011-04-20 18:33 UTC (permalink / raw)
  To: horms, ja, ebiederm, lvs-devel, netdev, netfilter-devel
  Cc: hans.schillstrom, Hans Schillstrom
In-Reply-To: <1303324434-14024-1-git-send-email-hans@schillstrom.com>

After restructuring, there is some unused or empty functions
left to be removed.

Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
---
 include/net/ip_vs.h             |    6 ------
 net/netfilter/ipvs/ip_vs_app.c  |   10 ----------
 net/netfilter/ipvs/ip_vs_core.c |   29 ++++-------------------------
 net/netfilter/ipvs/ip_vs_est.c  |    9 ---------
 net/netfilter/ipvs/ip_vs_sync.c |    9 ---------
 5 files changed, 4 insertions(+), 59 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index f7a0fdb..4d56af7 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1123,8 +1123,6 @@ extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
 
 extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb);
 extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb);
-extern int ip_vs_app_init(void);
-extern void ip_vs_app_cleanup(void);
 
 void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe);
 void ip_vs_unbind_pe(struct ip_vs_service *svc);
@@ -1227,15 +1225,11 @@ extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
 			     __u8 syncid);
 extern int stop_sync_thread(struct net *net, int state);
 extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
-extern int ip_vs_sync_init(void);
-extern void ip_vs_sync_cleanup(void);
 
 
 /*
  *      IPVS rate estimator prototypes (from ip_vs_est.c)
  */
-extern int ip_vs_estimator_init(void);
-extern void ip_vs_estimator_cleanup(void);
 extern void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats);
 extern void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats);
 extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 81b299d..5f34cf4 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -589,13 +589,3 @@ void __net_exit ip_vs_app_net_cleanup(struct net *net)
 {
 	proc_net_remove(net, "ip_vs_app");
 }
-
-int __init ip_vs_app_init(void)
-{
-	return 0;
-}
-
-
-void ip_vs_app_cleanup(void)
-{
-}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 00da1e5..48eb587 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1973,36 +1973,23 @@ static int __init ip_vs_init(void)
 {
 	int ret;
 
-	ip_vs_estimator_init();
 	ret = ip_vs_control_init();
 	if (ret < 0) {
 		pr_err("can't setup control.\n");
-		goto cleanup_estimator;
+		goto exit;
 	}
 
 	ip_vs_protocol_init();
 
-	ret = ip_vs_app_init();
-	if (ret < 0) {
-		pr_err("can't setup application helper.\n");
-		goto cleanup_protocol;
-	}
-
 	ret = ip_vs_conn_init();
 	if (ret < 0) {
 		pr_err("can't setup connection table.\n");
-		goto cleanup_app;
-	}
-
-	ret = ip_vs_sync_init();
-	if (ret < 0) {
-		pr_err("can't setup sync data.\n");
-		goto cleanup_conn;
+		goto cleanup_protocol;
 	}
 
 	ret = register_pernet_subsys(&ipvs_core_ops);	/* Alloc ip_vs struct */
 	if (ret < 0)
-		goto cleanup_sync;
+		goto cleanup_conn;
 
 	ret = register_pernet_device(&ipvs_core_dev_ops);
 	if (ret < 0)
@@ -2022,17 +2009,12 @@ cleanup_dev:
 	unregister_pernet_device(&ipvs_core_dev_ops);
 cleanup_sub:
 	unregister_pernet_subsys(&ipvs_core_ops);
-cleanup_sync:
-	ip_vs_sync_cleanup();
 cleanup_conn:
 	ip_vs_conn_cleanup();
-cleanup_app:
-	ip_vs_app_cleanup();
 cleanup_protocol:
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
-cleanup_estimator:
-	ip_vs_estimator_cleanup();
+exit:
 	return ret;
 }
 
@@ -2041,12 +2023,9 @@ static void __exit ip_vs_cleanup(void)
 	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 	unregister_pernet_device(&ipvs_core_dev_ops);
 	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
-	ip_vs_sync_cleanup();
 	ip_vs_conn_cleanup();
-	ip_vs_app_cleanup();
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
-	ip_vs_estimator_cleanup();
 	pr_info("ipvs unloaded.\n");
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index f5d2a01..0fac601 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -207,12 +207,3 @@ void __net_exit ip_vs_estimator_net_cleanup(struct net *net)
 {
 	del_timer_sync(&net_ipvs(net)->est_timer);
 }
-
-int __init ip_vs_estimator_init(void)
-{
-	return 0;
-}
-
-void ip_vs_estimator_cleanup(void)
-{
-}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 1036b34..ef09afb 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1689,12 +1689,3 @@ void ip_vs_sync_net_cleanup(struct net *net)
 	if (retc && retc != -EINVAL)
 		pr_err("Failed to stop Backup Daemon\n");
 }
-
-int __init ip_vs_sync_init(void)
-{
-	return 0;
-}
-
-void ip_vs_sync_cleanup(void)
-{
-}
-- 
1.7.2.3


^ permalink raw reply related

* [PATCH 4/6] IPVS: rename of netns init and cleanup functions.
From: Hans Schillstrom @ 2011-04-20 18:33 UTC (permalink / raw)
  To: horms, ja, ebiederm, lvs-devel, netdev, netfilter-devel
  Cc: hans.schillstrom, Hans Schillstrom
In-Reply-To: <1303324434-14024-1-git-send-email-hans@schillstrom.com>

Make it more clear what the functions does,
on request by Julian.

Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
---
 include/net/ip_vs.h              |   26 +++++++++++++-------------
 net/netfilter/ipvs/ip_vs_app.c   |    4 ++--
 net/netfilter/ipvs/ip_vs_conn.c  |    4 ++--
 net/netfilter/ipvs/ip_vs_core.c  |   36 ++++++++++++++++++------------------
 net/netfilter/ipvs/ip_vs_ctl.c   |   20 ++++++++++----------
 net/netfilter/ipvs/ip_vs_est.c   |    4 ++--
 net/netfilter/ipvs/ip_vs_proto.c |    4 ++--
 net/netfilter/ipvs/ip_vs_sync.c  |    4 ++--
 8 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 83fa9e8..f7a0fdb 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1093,19 +1093,19 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
 /*
  * IPVS netns init & cleanup functions
  */
-extern int __ip_vs_estimator_init(struct net *net);
-extern int __ip_vs_control_init(struct net *net);
-extern int __ip_vs_protocol_init(struct net *net);
-extern int __ip_vs_app_init(struct net *net);
-extern int __ip_vs_conn_init(struct net *net);
-extern int __ip_vs_sync_init(struct net *net);
-extern void __ip_vs_conn_cleanup(struct net *net);
-extern void __ip_vs_app_cleanup(struct net *net);
-extern void __ip_vs_protocol_cleanup(struct net *net);
-extern void __ip_vs_control_cleanup(struct net *net);
-extern void __ip_vs_estimator_cleanup(struct net *net);
-extern void __ip_vs_sync_cleanup(struct net *net);
-extern void __ip_vs_service_cleanup(struct net *net);
+extern int ip_vs_estimator_net_init(struct net *net);
+extern int ip_vs_control_net_init(struct net *net);
+extern int ip_vs_protocol_net_init(struct net *net);
+extern int ip_vs_app_net_init(struct net *net);
+extern int ip_vs_conn_net_init(struct net *net);
+extern int ip_vs_sync_net_init(struct net *net);
+extern void ip_vs_conn_net_cleanup(struct net *net);
+extern void ip_vs_app_net_cleanup(struct net *net);
+extern void ip_vs_protocol_net_cleanup(struct net *net);
+extern void ip_vs_control_net_cleanup(struct net *net);
+extern void ip_vs_estimator_net_cleanup(struct net *net);
+extern void ip_vs_sync_net_cleanup(struct net *net);
+extern void ip_vs_service_net_cleanup(struct net *net);

 /*
  *      IPVS application functions
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 51f3af7..81b299d 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -576,7 +576,7 @@ static const struct file_operations ip_vs_app_fops = {
 };
 #endif

-int __net_init __ip_vs_app_init(struct net *net)
+int __net_init ip_vs_app_net_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -585,7 +585,7 @@ int __net_init __ip_vs_app_init(struct net *net)
 	return 0;
 }

-void __net_exit __ip_vs_app_cleanup(struct net *net)
+void __net_exit ip_vs_app_net_cleanup(struct net *net)
 {
 	proc_net_remove(net, "ip_vs_app");
 }
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index d3fd91b..b1e7a0c 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1247,7 +1247,7 @@ flush_again:
 /*
  * per netns init and exit
  */
-int __net_init __ip_vs_conn_init(struct net *net)
+int __net_init ip_vs_conn_net_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -1258,7 +1258,7 @@ int __net_init __ip_vs_conn_init(struct net *net)
 	return 0;
 }

-void __net_exit __ip_vs_conn_cleanup(struct net *net)
+void __net_exit ip_vs_conn_net_cleanup(struct net *net)
 {
 	/* flush all the connection entries first */
 	ip_vs_conn_flush(net);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 77377a8..00da1e5 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1897,22 +1897,22 @@ static int __net_init __ip_vs_init(struct net *net)
 	atomic_inc(&ipvs_netns_cnt);
 	net->ipvs = ipvs;

-	if (__ip_vs_estimator_init(net) < 0)
+	if (ip_vs_estimator_net_init(net) < 0)
 		goto estimator_fail;

-	if (__ip_vs_control_init(net) < 0)
+	if (ip_vs_control_net_init(net) < 0)
 		goto control_fail;

-	if (__ip_vs_protocol_init(net) < 0)
+	if (ip_vs_protocol_net_init(net) < 0)
 		goto protocol_fail;

-	if (__ip_vs_app_init(net) < 0)
+	if (ip_vs_app_net_init(net) < 0)
 		goto app_fail;

-	if (__ip_vs_conn_init(net) < 0)
+	if (ip_vs_conn_net_init(net) < 0)
 		goto conn_fail;

-	if (__ip_vs_sync_init(net) < 0)
+	if (ip_vs_sync_net_init(net) < 0)
 		goto sync_fail;

 	printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
@@ -1923,27 +1923,27 @@ static int __net_init __ip_vs_init(struct net *net)
  */

 sync_fail:
-	__ip_vs_conn_cleanup(net);
+	ip_vs_conn_net_cleanup(net);
 conn_fail:
-	__ip_vs_app_cleanup(net);
+	ip_vs_app_net_cleanup(net);
 app_fail:
-	__ip_vs_protocol_cleanup(net);
+	ip_vs_protocol_net_cleanup(net);
 protocol_fail:
-	__ip_vs_control_cleanup(net);
+	ip_vs_control_net_cleanup(net);
 control_fail:
-	__ip_vs_estimator_cleanup(net);
+	ip_vs_estimator_net_cleanup(net);
 estimator_fail:
 	return -ENOMEM;
 }

 static void __net_exit __ip_vs_cleanup(struct net *net)
 {
-	__ip_vs_service_cleanup(net);	/* ip_vs_flush() with locks */
-	__ip_vs_conn_cleanup(net);
-	__ip_vs_app_cleanup(net);
-	__ip_vs_protocol_cleanup(net);
-	__ip_vs_control_cleanup(net);
-	__ip_vs_estimator_cleanup(net);
+	ip_vs_service_net_cleanup(net);	/* ip_vs_flush() with locks */
+	ip_vs_conn_net_cleanup(net);
+	ip_vs_app_net_cleanup(net);
+	ip_vs_protocol_net_cleanup(net);
+	ip_vs_control_net_cleanup(net);
+	ip_vs_estimator_net_cleanup(net);
 	IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
 }

@@ -1951,7 +1951,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net)
 {
 	EnterFunction(2);
 	atomic_set(&net_ipvs(net)->enable, 0); /* Disable packet reception */
-	__ip_vs_sync_cleanup(net);
+	ip_vs_sync_net_cleanup(net);
 	LeaveFunction(2);
 }

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 70e7a5e..34f05db 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1486,7 +1486,7 @@ static int ip_vs_flush(struct net *net)
  *	Delete service by {netns} in the service table.
  *	Called by __ip_vs_cleanup()
  */
-void __ip_vs_service_cleanup(struct net *net)
+void ip_vs_service_net_cleanup(struct net *net)
 {
 	unsigned hash;
 	struct ip_vs_service *svc, *tmp;
@@ -1675,7 +1675,7 @@ proc_do_sync_mode(ctl_table *table, int write,
 /*
  *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
  *	Do not change order or insert new entries without
- *	align with netns init in __ip_vs_control_init()
+ *	align with netns init in ip_vs_control_net_init()
  */

 static struct ctl_table vs_vars[] = {
@@ -3611,7 +3611,7 @@ static void ip_vs_genl_unregister(void)
  * per netns intit/exit func.
  */
 #ifdef CONFIG_SYSCTL
-int __net_init __ip_vs_control_init_sysctl(struct net *net)
+int __net_init ip_vs_control_net_init_sysctl(struct net *net)
 {
 	int idx;
 	struct netns_ipvs *ipvs = net_ipvs(net);
@@ -3670,7 +3670,7 @@ int __net_init __ip_vs_control_init_sysctl(struct net *net)
 	return 0;
 }

-void __net_init __ip_vs_control_cleanup_sysctl(struct net *net)
+void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -3681,8 +3681,8 @@ void __net_init __ip_vs_control_cleanup_sysctl(struct net *net)

 #else

-int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
-void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { }
+int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
+void __net_init ip_vs_control_net_cleanup_sysctl(struct net *net) { }

 #endif

@@ -3690,7 +3690,7 @@ static struct notifier_block ip_vs_dst_notifier = {
 	.notifier_call = ip_vs_dst_event,
 };

-int __net_init __ip_vs_control_init(struct net *net)
+int __net_init ip_vs_control_net_init(struct net *net)
 {
 	int idx;
 	struct netns_ipvs *ipvs = net_ipvs(net);
@@ -3718,7 +3718,7 @@ int __net_init __ip_vs_control_init(struct net *net)
 	proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
 			     &ip_vs_stats_percpu_fops);

-	if (__ip_vs_control_init_sysctl(net))
+	if (ip_vs_control_net_init_sysctl(net))
 		goto err;

 	return 0;
@@ -3728,13 +3728,13 @@ err:
 	return -ENOMEM;
 }

-void __net_exit __ip_vs_control_cleanup(struct net *net)
+void __net_exit ip_vs_control_net_cleanup(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

 	ip_vs_trash_cleanup(net);
 	ip_vs_stop_estimator(net, &ipvs->tot_stats);
-	__ip_vs_control_cleanup_sysctl(net);
+	ip_vs_control_net_cleanup_sysctl(net);
 	proc_net_remove(net, "ip_vs_stats_percpu");
 	proc_net_remove(net, "ip_vs_stats");
 	proc_net_remove(net, "ip_vs");
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 508cce9..f5d2a01 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -192,7 +192,7 @@ void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
 	dst->outbps = (e->outbps + 0xF) >> 5;
 }

-int __net_init __ip_vs_estimator_init(struct net *net)
+int __net_init ip_vs_estimator_net_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -203,7 +203,7 @@ int __net_init __ip_vs_estimator_init(struct net *net)
 	return 0;
 }

-void __net_exit __ip_vs_estimator_cleanup(struct net *net)
+void __net_exit ip_vs_estimator_net_cleanup(struct net *net)
 {
 	del_timer_sync(&net_ipvs(net)->est_timer);
 }
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index eb86028..52d073c 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -316,7 +316,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
 /*
  * per network name-space init
  */
-int __net_init __ip_vs_protocol_init(struct net *net)
+int __net_init ip_vs_protocol_net_init(struct net *net)
 {
 #ifdef CONFIG_IP_VS_PROTO_TCP
 	register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
@@ -336,7 +336,7 @@ int __net_init __ip_vs_protocol_init(struct net *net)
 	return 0;
 }

-void __net_exit __ip_vs_protocol_cleanup(struct net *net)
+void __net_exit ip_vs_protocol_net_cleanup(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_proto_data *pd;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index f31ef18..1036b34 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1663,7 +1663,7 @@ int stop_sync_thread(struct net *net, int state)
 /*
  * Initialize data struct for each netns
  */
-int __net_init __ip_vs_sync_init(struct net *net)
+int __net_init ip_vs_sync_net_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -1677,7 +1677,7 @@ int __net_init __ip_vs_sync_init(struct net *net)
 	return 0;
 }

-void __ip_vs_sync_cleanup(struct net *net)
+void ip_vs_sync_net_cleanup(struct net *net)
 {
 	int retc;

--
1.7.2.3


^ permalink raw reply related

* [PATCH 3/6] IPVS: init and cleanup restructuring.
From: Hans Schillstrom @ 2011-04-20 18:33 UTC (permalink / raw)
  To: horms, ja, ebiederm, lvs-devel, netdev, netfilter-devel
  Cc: hans.schillstrom, Hans Schillstrom
In-Reply-To: <1303324434-14024-1-git-send-email-hans@schillstrom.com>

REVISION 2

DESCRIPTION
This patch tries to restore the initial init and cleanup
sequences that was before namspace patch.

The number of calls to register_pernet_device have been
reduced to one for the ip_vs.ko
Schedulers still have their own calls.

This patch adds a function __ip_vs_service_cleanup()
and a throttle or actually on/off switch for
the netfilter hooks.

The nf hooks will be enabled when the first service is loaded
and disabled when the last service is removed or when a
namespace exit starts.

CHANGES R2
	receivening av device events, suggested by Julian.
	This change add 3 new functions ip_vs_dst_event()
	ip_vs_svc_reset() and __ip_vs_dev_reset().

Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
---
 include/net/ip_vs.h              |   17 +++++
 net/netfilter/ipvs/ip_vs_app.c   |   15 +----
 net/netfilter/ipvs/ip_vs_conn.c  |   12 +---
 net/netfilter/ipvs/ip_vs_core.c  |   87 ++++++++++++++++++++++---
 net/netfilter/ipvs/ip_vs_ctl.c   |  133 ++++++++++++++++++++++++++++++++------
 net/netfilter/ipvs/ip_vs_est.c   |   14 +---
 net/netfilter/ipvs/ip_vs_proto.c |   11 +---
 net/netfilter/ipvs/ip_vs_sync.c  |   13 +---
 8 files changed, 222 insertions(+), 80 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index d516f00..83fa9e8 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -830,6 +830,7 @@ struct netns_ipvs {
 	struct list_head	sctp_apps[SCTP_APP_TAB_SIZE];
 	spinlock_t		sctp_app_lock;
 #endif
+	atomic_t		enable;          /* enable like nf_hooks do */
 	/* ip_vs_conn */
 	atomic_t		conn_count;      /*  connection counter */

@@ -1089,6 +1090,22 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
 	atomic_inc(&ctl_cp->n_control);
 }

+/*
+ * IPVS netns init & cleanup functions
+ */
+extern int __ip_vs_estimator_init(struct net *net);
+extern int __ip_vs_control_init(struct net *net);
+extern int __ip_vs_protocol_init(struct net *net);
+extern int __ip_vs_app_init(struct net *net);
+extern int __ip_vs_conn_init(struct net *net);
+extern int __ip_vs_sync_init(struct net *net);
+extern void __ip_vs_conn_cleanup(struct net *net);
+extern void __ip_vs_app_cleanup(struct net *net);
+extern void __ip_vs_protocol_cleanup(struct net *net);
+extern void __ip_vs_control_cleanup(struct net *net);
+extern void __ip_vs_estimator_cleanup(struct net *net);
+extern void __ip_vs_sync_cleanup(struct net *net);
+extern void __ip_vs_service_cleanup(struct net *net);

 /*
  *      IPVS application functions
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 2dc6de1..51f3af7 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -576,7 +576,7 @@ static const struct file_operations ip_vs_app_fops = {
 };
 #endif

-static int __net_init __ip_vs_app_init(struct net *net)
+int __net_init __ip_vs_app_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -585,26 +585,17 @@ static int __net_init __ip_vs_app_init(struct net *net)
 	return 0;
 }

-static void __net_exit __ip_vs_app_cleanup(struct net *net)
+void __net_exit __ip_vs_app_cleanup(struct net *net)
 {
 	proc_net_remove(net, "ip_vs_app");
 }

-static struct pernet_operations ip_vs_app_ops = {
-	.init = __ip_vs_app_init,
-	.exit = __ip_vs_app_cleanup,
-};
-
 int __init ip_vs_app_init(void)
 {
-	int rv;
-
-	rv = register_pernet_subsys(&ip_vs_app_ops);
-	return rv;
+	return 0;
 }


 void ip_vs_app_cleanup(void)
 {
-	unregister_pernet_subsys(&ip_vs_app_ops);
 }
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index c97bd45..d3fd91b 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1258,22 +1258,17 @@ int __net_init __ip_vs_conn_init(struct net *net)
 	return 0;
 }

-static void __net_exit __ip_vs_conn_cleanup(struct net *net)
+void __net_exit __ip_vs_conn_cleanup(struct net *net)
 {
 	/* flush all the connection entries first */
 	ip_vs_conn_flush(net);
 	proc_net_remove(net, "ip_vs_conn");
 	proc_net_remove(net, "ip_vs_conn_sync");
 }
-static struct pernet_operations ipvs_conn_ops = {
-	.init = __ip_vs_conn_init,
-	.exit = __ip_vs_conn_cleanup,
-};

 int __init ip_vs_conn_init(void)
 {
 	int idx;
-	int retc;

 	/* Compute size and mask */
 	ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
@@ -1309,17 +1304,14 @@ int __init ip_vs_conn_init(void)
 		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
 	}

-	retc = register_pernet_subsys(&ipvs_conn_ops);
-
 	/* calculate the random value for connection hash */
 	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));

-	return retc;
+	return 0;
 }

 void ip_vs_conn_cleanup(void)
 {
-	unregister_pernet_subsys(&ipvs_conn_ops);
 	/* Release the empty cache */
 	kmem_cache_destroy(ip_vs_conn_cachep);
 	vfree(ip_vs_conn_tab);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index d536b51..77377a8 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1343,6 +1343,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 		return NF_ACCEPT; /* The packet looks wrong, ignore */

 	net = skb_net(skb);
+	/* ipvs enabled in this Network Name Space ? */
+	if (!atomic_read(&net_ipvs(net)->enable))
+		return NF_ACCEPT;
+
 	pd = ip_vs_proto_data_get(net, cih->protocol);
 	if (!pd)
 		return NF_ACCEPT;
@@ -1531,6 +1535,9 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	}
 	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);

+	net = skb_net(skb);
+	if (!atomic_read(&net_ipvs(net)->enable))
+		return NF_ACCEPT;
 	/* Bad... Do not break raw sockets */
 	if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
 		     af == AF_INET)) {
@@ -1562,7 +1569,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 			ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 		}

-	net = skb_net(skb);
 	/* Protocol supported? */
 	pd = ip_vs_proto_data_get(net, iph.protocol);
 	if (unlikely(!pd))
@@ -1588,7 +1594,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
 	}

 	IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
-	net = skb_net(skb);
 	ipvs = net_ipvs(net);
 	/* Check the server status */
 	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
@@ -1884,21 +1889,72 @@ static int __net_init __ip_vs_init(struct net *net)
 		pr_err("%s(): no memory.\n", __func__);
 		return -ENOMEM;
 	}
+	/* Hold the beast until a service is registerd */
+	atomic_set(&ipvs->enable, 0);
 	ipvs->net = net;
 	/* Counters used for creating unique names */
 	ipvs->gen = atomic_read(&ipvs_netns_cnt);
 	atomic_inc(&ipvs_netns_cnt);
 	net->ipvs = ipvs;
+
+	if (__ip_vs_estimator_init(net) < 0)
+		goto estimator_fail;
+
+	if (__ip_vs_control_init(net) < 0)
+		goto control_fail;
+
+	if (__ip_vs_protocol_init(net) < 0)
+		goto protocol_fail;
+
+	if (__ip_vs_app_init(net) < 0)
+		goto app_fail;
+
+	if (__ip_vs_conn_init(net) < 0)
+		goto conn_fail;
+
+	if (__ip_vs_sync_init(net) < 0)
+		goto sync_fail;
+
 	printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
 			 sizeof(struct netns_ipvs), ipvs->gen);
 	return 0;
+/*
+ * Error handling
+ */
+
+sync_fail:
+	__ip_vs_conn_cleanup(net);
+conn_fail:
+	__ip_vs_app_cleanup(net);
+app_fail:
+	__ip_vs_protocol_cleanup(net);
+protocol_fail:
+	__ip_vs_control_cleanup(net);
+control_fail:
+	__ip_vs_estimator_cleanup(net);
+estimator_fail:
+	return -ENOMEM;
 }

 static void __net_exit __ip_vs_cleanup(struct net *net)
 {
+	__ip_vs_service_cleanup(net);	/* ip_vs_flush() with locks */
+	__ip_vs_conn_cleanup(net);
+	__ip_vs_app_cleanup(net);
+	__ip_vs_protocol_cleanup(net);
+	__ip_vs_control_cleanup(net);
+	__ip_vs_estimator_cleanup(net);
 	IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
 }

+static void __net_exit __ip_vs_dev_cleanup(struct net *net)
+{
+	EnterFunction(2);
+	atomic_set(&net_ipvs(net)->enable, 0); /* Disable packet reception */
+	__ip_vs_sync_cleanup(net);
+	LeaveFunction(2);
+}
+
 static struct pernet_operations ipvs_core_ops = {
 	.init = __ip_vs_init,
 	.exit = __ip_vs_cleanup,
@@ -1906,6 +1962,10 @@ static struct pernet_operations ipvs_core_ops = {
 	.size = sizeof(struct netns_ipvs),
 };

+static struct pernet_operations ipvs_core_dev_ops = {
+	.exit = __ip_vs_dev_cleanup,
+};
+
 /*
  *	Initialize IP Virtual Server
  */
@@ -1913,10 +1973,6 @@ static int __init ip_vs_init(void)
 {
 	int ret;

-	ret = register_pernet_subsys(&ipvs_core_ops);	/* Alloc ip_vs struct */
-	if (ret < 0)
-		return ret;
-
 	ip_vs_estimator_init();
 	ret = ip_vs_control_init();
 	if (ret < 0) {
@@ -1944,15 +2000,28 @@ static int __init ip_vs_init(void)
 		goto cleanup_conn;
 	}

+	ret = register_pernet_subsys(&ipvs_core_ops);	/* Alloc ip_vs struct */
+	if (ret < 0)
+		goto cleanup_sync;
+
+	ret = register_pernet_device(&ipvs_core_dev_ops);
+	if (ret < 0)
+		goto cleanup_sub;
+
 	ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
 	if (ret < 0) {
 		pr_err("can't register hooks.\n");
-		goto cleanup_sync;
+		goto cleanup_dev;
 	}

 	pr_info("ipvs loaded.\n");
+
 	return ret;

+cleanup_dev:
+	unregister_pernet_device(&ipvs_core_dev_ops);
+cleanup_sub:
+	unregister_pernet_subsys(&ipvs_core_ops);
 cleanup_sync:
 	ip_vs_sync_cleanup();
 cleanup_conn:
@@ -1964,20 +2033,20 @@ cleanup_protocol:
 	ip_vs_control_cleanup();
 cleanup_estimator:
 	ip_vs_estimator_cleanup();
-	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
 	return ret;
 }

 static void __exit ip_vs_cleanup(void)
 {
 	nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
+	unregister_pernet_device(&ipvs_core_dev_ops);
+	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
 	ip_vs_sync_cleanup();
 	ip_vs_conn_cleanup();
 	ip_vs_app_cleanup();
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
 	ip_vs_estimator_cleanup();
-	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
 	pr_info("ipvs unloaded.\n");
 }

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index a31a70c..70e7a5e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -69,6 +69,11 @@ int ip_vs_get_debug_level(void)
 }
 #endif

+
+/*  Protos */
+static void __ip_vs_del_service(struct ip_vs_service *svc);
+
+
 #ifdef CONFIG_IP_VS_IPV6
 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
 static int __ip_vs_addr_is_local_v6(struct net *net,
@@ -345,6 +350,9 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)

 	svc->flags &= ~IP_VS_SVC_F_HASHED;
 	atomic_dec(&svc->refcnt);
+	/* No more services then no need for input */
+	if (atomic_read(&svc->refcnt) == 0)
+		atomic_set(&net_ipvs(svc->net)->enable, 0);
 	return 1;
 }

@@ -1214,6 +1222,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 	write_unlock_bh(&__ip_vs_svc_lock);

 	*svc_p = svc;
+	/* Now there is a service - full throttle */
+	atomic_set(&ipvs->enable, 1);
 	return 0;


@@ -1472,7 +1482,95 @@ static int ip_vs_flush(struct net *net)
 	return 0;
 }

+/*
+ *	Delete service by {netns} in the service table.
+ *	Called by __ip_vs_cleanup()
+ */
+void __ip_vs_service_cleanup(struct net *net)
+{
+	unsigned hash;
+	struct ip_vs_service *svc, *tmp;
+
+	EnterFunction(2);
+	/* Check for "full" addressed entries */
+	mutex_lock(&__ip_vs_mutex);
+	for (hash = 0; hash < IP_VS_SVC_TAB_SIZE; hash++) {
+		write_lock_bh(&__ip_vs_svc_lock);
+		list_for_each_entry_safe(svc, tmp, &ip_vs_svc_table[hash],
+					 s_list) {
+			if (net_eq(svc->net, net)) {
+				ip_vs_svc_unhash(svc);
+				/*  Wait until all the svc users go away. */
+				IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+				__ip_vs_del_service(svc);
+			}
+		}
+		list_for_each_entry_safe(svc, tmp, &ip_vs_svc_fwm_table[hash],
+					 f_list) {
+			if (net_eq(svc->net, net)) {
+				ip_vs_svc_unhash(svc);
+				/*  Wait until all the svc users go away. */
+				IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
+				__ip_vs_del_service(svc);
+			}
+		}
+		write_unlock_bh(&__ip_vs_svc_lock);
+	}
+	mutex_unlock(&__ip_vs_mutex);
+	LeaveFunction(2);
+}
+static inline void
+__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
+{
+	spin_lock_bh(&dest->dst_lock);
+	if (dest->dst_cache && dest->dst_cache->dev == dev)
+		ip_vs_dst_reset(dest);
+	spin_unlock_bh(&dest->dst_lock);
+}
+
+static void ip_vs_svc_reset(struct ip_vs_service *svc, struct net_device *dev)
+{
+	struct ip_vs_dest *dest;
+
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		__ip_vs_dev_reset(dest, dev);
+	}
+}

+static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
+			    void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct net *net = dev_net(dev);
+	struct ip_vs_service *svc;
+	struct ip_vs_dest *dest;
+	unsigned int idx;
+
+	if (event != NETDEV_UNREGISTER)
+		return NOTIFY_DONE;
+
+	EnterFunction(2);
+	mutex_lock(&__ip_vs_mutex);
+	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
+		write_lock_bh(&__ip_vs_svc_lock);
+		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
+			if (net_eq(svc->net, net))
+				ip_vs_svc_reset(svc, dev);
+		}
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
+			if (net_eq(svc->net, net))
+				ip_vs_svc_reset(svc, dev);
+		}
+		write_unlock_bh(&__ip_vs_svc_lock);
+	}
+
+	list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) {
+		__ip_vs_dev_reset(dest, dev);
+	}
+	mutex_unlock(&__ip_vs_mutex);
+	LeaveFunction(2);
+	return NOTIFY_DONE;
+}
 /*
  *	Zero counters in a service or all services
  */
@@ -3588,6 +3686,10 @@ void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { }

 #endif

+static struct notifier_block ip_vs_dst_notifier = {
+	.notifier_call = ip_vs_dst_event,
+};
+
 int __net_init __ip_vs_control_init(struct net *net)
 {
 	int idx;
@@ -3626,7 +3728,7 @@ err:
 	return -ENOMEM;
 }

-static void __net_exit __ip_vs_control_cleanup(struct net *net)
+void __net_exit __ip_vs_control_cleanup(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -3639,11 +3741,6 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net)
 	free_percpu(ipvs->tot_stats.cpustats);
 }

-static struct pernet_operations ipvs_control_ops = {
-	.init = __ip_vs_control_init,
-	.exit = __ip_vs_control_cleanup,
-};
-
 int __init ip_vs_control_init(void)
 {
 	int idx;
@@ -3657,33 +3754,32 @@ int __init ip_vs_control_init(void)
 		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
 	}

-	ret = register_pernet_subsys(&ipvs_control_ops);
-	if (ret) {
-		pr_err("cannot register namespace.\n");
-		goto err;
-	}
-
 	smp_wmb();	/* Do we really need it now ? */

 	ret = nf_register_sockopt(&ip_vs_sockopts);
 	if (ret) {
 		pr_err("cannot register sockopt.\n");
-		goto err_net;
+		goto err_sock;
 	}

 	ret = ip_vs_genl_register();
 	if (ret) {
 		pr_err("cannot register Generic Netlink interface.\n");
-		nf_unregister_sockopt(&ip_vs_sockopts);
-		goto err_net;
+		goto err_genl;
 	}

+	ret = register_netdevice_notifier(&ip_vs_dst_notifier);
+	if (ret < 0)
+		goto err_notf;
+
 	LeaveFunction(2);
 	return 0;

-err_net:
-	unregister_pernet_subsys(&ipvs_control_ops);
-err:
+err_notf:
+	ip_vs_genl_unregister();
+err_genl:
+	nf_unregister_sockopt(&ip_vs_sockopts);
+err_sock:
 	return ret;
 }

@@ -3691,7 +3787,6 @@ err:
 void ip_vs_control_cleanup(void)
 {
 	EnterFunction(2);
-	unregister_pernet_subsys(&ipvs_control_ops);
 	ip_vs_genl_unregister();
 	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 8c8766c..508cce9 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -192,7 +192,7 @@ void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
 	dst->outbps = (e->outbps + 0xF) >> 5;
 }

-static int __net_init __ip_vs_estimator_init(struct net *net)
+int __net_init __ip_vs_estimator_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -203,24 +203,16 @@ static int __net_init __ip_vs_estimator_init(struct net *net)
 	return 0;
 }

-static void __net_exit __ip_vs_estimator_exit(struct net *net)
+void __net_exit __ip_vs_estimator_cleanup(struct net *net)
 {
 	del_timer_sync(&net_ipvs(net)->est_timer);
 }
-static struct pernet_operations ip_vs_app_ops = {
-	.init = __ip_vs_estimator_init,
-	.exit = __ip_vs_estimator_exit,
-};

 int __init ip_vs_estimator_init(void)
 {
-	int rv;
-
-	rv = register_pernet_subsys(&ip_vs_app_ops);
-	return rv;
+	return 0;
 }

 void ip_vs_estimator_cleanup(void)
 {
-	unregister_pernet_subsys(&ip_vs_app_ops);
 }
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 17484a4..eb86028 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -316,7 +316,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
 /*
  * per network name-space init
  */
-static int __net_init __ip_vs_protocol_init(struct net *net)
+int __net_init __ip_vs_protocol_init(struct net *net)
 {
 #ifdef CONFIG_IP_VS_PROTO_TCP
 	register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
@@ -336,7 +336,7 @@ static int __net_init __ip_vs_protocol_init(struct net *net)
 	return 0;
 }

-static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
+void __net_exit __ip_vs_protocol_cleanup(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);
 	struct ip_vs_proto_data *pd;
@@ -349,11 +349,6 @@ static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
 	}
 }

-static struct pernet_operations ipvs_proto_ops = {
-	.init = __ip_vs_protocol_init,
-	.exit = __ip_vs_protocol_cleanup,
-};
-
 int __init ip_vs_protocol_init(void)
 {
 	char protocols[64];
@@ -382,7 +377,6 @@ int __init ip_vs_protocol_init(void)
 	REGISTER_PROTOCOL(&ip_vs_protocol_esp);
 #endif
 	pr_info("Registered protocols (%s)\n", &protocols[2]);
-	return register_pernet_subsys(&ipvs_proto_ops);

 	return 0;
 }
@@ -393,7 +387,6 @@ void ip_vs_protocol_cleanup(void)
 	struct ip_vs_protocol *pp;
 	int i;

-	unregister_pernet_subsys(&ipvs_proto_ops);
 	/* unregister all the ipvs protocols */
 	for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
 		while ((pp = ip_vs_proto_table[i]) != NULL)
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index c187823..f31ef18 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1663,7 +1663,7 @@ int stop_sync_thread(struct net *net, int state)
 /*
  * Initialize data struct for each netns
  */
-static int __net_init __ip_vs_sync_init(struct net *net)
+int __net_init __ip_vs_sync_init(struct net *net)
 {
 	struct netns_ipvs *ipvs = net_ipvs(net);

@@ -1677,7 +1677,7 @@ static int __net_init __ip_vs_sync_init(struct net *net)
 	return 0;
 }

-static void __ip_vs_sync_cleanup(struct net *net)
+void __ip_vs_sync_cleanup(struct net *net)
 {
 	int retc;

@@ -1690,18 +1690,11 @@ static void __ip_vs_sync_cleanup(struct net *net)
 		pr_err("Failed to stop Backup Daemon\n");
 }

-static struct pernet_operations ipvs_sync_ops = {
-	.init = __ip_vs_sync_init,
-	.exit = __ip_vs_sync_cleanup,
-};
-

^ permalink raw reply related

* [PATCH 2/6] IPVS: labels at pos 0
From: Hans Schillstrom @ 2011-04-20 18:33 UTC (permalink / raw)
  To: horms, ja, ebiederm, lvs-devel, netdev, netfilter-devel
  Cc: hans.schillstrom, Hans Schillstrom
In-Reply-To: <1303324434-14024-1-git-send-email-hans@schillstrom.com>

Put goto labels at the beginig of row
acording to coding style example.

Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
---
 net/netfilter/ipvs/ip_vs_core.c |   10 +++++-----
 net/netfilter/ipvs/ip_vs_ctl.c  |    8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index a0791dc..d536b51 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1388,7 +1388,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 		verdict = NF_DROP;
 	}
 
-  out:
+out:
 	__ip_vs_conn_put(cp);
 
 	return verdict;
@@ -1955,14 +1955,14 @@ static int __init ip_vs_init(void)
 
 cleanup_sync:
 	ip_vs_sync_cleanup();
-  cleanup_conn:
+cleanup_conn:
 	ip_vs_conn_cleanup();
-  cleanup_app:
+cleanup_app:
 	ip_vs_app_cleanup();
-  cleanup_protocol:
+cleanup_protocol:
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
-  cleanup_estimator:
+cleanup_estimator:
 	ip_vs_estimator_cleanup();
 	unregister_pernet_subsys(&ipvs_core_ops);	/* free ip_vs struct */
 	return ret;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ae47090..a31a70c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1327,9 +1327,9 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 		ip_vs_bind_pe(svc, pe);
 	}
 
-  out_unlock:
+out_unlock:
 	write_unlock_bh(&__ip_vs_svc_lock);
-  out:
+out:
 	ip_vs_scheduler_put(old_sched);
 	ip_vs_pe_put(old_pe);
 	return ret;
@@ -2387,7 +2387,7 @@ __ip_vs_get_service_entries(struct net *net,
 			count++;
 		}
 	}
-  out:
+out:
 	return ret;
 }
 
@@ -2625,7 +2625,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 		ret = -EINVAL;
 	}
 
-  out:
+out:
 	mutex_unlock(&__ip_vs_mutex);
 	return ret;
 }
-- 
1.7.2.3


^ permalink raw reply related

* Re: Suspend/resume - slow resume
From: Francois Romieu @ 2011-04-20 18:16 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Ciprian Docan, netdev, Linux Kernel Mailing List, Len Brown,
	Pavel Machek, Rafael, J. Wysocki, Greg KH, nic_swsd
In-Reply-To: <BANLkTi=dke10HougmiQn9hSF15nK0B8mdA@mail.gmail.com>

Linus Torvalds <torvalds@linux-foundation.org> :
[...]
> Francois' patch is fine and improves on things, I only had a few
> arguments with it. The problems I think it had were really about some
> rather special cases.

They may be addressed by the patch below.

I'll test it as soon as my 2.6.39-rc stops softlocking and I have a
working suspend / resume. No joke.

Subject: [PATCH] r8169: don't request firmware when there's no userspace.

The firmware is cached during the first successfull call to open() and
released once the network device is unregistered. The driver uses the
cached firmware between open() and unregister_netdev().

So far the firmware is optional : a failure to load the firmware does
not prevent open() to success. It is thus necessary to 1) unregister
all 816x / 810[23] devices and 2) force a driver probe to issue a new
firmware load.

Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
---
 drivers/net/r8169.c |   92 ++++++++++++++++++++++++++++++++++++---------------
 1 files changed, 65 insertions(+), 27 deletions(-)

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index 493b0de..fceb954 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -170,6 +170,16 @@ static const struct {
 };
 #undef _R
 
+static const struct rtl_firmware_info {
+	int mac_version;
+	const char *fw_name;
+} rtl_firmware_infos[] = {
+	{ .mac_version = RTL_GIGA_MAC_VER_25, .fw_name = FIRMWARE_8168D_1 },
+	{ .mac_version = RTL_GIGA_MAC_VER_26, .fw_name = FIRMWARE_8168D_2 },
+	{ .mac_version = RTL_GIGA_MAC_VER_29, .fw_name = FIRMWARE_8105E_1 },
+	{ .mac_version = RTL_GIGA_MAC_VER_30, .fw_name = FIRMWARE_8105E_1 }
+};
+
 enum cfg_version {
 	RTL_CFG_0 = 0x00,
 	RTL_CFG_1,
@@ -565,6 +575,7 @@ struct rtl8169_private {
 	u32 saved_wolopts;
 
 	const struct firmware *fw;
+#define RTL_FIRMWARE_UNKNOWN	ERR_PTR(-EAGAIN);
 };
 
 MODULE_AUTHOR("Realtek and the Linux r8169 crew <netdev@vger.kernel.org>");
@@ -1790,24 +1801,24 @@ rtl_phy_write_fw(struct rtl8169_private *tp, const struct firmware *fw)
 static void rtl_release_firmware(struct rtl8169_private *tp)
 {
 	release_firmware(tp->fw);
-	tp->fw = NULL;
+	tp->fw = RTL_FIRMWARE_UNKNOWN;
 }
 
-static int rtl_apply_firmware(struct rtl8169_private *tp, const char *fw_name)
+static void rtl_apply_firmware(struct rtl8169_private *tp)
 {
-	const struct firmware **fw = &tp->fw;
-	int rc = !*fw;
-
-	if (rc) {
-		rc = request_firmware(fw, fw_name, &tp->pci_dev->dev);
-		if (rc < 0)
-			goto out;
-	}
+	const struct firmware *fw = tp->fw;
 
 	/* TODO: release firmware once rtl_phy_write_fw signals failures. */
-	rtl_phy_write_fw(tp, *fw);
-out:
-	return rc;
+	if (!IS_ERR_OR_NULL(fw))
+		rtl_phy_write_fw(tp, fw);
+}
+
+static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val)
+{
+	if (rtl_readphy(tp, reg) != val)
+		netif_warn(tp, hw, tp->dev, "chipset not ready for firmware\n");
+	else
+		rtl_apply_firmware(tp);
 }
 
 static void rtl8169s_hw_phy_config(struct rtl8169_private *tp)
@@ -2246,10 +2257,8 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp)
 
 	rtl_writephy(tp, 0x1f, 0x0005);
 	rtl_writephy(tp, 0x05, 0x001b);
-	if ((rtl_readphy(tp, 0x06) != 0xbf00) ||
-	    (rtl_apply_firmware(tp, FIRMWARE_8168D_1) < 0)) {
-		netif_warn(tp, probe, tp->dev, "unable to apply firmware patch\n");
-	}
+
+	rtl_apply_firmware_cond(tp, MII_EXPANSION, 0xbf00);
 
 	rtl_writephy(tp, 0x1f, 0x0000);
 }
@@ -2351,10 +2360,8 @@ static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp)
 
 	rtl_writephy(tp, 0x1f, 0x0005);
 	rtl_writephy(tp, 0x05, 0x001b);
-	if ((rtl_readphy(tp, 0x06) != 0xb300) ||
-	    (rtl_apply_firmware(tp, FIRMWARE_8168D_2) < 0)) {
-		netif_warn(tp, probe, tp->dev, "unable to apply firmware patch\n");
-	}
+
+	rtl_apply_firmware_cond(tp, MII_EXPANSION, 0xb300);
 
 	rtl_writephy(tp, 0x1f, 0x0000);
 }
@@ -2474,8 +2481,7 @@ static void rtl8105e_hw_phy_config(struct rtl8169_private *tp)
 	rtl_writephy(tp, 0x18, 0x0310);
 	msleep(100);
 
-	if (rtl_apply_firmware(tp, FIRMWARE_8105E_1) < 0)
-		netif_warn(tp, probe, tp->dev, "unable to apply firmware patch\n");
+	rtl_apply_firmware(tp);
 
 	rtl_writephy_batch(tp, phy_reg_init, ARRAY_SIZE(phy_reg_init));
 }
@@ -3237,6 +3243,8 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp->timer.data = (unsigned long) dev;
 	tp->timer.function = rtl8169_phy_timer;
 
+	tp->fw = RTL_FIRMWARE_UNKNOWN;
+
 	rc = register_netdev(dev);
 	if (rc < 0)
 		goto err_out_msi_4;
@@ -3288,10 +3296,10 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
 
 	cancel_delayed_work_sync(&tp->task);
 
-	rtl_release_firmware(tp);
-
 	unregister_netdev(dev);
 
+	rtl_release_firmware(tp);
+
 	if (pci_dev_run_wake(pdev))
 		pm_runtime_get_noresume(&pdev->dev);
 
@@ -3303,6 +3311,33 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
 	pci_set_drvdata(pdev, NULL);
 }
 
+static void rtl_request_firmware(struct rtl8169_private *tp)
+{
+	int i;
+
+	/* Return early if the firmware is already loaded / cached. */
+	if (!IS_ERR(tp->fw))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(rtl_firmware_infos); i++) {
+		const struct rtl_firmware_info *info = rtl_firmware_infos + i;
+
+		if (info->mac_version == tp->mac_version) {
+			const char *name = info->fw_name;
+			int rc;
+
+			rc = request_firmware(&tp->fw, name, &tp->pci_dev->dev);
+			if (rc < 0) {
+				netif_warn(tp, ifup, tp->dev, "unable to load "
+					"firmware patch %s (%d)\n", name, rc);
+			}
+			return;
+		}
+	}
+
+	tp->fw = NULL;
+}
+
 static int rtl8169_open(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
@@ -3334,11 +3369,13 @@ static int rtl8169_open(struct net_device *dev)
 
 	smp_mb();
 
+	rtl_request_firmware(tp);
+
 	retval = request_irq(dev->irq, rtl8169_interrupt,
 			     (tp->features & RTL_FEATURE_MSI) ? 0 : IRQF_SHARED,
 			     dev->name, dev);
 	if (retval < 0)
-		goto err_release_ring_2;
+		goto err_release_fw_2;
 
 	napi_enable(&tp->napi);
 
@@ -3359,7 +3396,8 @@ static int rtl8169_open(struct net_device *dev)
 out:
 	return retval;
 
-err_release_ring_2:
+err_release_fw_2:
+	rtl_release_firmware(tp);
 	rtl8169_rx_clear(tp);
 err_free_rx_1:
 	dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
-- 
1.7.4.2

^ permalink raw reply related

* Re: Fix atl1c event race (was Re: 2.6.38 dev_watchdog WARNING)
From: Ben Hutchings @ 2011-04-20 18:13 UTC (permalink / raw)
  To: tim.gardner; +Cc: netdev
In-Reply-To: <4DAF1F1D.1020108@canonical.com>

On Wed, 2011-04-20 at 11:59 -0600, Tim Gardner wrote:
[...]
> I've been focusing on atl1c while trying to understand why link status 
> flapping could cause these watchdog timeouts. I've a couple of log files 
> with link state change information:
> 
> http://bugs.launchpad.net/bugs/766273
> https://launchpadlibrarian.net/69926580/BootDmesg.txt
> https://launchpadlibrarian.net/69926583/CurrentDmesg.txt
> 
> One thing of note is that there are 2 link UP messages in a row, 
> something that should only be able to happen if there has been an 
> intervening device reset (which is not evident in the logs). I've 
> noticed that the work event scheduling is kind of racy, so perhaps this 
> will help. See attached.

I'm not going to spend any significant time looking at atl1c, as that's
really not my job!  The atlx maintainers may be covering atl1c as well,
so try cc'ing them.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply

* Fix atl1c event race (was Re: 2.6.38 dev_watchdog WARNING)
From: Tim Gardner @ 2011-04-20 17:59 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: netdev
In-Reply-To: <1303238959.2988.30.camel@bwh-desktop>

[-- Attachment #1: Type: text/plain, Size: 2234 bytes --]

On 04/19/2011 12:49 PM, Ben Hutchings wrote:
> On Tue, 2011-04-19 at 11:40 -0600, Tim Gardner wrote:
>> I'm seeing a lot of these kinds of bugs: WARNING: at
>> /build/buildd/linux-2.6.38/net/sched/sch_generic.c:256
>> dev_watchdog+0x213/0x220()
>>
>> The kernel is 2.6.38.2 plus Ubuntu cruft.
>>
>> A spot check of the 200+ hits on this string indicates they are
>> primarily due to these drivers:
>>
>> ipheth
>> atl1c
>> sis900
>> r8169
>>
>> As far as I can tell the warning happens when link is down on the media
>> (and has never been link UP) and are sent a transmit packet which never
>> completes. Is there a net/core or net/sched requirement to which these
>> drivers do not conform ? Are they not correctly indicating link status?
>
> The watchdog fires when the software queue has been stopped *and* the
> link has been reported as up for over dev->watchdog_timeo ticks.
>
> The software queue should be stopped iff the hardware queue is full or
> nearly full.  If the software queue remains stopped and the link is
> still reported up, then one of these things is happening:
>
> 1. The link went down but the driver didn't notice
> 2. TX completions are not being indicated or handled correctly
> 3. The hardware TX path has locked up
> 4. The link is stalled by excessive pause frames or collisions
> 5. Timeout is too low and/or low watermark is too high
> (there may be other explanations)
>
> I think the watchdog is primarily meant to deal with case 3, though all
> of cases 1-3 may be worked around by resetting the hardware.
>
> Ben.
>

I've been focusing on atl1c while trying to understand why link status 
flapping could cause these watchdog timeouts. I've a couple of log files 
with link state change information:

http://bugs.launchpad.net/bugs/766273
https://launchpadlibrarian.net/69926580/BootDmesg.txt
https://launchpadlibrarian.net/69926583/CurrentDmesg.txt

One thing of note is that there are 2 link UP messages in a row, 
something that should only be able to happen if there has been an 
intervening device reset (which is not evident in the logs). I've 
noticed that the work event scheduling is kind of racy, so perhaps this 
will help. See attached.

rtg
-- 
Tim Gardner tim.gardner@canonical.com

[-- Attachment #2: 0001-atl1c-Fix-work-event-interrupt-task-races.patch --]
[-- Type: text/x-patch, Size: 2790 bytes --]

>From 6e92d53121dae5fa13c95c19b6076009df686de8 Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Wed, 20 Apr 2011 11:31:09 -0600
Subject: [PATCH] atl1c: Fix work event interrupt/task races

The mechanism used to initiate work events from the interrupt
handler has a classic read/modify/write race between the interrupt
handler that sets the condition, and the worker task that reads and
clears the condition. Close these races by using atomic
bit fields.

Cc: stable@kernel.org
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
---
 drivers/net/atl1c/atl1c.h      |    6 +++---
 drivers/net/atl1c/atl1c_main.c |   14 +++++---------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/net/atl1c/atl1c.h b/drivers/net/atl1c/atl1c.h
index 9ab5809..dec8110 100644
--- a/drivers/net/atl1c/atl1c.h
+++ b/drivers/net/atl1c/atl1c.h
@@ -566,9 +566,9 @@ struct atl1c_adapter {
 #define __AT_TESTING        0x0001
 #define __AT_RESETTING      0x0002
 #define __AT_DOWN           0x0003
-	u8 work_event;
-#define ATL1C_WORK_EVENT_RESET 		0x01
-#define ATL1C_WORK_EVENT_LINK_CHANGE	0x02
+	unsigned long work_event;
+#define	ATL1C_WORK_EVENT_RESET		0
+#define	ATL1C_WORK_EVENT_LINK_CHANGE	1
 	u32 msg_enable;
 
 	bool have_msi;
diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c
index 3824382..dffc7f7 100644
--- a/drivers/net/atl1c/atl1c_main.c
+++ b/drivers/net/atl1c/atl1c_main.c
@@ -325,7 +325,7 @@ static void atl1c_link_chg_event(struct atl1c_adapter *adapter)
 		}
 	}
 
-	adapter->work_event |= ATL1C_WORK_EVENT_LINK_CHANGE;
+	set_bit(ATL1C_WORK_EVENT_LINK_CHANGE, &adapter->work_event);
 	schedule_work(&adapter->common_task);
 }
 
@@ -337,20 +337,16 @@ static void atl1c_common_task(struct work_struct *work)
 	adapter = container_of(work, struct atl1c_adapter, common_task);
 	netdev = adapter->netdev;
 
-	if (adapter->work_event & ATL1C_WORK_EVENT_RESET) {
-		adapter->work_event &= ~ATL1C_WORK_EVENT_RESET;
+	if (test_and_clear_bit(ATL1C_WORK_EVENT_RESET, &adapter->work_event)) {
 		netif_device_detach(netdev);
 		atl1c_down(adapter);
 		atl1c_up(adapter);
 		netif_device_attach(netdev);
-		return;
 	}
 
-	if (adapter->work_event & ATL1C_WORK_EVENT_LINK_CHANGE) {
-		adapter->work_event &= ~ATL1C_WORK_EVENT_LINK_CHANGE;
+	if (test_and_clear_bit(ATL1C_WORK_EVENT_LINK_CHANGE,
+		&adapter->work_event))
 		atl1c_check_link_status(adapter);
-	}
-	return;
 }
 
 
@@ -369,7 +365,7 @@ static void atl1c_tx_timeout(struct net_device *netdev)
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
 
 	/* Do the reset outside of interrupt context */
-	adapter->work_event |= ATL1C_WORK_EVENT_RESET;
+	set_bit(ATL1C_WORK_EVENT_RESET, &adapter->work_event);
 	schedule_work(&adapter->common_task);
 }
 
-- 
1.7.0.4


^ permalink raw reply related

* [PATCH net-next 7/9] tg3: Add write accessor for AUX CTRL phy reg
From: Matt Carlson @ 2011-04-20 17:57 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

This patch adds a write accessor for the aux ctrl phy register.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |   48 +++++++++++++++++++++++++++++++-----------------
 drivers/net/tg3.h |    4 ++++
 2 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 7be10cf..69cd7cf 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -962,6 +962,14 @@ static int tg3_phy_auxctl_read(struct tg3 *tp, int reg, u32 *val)
 	return err;
 }
 
+static int tg3_phy_auxctl_write(struct tg3 *tp, int reg, u32 set)
+{
+	if (reg == MII_TG3_AUXCTL_SHDWSEL_MISC)
+		set |= MII_TG3_AUXCTL_MISC_WREN;
+
+	return tg3_writephy(tp, MII_TG3_AUX_CTRL, set | reg);
+}
+
 static int tg3_bmcr_reset(struct tg3 *tp)
 {
 	u32 phy_control;
@@ -1701,8 +1709,8 @@ static void tg3_phy_toggle_automdix(struct tg3 *tp, int enable)
 				phy |= MII_TG3_AUXCTL_MISC_FORCE_AMDIX;
 			else
 				phy &= ~MII_TG3_AUXCTL_MISC_FORCE_AMDIX;
-			phy |= MII_TG3_AUXCTL_MISC_WREN;
-			tg3_writephy(tp, MII_TG3_AUX_CTRL, phy);
+			tg3_phy_auxctl_write(tp,
+					     MII_TG3_AUXCTL_SHDWSEL_MISC, phy);
 		}
 	}
 }
@@ -1717,8 +1725,8 @@ static void tg3_phy_set_wirespeed(struct tg3 *tp)
 
 	ret = tg3_phy_auxctl_read(tp, MII_TG3_AUXCTL_SHDWSEL_MISC, &val);
 	if (!ret)
-		tg3_writephy(tp, MII_TG3_AUX_CTRL,
-			     (val | (1 << 15) | (1 << 4)));
+		tg3_phy_auxctl_write(tp, MII_TG3_AUXCTL_SHDWSEL_MISC,
+				     val | MII_TG3_AUXCTL_MISC_WIRESPD_EN);
 }
 
 static void tg3_phy_apply_otp(struct tg3 *tp)
@@ -2104,13 +2112,14 @@ out:
 	/* support jumbo frames */
 	if ((tp->phy_id & TG3_PHY_ID_MASK) == TG3_PHY_ID_BCM5401) {
 		/* Cannot do read-modify-write on 5401 */
-		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x4c20);
+		tg3_phy_auxctl_write(tp, MII_TG3_AUXCTL_SHDWSEL_AUXCTL, 0x4c20);
 	} else if (tp->tg3_flags & TG3_FLAG_JUMBO_CAPABLE) {
 		/* Set bit 14 with read-modify-write to preserve other bits */
 		err = tg3_phy_auxctl_read(tp,
 					  MII_TG3_AUXCTL_SHDWSEL_AUXCTL, &val);
 		if (!err)
-			tg3_writephy(tp, MII_TG3_AUX_CTRL, val | 0x4000);
+			tg3_phy_auxctl_write(tp, MII_TG3_AUXCTL_SHDWSEL_AUXCTL,
+					   val | MII_TG3_AUXCTL_ACTL_EXTPKTLEN);
 	}
 
 	/* Set phy register 0x10 bit 0 to high fifo elasticity to support
@@ -2319,11 +2328,10 @@ static void tg3_power_down_phy(struct tg3 *tp, bool do_low_power)
 		tg3_writephy(tp, MII_TG3_EXT_CTRL,
 			     MII_TG3_EXT_CTRL_FORCE_LED_OFF);
 
-		tg3_writephy(tp, MII_TG3_AUX_CTRL,
-			     MII_TG3_AUXCTL_SHDWSEL_PWRCTL |
-			     MII_TG3_AUXCTL_PCTL_100TX_LPWR |
-			     MII_TG3_AUXCTL_PCTL_SPR_ISOLATE |
-			     MII_TG3_AUXCTL_PCTL_VREG_11V);
+		val = MII_TG3_AUXCTL_PCTL_100TX_LPWR |
+		      MII_TG3_AUXCTL_PCTL_SPR_ISOLATE |
+		      MII_TG3_AUXCTL_PCTL_VREG_11V;
+		tg3_phy_auxctl_write(tp, MII_TG3_AUXCTL_SHDWSEL_PWRCTL, val);
 	}
 
 	/* The PHY should not be powered down on some chips because
@@ -2717,8 +2725,13 @@ static int tg3_power_down_prepare(struct tg3 *tp)
 		u32 mac_mode;
 
 		if (!(tp->phy_flags & TG3_PHYFLG_PHY_SERDES)) {
-			if (do_low_power) {
-				tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x5a);
+			if (do_low_power &&
+			    !(tp->phy_flags & TG3_PHYFLG_IS_FET)) {
+				tg3_phy_auxctl_write(tp,
+					       MII_TG3_AUXCTL_SHDWSEL_PWRCTL,
+					       MII_TG3_AUXCTL_PCTL_WOL_EN |
+					       MII_TG3_AUXCTL_PCTL_100TX_LPWR |
+					       MII_TG3_AUXCTL_PCTL_CL_AB_TXDAC);
 				udelay(40);
 			}
 
@@ -3092,7 +3105,7 @@ static int tg3_init_5401phy_dsp(struct tg3 *tp)
 
 	/* Turn off tap power management. */
 	/* Set Extended packet length bit */
-	err  = tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x4c20);
+	err = tg3_phy_auxctl_write(tp, MII_TG3_AUXCTL_SHDWSEL_AUXCTL, 0x4c20);
 
 	err |= tg3_phydsp_write(tp, 0x0012, 0x1804);
 	err |= tg3_phydsp_write(tp, 0x0013, 0x1204);
@@ -3198,7 +3211,7 @@ static int tg3_setup_copper_phy(struct tg3 *tp, int force_reset)
 		udelay(80);
 	}
 
-	tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x02);
+	tg3_phy_auxctl_write(tp, MII_TG3_AUXCTL_SHDWSEL_PWRCTL, 0);
 
 	/* Some third-party PHYs need to be reset on link going
 	 * down.
@@ -3283,8 +3296,9 @@ static int tg3_setup_copper_phy(struct tg3 *tp, int force_reset)
 					  MII_TG3_AUXCTL_SHDWSEL_MISCTEST,
 					  &val);
 		if (!err && !(val & (1 << 10))) {
-			val |= (1 << 10);
-			tg3_writephy(tp, MII_TG3_AUX_CTRL, val);
+			tg3_phy_auxctl_write(tp,
+					     MII_TG3_AUXCTL_SHDWSEL_MISCTEST,
+					     val | (1 << 10));
 			goto relink;
 		}
 	}
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index b9382f1..eaa7669 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2197,15 +2197,19 @@
 #define MII_TG3_AUXCTL_SHDWSEL_AUXCTL	0x0000
 #define MII_TG3_AUXCTL_ACTL_TX_6DB	0x0400
 #define MII_TG3_AUXCTL_ACTL_SMDSP_ENA	0x0800
+#define MII_TG3_AUXCTL_ACTL_EXTPKTLEN	0x4000
 
 #define MII_TG3_AUXCTL_SHDWSEL_PWRCTL	0x0002
+#define MII_TG3_AUXCTL_PCTL_WOL_EN	0x0008
 #define MII_TG3_AUXCTL_PCTL_100TX_LPWR	0x0010
 #define MII_TG3_AUXCTL_PCTL_SPR_ISOLATE	0x0020
+#define MII_TG3_AUXCTL_PCTL_CL_AB_TXDAC	0x0040
 #define MII_TG3_AUXCTL_PCTL_VREG_11V	0x0180
 
 #define MII_TG3_AUXCTL_SHDWSEL_MISCTEST	0x0004
 
 #define MII_TG3_AUXCTL_SHDWSEL_MISC	0x0007
+#define MII_TG3_AUXCTL_MISC_WIRESPD_EN	0x0010
 #define MII_TG3_AUXCTL_MISC_FORCE_AMDIX	0x0200
 #define MII_TG3_AUXCTL_MISC_RDSEL_SHIFT	12
 #define MII_TG3_AUXCTL_MISC_WREN	0x8000
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 4/9] tg3: Only allow phy ioctls while netif_running
From: Matt Carlson @ 2011-04-20 17:57 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

When tg3 was new, phy accesses through ioctl were allowable at any time.
Then, the driver started shutting down the phy when the device was
closed.  Phy accesses would be allowed when the driver first attached to
the device, but then would be forbidden after the device had been up'd
and down'd.  After that, management firmware made it illegal to access
the phy unless the driver "owned" the device.  Now that most firmware
is being moved over to the APE, it is less clear when phy accesses are
safe.

While it is possible to attempt to identify these conditions and code
the driver to navigate through the pitfalls, it could be perplexing to
the admin why phy accesses work in some cases and not others.  This
patch brings some uniformity to the problem by only allowing phy
accesses while the driver has control of the device.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |    8 ++------
 1 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 1f233c4..e134e48 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -11444,9 +11444,7 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		if (tp->phy_flags & TG3_PHYFLG_PHY_SERDES)
 			break;			/* We have no PHY */
 
-		if ((tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) ||
-		    ((tp->tg3_flags & TG3_FLAG_ENABLE_ASF) &&
-		     !netif_running(dev)))
+		if (!netif_running(dev))
 			return -EAGAIN;
 
 		spin_lock_bh(&tp->lock);
@@ -11462,9 +11460,7 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		if (tp->phy_flags & TG3_PHYFLG_PHY_SERDES)
 			break;			/* We have no PHY */
 
-		if ((tp->phy_flags & TG3_PHYFLG_IS_LOW_POWER) ||
-		    ((tp->tg3_flags & TG3_FLAG_ENABLE_ASF) &&
-		     !netif_running(dev)))
+		if (!netif_running(dev))
 			return -EAGAIN;
 
 		spin_lock_bh(&tp->lock);
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 8/9] tg3: Add macro for SMDSP toggling
From: Matt Carlson @ 2011-04-20 17:57 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

A common AUX CTRL operation in the driver is to enable and disable the
SMDSP.  This patch consolidates the code so that the details of the
operation are in one place.  This patch also adds code to make sure the
SMDSP is enabled before executing code that relies on it.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |  102 ++++++++++++++++++++++++-----------------------------
 1 files changed, 46 insertions(+), 56 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 69cd7cf..9ed6bfb 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -970,6 +970,15 @@ static int tg3_phy_auxctl_write(struct tg3 *tp, int reg, u32 set)
 	return tg3_writephy(tp, MII_TG3_AUX_CTRL, set | reg);
 }
 
+#define TG3_PHY_AUXCTL_SMDSP_ENABLE(tp) \
+	tg3_phy_auxctl_write((tp), MII_TG3_AUXCTL_SHDWSEL_AUXCTL, \
+			     MII_TG3_AUXCTL_ACTL_SMDSP_ENA | \
+			     MII_TG3_AUXCTL_ACTL_TX_6DB)
+
+#define TG3_PHY_AUXCTL_SMDSP_DISABLE(tp) \
+	tg3_phy_auxctl_write((tp), MII_TG3_AUXCTL_SHDWSEL_AUXCTL, \
+			     MII_TG3_AUXCTL_ACTL_TX_6DB);
+
 static int tg3_bmcr_reset(struct tg3 *tp)
 {
 	u32 phy_control;
@@ -1738,11 +1747,8 @@ static void tg3_phy_apply_otp(struct tg3 *tp)
 
 	otp = tp->phy_otp;
 
-	/* Enable SM_DSP clock and tx 6dB coding. */
-	phy = MII_TG3_AUXCTL_SHDWSEL_AUXCTL |
-	      MII_TG3_AUXCTL_ACTL_SMDSP_ENA |
-	      MII_TG3_AUXCTL_ACTL_TX_6DB;
-	tg3_writephy(tp, MII_TG3_AUX_CTRL, phy);
+	if (TG3_PHY_AUXCTL_SMDSP_ENABLE(tp))
+		return;
 
 	phy = ((otp & TG3_OTP_AGCTGT_MASK) >> TG3_OTP_AGCTGT_SHIFT);
 	phy |= MII_TG3_DSP_TAP1_AGCTGT_DFLT;
@@ -1766,10 +1772,7 @@ static void tg3_phy_apply_otp(struct tg3 *tp)
 	      ((otp & TG3_OTP_RCOFF_MASK) >> TG3_OTP_RCOFF_SHIFT);
 	tg3_phydsp_write(tp, MII_TG3_DSP_EXP97, phy);
 
-	/* Turn off SM_DSP clock. */
-	phy = MII_TG3_AUXCTL_SHDWSEL_AUXCTL |
-	      MII_TG3_AUXCTL_ACTL_TX_6DB;
-	tg3_writephy(tp, MII_TG3_AUX_CTRL, phy);
+	TG3_PHY_AUXCTL_SMDSP_DISABLE(tp);
 }
 
 static void tg3_phy_eee_adjust(struct tg3 *tp, u32 current_link_up)
@@ -1804,18 +1807,11 @@ static void tg3_phy_eee_adjust(struct tg3 *tp, u32 current_link_up)
 			case ASIC_REV_5717:
 			case ASIC_REV_5719:
 			case ASIC_REV_57765:
-				/* Enable SM_DSP clock and tx 6dB coding. */
-				val = MII_TG3_AUXCTL_SHDWSEL_AUXCTL |
-				      MII_TG3_AUXCTL_ACTL_SMDSP_ENA |
-				      MII_TG3_AUXCTL_ACTL_TX_6DB;
-				tg3_writephy(tp, MII_TG3_AUX_CTRL, val);
-
-				tg3_phydsp_write(tp, MII_TG3_DSP_TAP26, 0x0000);
-
-				/* Turn off SM_DSP clock. */
-				val = MII_TG3_AUXCTL_SHDWSEL_AUXCTL |
-				      MII_TG3_AUXCTL_ACTL_TX_6DB;
-				tg3_writephy(tp, MII_TG3_AUX_CTRL, val);
+				if (!TG3_PHY_AUXCTL_SMDSP_ENABLE(tp)) {
+					tg3_phydsp_write(tp, MII_TG3_DSP_TAP26,
+							 0x0000);
+					TG3_PHY_AUXCTL_SMDSP_DISABLE(tp);
+				}
 			}
 			/* Fallthrough */
 		case TG3_CL45_D7_EEERES_STAT_LP_100TX:
@@ -1967,8 +1963,9 @@ static int tg3_phy_reset_5703_4_5(struct tg3 *tp)
 			     (MII_TG3_CTRL_AS_MASTER |
 			      MII_TG3_CTRL_ENABLE_AS_MASTER));
 
-		/* Enable SM_DSP_CLOCK and 6dB.  */
-		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0c00);
+		err = TG3_PHY_AUXCTL_SMDSP_ENABLE(tp);
+		if (err)
+			return err;
 
 		/* Block the PHY control access.  */
 		tg3_phydsp_write(tp, 0x8005, 0x0800);
@@ -1987,13 +1984,7 @@ static int tg3_phy_reset_5703_4_5(struct tg3 *tp)
 	tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x8200);
 	tg3_writephy(tp, MII_TG3_DSP_CONTROL, 0x0000);
 
-	if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 ||
-	    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5704) {
-		/* Set Extended packet length bit for jumbo frames */
-		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x4400);
-	} else {
-		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0400);
-	}
+	TG3_PHY_AUXCTL_SMDSP_DISABLE(tp);
 
 	tg3_writephy(tp, MII_TG3_CTRL, phy9_orig);
 
@@ -2081,33 +2072,39 @@ static int tg3_phy_reset(struct tg3 *tp)
 		tg3_phy_toggle_apd(tp, false);
 
 out:
-	if (tp->phy_flags & TG3_PHYFLG_ADC_BUG) {
-		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0c00);
+	if ((tp->phy_flags & TG3_PHYFLG_ADC_BUG) &&
+	    !TG3_PHY_AUXCTL_SMDSP_ENABLE(tp)) {
 		tg3_phydsp_write(tp, 0x201f, 0x2aaa);
 		tg3_phydsp_write(tp, 0x000a, 0x0323);
-		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0400);
+		TG3_PHY_AUXCTL_SMDSP_DISABLE(tp);
 	}
+
 	if (tp->phy_flags & TG3_PHYFLG_5704_A0_BUG) {
 		tg3_writephy(tp, MII_TG3_MISC_SHDW, 0x8d68);
 		tg3_writephy(tp, MII_TG3_MISC_SHDW, 0x8d68);
 	}
+
 	if (tp->phy_flags & TG3_PHYFLG_BER_BUG) {
-		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0c00);
-		tg3_phydsp_write(tp, 0x000a, 0x310b);
-		tg3_phydsp_write(tp, 0x201f, 0x9506);
-		tg3_phydsp_write(tp, 0x401f, 0x14e2);
-		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0400);
+		if (!TG3_PHY_AUXCTL_SMDSP_ENABLE(tp)) {
+			tg3_phydsp_write(tp, 0x000a, 0x310b);
+			tg3_phydsp_write(tp, 0x201f, 0x9506);
+			tg3_phydsp_write(tp, 0x401f, 0x14e2);
+			TG3_PHY_AUXCTL_SMDSP_DISABLE(tp);
+		}
 	} else if (tp->phy_flags & TG3_PHYFLG_JITTER_BUG) {
-		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0c00);
-		tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x000a);
-		if (tp->phy_flags & TG3_PHYFLG_ADJUST_TRIM) {
-			tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x110b);
-			tg3_writephy(tp, MII_TG3_TEST1,
-				     MII_TG3_TEST1_TRIM_EN | 0x4);
-		} else
-			tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x010b);
-		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0400);
+		if (!TG3_PHY_AUXCTL_SMDSP_ENABLE(tp)) {
+			tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x000a);
+			if (tp->phy_flags & TG3_PHYFLG_ADJUST_TRIM) {
+				tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x110b);
+				tg3_writephy(tp, MII_TG3_TEST1,
+					     MII_TG3_TEST1_TRIM_EN | 0x4);
+			} else
+				tg3_writephy(tp, MII_TG3_DSP_RW_PORT, 0x010b);
+
+			TG3_PHY_AUXCTL_SMDSP_DISABLE(tp);
+		}
 	}
+
 	/* Set Extended packet length bit (bit 14) on all chips that */
 	/* support jumbo frames */
 	if ((tp->phy_id & TG3_PHY_ID_MASK) == TG3_PHY_ID_BCM5401) {
@@ -3011,11 +3008,7 @@ static void tg3_phy_copper_begin(struct tg3 *tp)
 		tw32(TG3_CPMU_EEE_MODE,
 		     tr32(TG3_CPMU_EEE_MODE) & ~TG3_CPMU_EEEMD_LPI_ENABLE);
 
-		/* Enable SM_DSP clock and tx 6dB coding. */
-		val = MII_TG3_AUXCTL_SHDWSEL_AUXCTL |
-		      MII_TG3_AUXCTL_ACTL_SMDSP_ENA |
-		      MII_TG3_AUXCTL_ACTL_TX_6DB;
-		tg3_writephy(tp, MII_TG3_AUX_CTRL, val);
+		TG3_PHY_AUXCTL_SMDSP_ENABLE(tp);
 
 		switch (GET_ASIC_REV(tp->pci_chip_rev_id)) {
 		case ASIC_REV_5717:
@@ -3044,10 +3037,7 @@ static void tg3_phy_copper_begin(struct tg3 *tp)
 		}
 		tg3_phy_cl45_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, val);
 
-		/* Turn off SM_DSP clock. */
-		val = MII_TG3_AUXCTL_SHDWSEL_AUXCTL |
-		      MII_TG3_AUXCTL_ACTL_TX_6DB;
-		tg3_writephy(tp, MII_TG3_AUX_CTRL, val);
+		TG3_PHY_AUXCTL_SMDSP_DISABLE(tp);
 	}
 
 	if (tp->link_config.autoneg == AUTONEG_DISABLE &&
-- 
1.7.3.4



^ permalink raw reply related

* [PATCH net-next 6/9] tg3: Add read accessor for AUX CTRL phy reg
From: Matt Carlson @ 2011-04-20 17:57 UTC (permalink / raw)
  To: davem; +Cc: netdev, mcarlson

This patch adds a read accessor for the aux ctrl register.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Reviewed-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/tg3.c |   39 ++++++++++++++++++++++++++++-----------
 drivers/net/tg3.h |   17 ++++++++++-------
 2 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index ea41d76..7be10cf 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -949,6 +949,19 @@ static int tg3_phydsp_write(struct tg3 *tp, u32 reg, u32 val)
 	return err;
 }
 
+static int tg3_phy_auxctl_read(struct tg3 *tp, int reg, u32 *val)
+{
+	int err;
+
+	err = tg3_writephy(tp, MII_TG3_AUX_CTRL,
+			   (reg << MII_TG3_AUXCTL_MISC_RDSEL_SHIFT) |
+			   MII_TG3_AUXCTL_SHDWSEL_MISC);
+	if (!err)
+		err = tg3_readphy(tp, MII_TG3_AUX_CTRL, val);
+
+	return err;
+}
+
 static int tg3_bmcr_reset(struct tg3 *tp)
 {
 	u32 phy_control;
@@ -1679,10 +1692,11 @@ static void tg3_phy_toggle_automdix(struct tg3 *tp, int enable)
 			tg3_writephy(tp, MII_TG3_FET_TEST, ephy);
 		}
 	} else {
-		phy = MII_TG3_AUXCTL_MISC_RDSEL_MISC |
-		      MII_TG3_AUXCTL_SHDWSEL_MISC;
-		if (!tg3_writephy(tp, MII_TG3_AUX_CTRL, phy) &&
-		    !tg3_readphy(tp, MII_TG3_AUX_CTRL, &phy)) {
+		int ret;
+
+		ret = tg3_phy_auxctl_read(tp,
+					  MII_TG3_AUXCTL_SHDWSEL_MISC, &phy);
+		if (!ret) {
 			if (enable)
 				phy |= MII_TG3_AUXCTL_MISC_FORCE_AMDIX;
 			else
@@ -1695,13 +1709,14 @@ static void tg3_phy_toggle_automdix(struct tg3 *tp, int enable)
 
 static void tg3_phy_set_wirespeed(struct tg3 *tp)
 {
+	int ret;
 	u32 val;
 
 	if (tp->phy_flags & TG3_PHYFLG_NO_ETH_WIRE_SPEED)
 		return;
 
-	if (!tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x7007) &&
-	    !tg3_readphy(tp, MII_TG3_AUX_CTRL, &val))
+	ret = tg3_phy_auxctl_read(tp, MII_TG3_AUXCTL_SHDWSEL_MISC, &val);
+	if (!ret)
 		tg3_writephy(tp, MII_TG3_AUX_CTRL,
 			     (val | (1 << 15) | (1 << 4)));
 }
@@ -2092,8 +2107,9 @@ out:
 		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x4c20);
 	} else if (tp->tg3_flags & TG3_FLAG_JUMBO_CAPABLE) {
 		/* Set bit 14 with read-modify-write to preserve other bits */
-		if (!tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x0007) &&
-		    !tg3_readphy(tp, MII_TG3_AUX_CTRL, &val))
+		err = tg3_phy_auxctl_read(tp,
+					  MII_TG3_AUXCTL_SHDWSEL_AUXCTL, &val);
+		if (!err)
 			tg3_writephy(tp, MII_TG3_AUX_CTRL, val | 0x4000);
 	}
 
@@ -3263,9 +3279,10 @@ static int tg3_setup_copper_phy(struct tg3 *tp, int force_reset)
 	current_duplex = DUPLEX_INVALID;
 
 	if (tp->phy_flags & TG3_PHYFLG_CAPACITIVE_COUPLING) {
-		tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x4007);
-		tg3_readphy(tp, MII_TG3_AUX_CTRL, &val);
-		if (!(val & (1 << 10))) {
+		err = tg3_phy_auxctl_read(tp,
+					  MII_TG3_AUXCTL_SHDWSEL_MISCTEST,
+					  &val);
+		if (!err && !(val & (1 << 10))) {
 			val |= (1 << 10);
 			tg3_writephy(tp, MII_TG3_AUX_CTRL, val);
 			goto relink;
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index dd331f8..b9382f1 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2194,19 +2194,22 @@
 
 #define MII_TG3_AUX_CTRL		0x18 /* auxiliary control register */
 
+#define MII_TG3_AUXCTL_SHDWSEL_AUXCTL	0x0000
+#define MII_TG3_AUXCTL_ACTL_TX_6DB	0x0400
+#define MII_TG3_AUXCTL_ACTL_SMDSP_ENA	0x0800
+
+#define MII_TG3_AUXCTL_SHDWSEL_PWRCTL	0x0002
 #define MII_TG3_AUXCTL_PCTL_100TX_LPWR	0x0010
 #define MII_TG3_AUXCTL_PCTL_SPR_ISOLATE	0x0020
 #define MII_TG3_AUXCTL_PCTL_VREG_11V	0x0180
-#define MII_TG3_AUXCTL_SHDWSEL_PWRCTL	0x0002
 
-#define MII_TG3_AUXCTL_MISC_WREN	0x8000
-#define MII_TG3_AUXCTL_MISC_FORCE_AMDIX	0x0200
-#define MII_TG3_AUXCTL_MISC_RDSEL_MISC	0x7000
+#define MII_TG3_AUXCTL_SHDWSEL_MISCTEST	0x0004
+
 #define MII_TG3_AUXCTL_SHDWSEL_MISC	0x0007
+#define MII_TG3_AUXCTL_MISC_FORCE_AMDIX	0x0200
+#define MII_TG3_AUXCTL_MISC_RDSEL_SHIFT	12
+#define MII_TG3_AUXCTL_MISC_WREN	0x8000
 
-#define MII_TG3_AUXCTL_ACTL_SMDSP_ENA	0x0800
-#define MII_TG3_AUXCTL_ACTL_TX_6DB	0x0400
-#define MII_TG3_AUXCTL_SHDWSEL_AUXCTL	0x0000
 
 #define MII_TG3_AUX_STAT		0x19 /* auxiliary status register */
 #define MII_TG3_AUX_STAT_LPASS		0x0004
-- 
1.7.3.4



^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox