netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] seq_file: add RCU versions of new hlist/list iterators (v2)
       [not found] <20100219054145.959067404@vyatta.com>
@ 2010-02-19  5:41 ` Stephen Hemminger
  2010-02-19  5:41 ` [PATCH 2/2] packet: convert socket list to RCU Stephen Hemminger
  1 sibling, 0 replies; 3+ messages in thread
From: Stephen Hemminger @ 2010-02-19  5:41 UTC (permalink / raw)
  To: David S. Miller, Li Zefan; +Cc: netdev, linux-kernel

[-- Attachment #1: seq_hlist_rcu.patch --]
[-- Type: text/plain, Size: 5194 bytes --]

Many users of seq_file iterate over RCU-protected lists, so the non-RCU
iterators cannot be used safely there. This required adding a
__hlist_for_each_rcu macro (analogous to __list_for_each_rcu).

Also indented prototypes to match existing style.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

---
 fs/seq_file.c            |   88 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/rculist.h  |    5 ++
 include/linux/seq_file.h |   27 +++++++++-----
 3 files changed, 111 insertions(+), 9 deletions(-)

--- a/fs/seq_file.c	2010-02-18 15:08:53.228872265 -0800
+++ b/fs/seq_file.c	2010-02-18 15:37:09.660996801 -0800
@@ -695,6 +695,38 @@ struct list_head *seq_list_next(void *v,
 }
 EXPORT_SYMBOL(seq_list_next);
 
+struct list_head *seq_list_start_rcu(struct list_head *head, loff_t pos)
+{
+	struct list_head *lh;
+
+	__list_for_each_rcu(lh, head)
+		if (pos-- == 0)
+			return lh;
+
+	return NULL;
+}
+EXPORT_SYMBOL(seq_list_start_rcu);
+
+struct list_head *seq_list_start_head_rcu(struct list_head *head, loff_t pos)
+{
+	if (!pos)
+		return head;
+
+	return seq_list_start_rcu(head, pos - 1);
+}
+EXPORT_SYMBOL(seq_list_start_head_rcu);
+
+struct list_head *seq_list_next_rcu(void *v, struct list_head *head,
+				    loff_t *ppos)
+{
+	struct list_head *lh = v;
+
+	lh = rcu_dereference(lh->next);
+	++*ppos;
+	return lh == head ? NULL : lh;
+}
+EXPORT_SYMBOL(seq_list_next_rcu);
+
 /**
  * seq_hlist_start - start an iteration of a hlist
  * @head: the head of the hlist
@@ -750,3 +782,59 @@ struct hlist_node *seq_hlist_next(void *
 		return node->next;
 }
 EXPORT_SYMBOL(seq_hlist_next);
+
+/**
+ * seq_hlist_start_rcu - start an iteration of a hlist protected by RCU
+ * @head: the head of the hlist
+ * @pos:  the start position of the sequence
+ *
+ * Called at seq_file->op->start().
+ */
+struct hlist_node *seq_hlist_start_rcu(struct hlist_head *head, loff_t pos)
+{
+	struct hlist_node *node;
+
+	__hlist_for_each_rcu(node, head)
+		if (pos-- == 0)
+			return node;
+	return NULL;
+}
+EXPORT_SYMBOL(seq_hlist_start_rcu);
+
+/**
+ * seq_hlist_start_head_rcu - start an iteration of a hlist protected by RCU
+ * @head: the head of the hlist
+ * @pos:  the start position of the sequence
+ *
+ * Called at seq_file->op->start(). Call this function if you want to
+ * print a header at the top of the output.
+ */
+struct hlist_node *seq_hlist_start_head_rcu(struct hlist_head *head, loff_t pos)
+{
+	if (!pos)
+		return SEQ_START_TOKEN;
+
+	return seq_hlist_start_rcu(head, pos - 1);
+}
+EXPORT_SYMBOL(seq_hlist_start_head_rcu);
+
+/**
+ * seq_hlist_next_rcu - move to the next position of the hlist protected by RCU
+ * @v:    the current iterator
+ * @head: the head of the hlist
+ * @ppos: the current position
+ *
+ * Called at seq_file->op->next().
+ */
+struct hlist_node *seq_hlist_next_rcu(void *v, struct hlist_head *head,
+				      loff_t *ppos)
+{
+	struct hlist_node *node = v;
+
+	++*ppos;
+	if (v == SEQ_START_TOKEN)
+		return rcu_dereference(head->first);
+	else
+		return rcu_dereference(node->next);
+}
+EXPORT_SYMBOL(seq_hlist_next_rcu);
--- a/include/linux/seq_file.h	2010-02-18 15:08:53.256872028 -0800
+++ b/include/linux/seq_file.h	2010-02-18 15:09:14.128997914 -0800
@@ -127,23 +127,32 @@ int seq_release_private(struct inode *, 
 /*
  * Helpers for iteration over list_head-s in seq_files
  */
-
 extern struct list_head *seq_list_start(struct list_head *head,
-		loff_t pos);
+					loff_t pos);
 extern struct list_head *seq_list_start_head(struct list_head *head,
-		loff_t pos);
+					     loff_t pos);
 extern struct list_head *seq_list_next(void *v, struct list_head *head,
-		loff_t *ppos);
-
+				       loff_t *ppos);
+extern struct list_head *seq_list_start_rcu(struct list_head *head,
+					    loff_t pos);
+extern struct list_head *seq_list_start_head_rcu(struct list_head *head,
+						 loff_t pos);
+extern struct list_head *seq_list_next_rcu(void *v, struct list_head *head,
+					   loff_t *ppos);
 /*
  * Helpers for iteration over hlist_head-s in seq_files
  */
-
 extern struct hlist_node *seq_hlist_start(struct hlist_head *head,
-		loff_t pos);
+					  loff_t pos);
 extern struct hlist_node *seq_hlist_start_head(struct hlist_head *head,
-		loff_t pos);
+					       loff_t pos);
 extern struct hlist_node *seq_hlist_next(void *v, struct hlist_head *head,
-		loff_t *ppos);
+					 loff_t *ppos);
 
+extern struct hlist_node *seq_hlist_start_rcu(struct hlist_head *head,
+					      loff_t pos);
+extern struct hlist_node *seq_hlist_start_head_rcu(struct hlist_head *head,
+						   loff_t pos);
+extern struct hlist_node *seq_hlist_next_rcu(void *v, struct hlist_head *head,
+					     loff_t *ppos);
 #endif
--- a/include/linux/rculist.h	2010-02-18 15:25:03.681496691 -0800
+++ b/include/linux/rculist.h	2010-02-18 15:33:26.352872829 -0800
@@ -406,6 +406,11 @@ static inline void hlist_add_after_rcu(s
 		n->next->pprev = &n->next;
 }
 
+#define __hlist_for_each_rcu(pos, head)			\
+	for (pos = rcu_dereference((head)->first);	\
+	     pos && ({ prefetch(pos->next); 1; });	\
+	     pos = rcu_dereference(pos->next))
+
 /**
  * hlist_for_each_entry_rcu - iterate over rcu list of given type
  * @tpos:	the type * to use as a loop cursor.

-- 


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 2/2] packet: convert socket list to RCU
       [not found] <20100219054145.959067404@vyatta.com>
  2010-02-19  5:41 ` [PATCH 1/2] seq_file: add RCU versions of new hlist/list iterators (v2) Stephen Hemminger
@ 2010-02-19  5:41 ` Stephen Hemminger
  2010-02-19 15:16   ` Eric Dumazet
  1 sibling, 1 reply; 3+ messages in thread
From: Stephen Hemminger @ 2010-02-19  5:41 UTC (permalink / raw)
  To: David S. Miller, Li Zefan; +Cc: netdev, linux-kernel

[-- Attachment #1: packet-list-rcu.patch --]
[-- Type: text/plain, Size: 5413 bytes --]

Convert AF_PACKET to use RCU, eliminating one more reader/writer lock.

I needed to create some minor additional socket list RCU infrastructure
to make this work. Note: there is no need for a real sk_del_node_init_rcu(), 
because sk_del_node_init is doing the equivalent thing to 
hlist_del_init_rcu already; but added some comments to try and make that obvious.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>

---
 include/net/netns/packet.h |    4 ++--
 include/net/sock.h         |   10 ++++++++++
 net/packet/af_packet.c     |   42 ++++++++++++++++++++----------------------
 3 files changed, 32 insertions(+), 24 deletions(-)

--- a/include/net/netns/packet.h	2010-02-18 15:08:59.532872158 -0800
+++ b/include/net/netns/packet.h	2010-02-18 15:09:16.433496523 -0800
@@ -4,11 +4,11 @@
 #ifndef __NETNS_PACKET_H__
 #define __NETNS_PACKET_H__
 
-#include <linux/list.h>
+#include <linux/rculist.h>
 #include <linux/spinlock.h>
 
 struct netns_packet {
-	rwlock_t		sklist_lock;
+	spinlock_t		sklist_lock;
 	struct hlist_head	sklist;
 };
 
--- a/net/packet/af_packet.c	2010-02-18 15:08:59.520873035 -0800
+++ b/net/packet/af_packet.c	2010-02-18 15:10:33.908871539 -0800
@@ -1262,18 +1262,15 @@ static int packet_release(struct socket 
 	net = sock_net(sk);
 	po = pkt_sk(sk);
 
-	write_lock_bh(&net->packet.sklist_lock);
-	sk_del_node_init(sk);
+	spin_lock_bh(&net->packet.sklist_lock);
+	sk_del_node_init_rcu(sk);
 	sock_prot_inuse_add(net, sk->sk_prot, -1);
-	write_unlock_bh(&net->packet.sklist_lock);
-
-	/*
-	 *	Unhook packet receive handler.
-	 */
+	spin_unlock_bh(&net->packet.sklist_lock);
 
 	if (po->running) {
 		/*
-		 *	Remove the protocol hook
+		 * Remove from protocol table
+		 *  does synchronize_net()
 		 */
 		dev_remove_pack(&po->prot_hook);
 		po->running = 0;
@@ -1478,10 +1475,11 @@ static int packet_create(struct net *net
 		po->running = 1;
 	}
 
-	write_lock_bh(&net->packet.sklist_lock);
-	sk_add_node(sk, &net->packet.sklist);
+	spin_lock_bh(&net->packet.sklist_lock);
+	sk_add_node_rcu(sk, &net->packet.sklist);
 	sock_prot_inuse_add(net, &packet_proto, 1);
-	write_unlock_bh(&net->packet.sklist_lock);
+	spin_unlock_bh(&net->packet.sklist_lock);
+
 	return 0;
 out:
 	return err;
@@ -2075,8 +2073,8 @@ static int packet_notifier(struct notifi
 	struct net_device *dev = data;
 	struct net *net = dev_net(dev);
 
-	read_lock(&net->packet.sklist_lock);
-	sk_for_each(sk, node, &net->packet.sklist) {
+	rcu_read_lock();
+	sk_for_each_rcu(sk, node, &net->packet.sklist) {
 		struct packet_sock *po = pkt_sk(sk);
 
 		switch (msg) {
@@ -2115,7 +2113,7 @@ static int packet_notifier(struct notifi
 			break;
 		}
 	}
-	read_unlock(&net->packet.sklist_lock);
+	rcu_read_unlock();
 	return NOTIFY_DONE;
 }
 
@@ -2512,24 +2510,24 @@ static struct notifier_block packet_netd
 #ifdef CONFIG_PROC_FS
 
 static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(seq_file_net(seq)->packet.sklist_lock)
+	__acquires(RCU)
 {
 	struct net *net = seq_file_net(seq);
-	read_lock(&net->packet.sklist_lock);
-	return seq_hlist_start_head(&net->packet.sklist, *pos);
+
+	rcu_read_lock();
+	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
 }
 
 static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct net *net = seq_file_net(seq);
-	return seq_hlist_next(v, &net->packet.sklist, pos);
+	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
 }
 
 static void packet_seq_stop(struct seq_file *seq, void *v)
-	__releases(seq_file_net(seq)->packet.sklist_lock)
+	__releases(RCU)
 {
-	struct net *net = seq_file_net(seq);
-	read_unlock(&net->packet.sklist_lock);
+	rcu_read_unlock();
 }
 
 static int packet_seq_show(struct seq_file *seq, void *v)
@@ -2581,7 +2579,7 @@ static const struct file_operations pack
 
 static int __net_init packet_net_init(struct net *net)
 {
-	rwlock_init(&net->packet.sklist_lock);
+	spin_lock_init(&net->packet.sklist_lock);
 	INIT_HLIST_HEAD(&net->packet.sklist);
 
 	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
--- a/include/net/sock.h	2010-02-18 15:08:59.548872240 -0800
+++ b/include/net/sock.h	2010-02-18 15:09:16.437496392 -0800
@@ -381,6 +381,7 @@ static __inline__ void __sk_del_node(str
 	__hlist_del(&sk->sk_node);
 }
 
+/* NB: equivalent to hlist_del_init_rcu */
 static __inline__ int __sk_del_node_init(struct sock *sk)
 {
 	if (sk_hashed(sk)) {
@@ -421,6 +422,7 @@ static __inline__ int sk_del_node_init(s
 	}
 	return rc;
 }
+#define sk_del_node_init_rcu(sk)	sk_del_node_init(sk)
 
 static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk)
 {
@@ -454,6 +456,12 @@ static __inline__ void sk_add_node(struc
 	__sk_add_node(sk, list);
 }
 
+static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
+{
+	sock_hold(sk);
+	hlist_add_head_rcu(&sk->sk_node, list);
+}
+
 static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
 {
 	hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
@@ -478,6 +486,8 @@ static __inline__ void sk_add_bind_node(
 
 #define sk_for_each(__sk, node, list) \
 	hlist_for_each_entry(__sk, node, list, sk_node)
+#define sk_for_each_rcu(__sk, node, list) \
+	hlist_for_each_entry_rcu(__sk, node, list, sk_node)
 #define sk_nulls_for_each(__sk, node, list) \
 	hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
 #define sk_nulls_for_each_rcu(__sk, node, list) \

-- 


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 2/2] packet: convert socket list to RCU
  2010-02-19  5:41 ` [PATCH 2/2] packet: convert socket list to RCU Stephen Hemminger
@ 2010-02-19 15:16   ` Eric Dumazet
  0 siblings, 0 replies; 3+ messages in thread
From: Eric Dumazet @ 2010-02-19 15:16 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: David S. Miller, Li Zefan, netdev, linux-kernel

Le jeudi 18 février 2010 à 21:41 -0800, Stephen Hemminger a écrit :

> Convert AF_PACKET to use RCU, eliminating one more reader/writer lock.
> 
> I needed to create some minor additional socket list RCU infrastructure
> to make this work. Note: there is no need for a real sk_del_node_init_rcu(), 
> because sk_del_node_init is doing the equivalent thing to 
> hlist_del_init_rcu already; but added some comments to try and make that obvious.
> 
> Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
> 

Stephen, I am a bit worried by the interaction between packet_release()
and packet_notifier()

With your version, packet_notifier() can run while another cpu runs
packet_release() uncontended. Both cpus could manipulate the same po (and
particularly po->running).

Before your patch, the read_lock() taken in packet_notifier() was
preventing packet_release() from running at the same time.

Maybe packet_release() should lock po->bind_lock before manipulating
po->running, avoiding a refcount error.

Something like this preliminary patch :

[PATCH] packet: fix a race in packet_release

packet_release() has a potential race with packet_notifier(NETDEV_DOWN),
leading to a double __sock_put(). (dev_remove_pack() is safe)

The fix is to always take po->bind_lock before accessing po->running.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 10f7295..b706031 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1271,15 +1271,15 @@ static int packet_release(struct socket *sock)
 	 *	Unhook packet receive handler.
 	 */
 
+	spin_lock(&po->bind_lock);
 	if (po->running) {
-		/*
-		 *	Remove the protocol hook
-		 */
-		dev_remove_pack(&po->prot_hook);
+		__sock_put(sk);
 		po->running = 0;
 		po->num = 0;
-		__sock_put(sk);
-	}
+		spin_unlock(&po->bind_lock);
+		dev_remove_pack(&po->prot_hook);
+	} else
+		spin_unlock(&po->bind_lock);
 
 	packet_flush_mclist(sk);
 



^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2010-02-19 15:16 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20100219054145.959067404@vyatta.com>
2010-02-19  5:41 ` [PATCH 1/2] seq_file: add RCU versions of new hlist/list iterators (v2) Stephen Hemminger
2010-02-19  5:41 ` [PATCH 2/2] packet: convert socket list to RCU Stephen Hemminger
2010-02-19 15:16   ` Eric Dumazet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).