* [PATCH 1/3] seq_file: add RCU versions of new hlist/list iterators (v3)
[not found] <20100222175716.900955428@vyatta.com>
@ 2010-02-22 17:57 ` Stephen Hemminger
2010-02-22 23:46 ` David Miller
2010-02-22 17:57 ` [PATCH 2/3] packet: convert socket list to RCU (v3) Stephen Hemminger
2010-02-22 17:57 ` [PATCH 3/3] af_key: locking change Stephen Hemminger
2 siblings, 1 reply; 6+ messages in thread
From: Stephen Hemminger @ 2010-02-22 17:57 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev
[-- Attachment #1: seq_hlist_rcu.patch --]
[-- Type: text/plain, Size: 4142 bytes --]
Many usages of seq_file use RCU protected lists, so non RCU
iterators will not work safely.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
Add comment (same as other RCU functions)
fs/seq_file.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++
include/linux/rculist.h | 5 ++++
include/linux/seq_file.h | 12 +++++++---
3 files changed, 70 insertions(+), 3 deletions(-)
--- a/fs/seq_file.c 2010-02-22 09:17:18.362742798 -0800
+++ b/fs/seq_file.c 2010-02-22 09:20:54.481180584 -0800
@@ -750,3 +750,74 @@ struct hlist_node *seq_hlist_next(void *
return node->next;
}
EXPORT_SYMBOL(seq_hlist_next);
+
+/**
+ * seq_hlist_start_rcu - start an iteration of a hlist protected by RCU
+ * @head: the head of the hlist
+ * @pos: the start position of the sequence
+ *
+ * Called at seq_file->op->start().
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+struct hlist_node *seq_hlist_start_rcu(struct hlist_head *head,
+ loff_t pos)
+{
+ struct hlist_node *node;
+
+ __hlist_for_each_rcu(node, head)
+ if (pos-- == 0)
+ return node;
+ return NULL;
+}
+EXPORT_SYMBOL(seq_hlist_start_rcu);
+
+/**
+ * seq_hlist_start_head_rcu - start an iteration of a hlist protected by RCU
+ * @head: the head of the hlist
+ * @pos: the start position of the sequence
+ *
+ * Called at seq_file->op->start(). Call this function if you want to
+ * print a header at the top of the output.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+struct hlist_node *seq_hlist_start_head_rcu(struct hlist_head *head,
+ loff_t pos)
+{
+ if (!pos)
+ return SEQ_START_TOKEN;
+
+ return seq_hlist_start_rcu(head, pos - 1);
+}
+EXPORT_SYMBOL(seq_hlist_start_head_rcu);
+
+/**
+ * seq_hlist_next_rcu - move to the next position of the hlist protected by RCU
+ * @v: the current iterator
+ * @head: the head of the hlist
+ * @pos: the current posision
+ *
+ * Called at seq_file->op->next().
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ */
+struct hlist_node *seq_hlist_next_rcu(void *v,
+ struct hlist_head *head,
+ loff_t *ppos)
+{
+ struct hlist_node *node = v;
+
+ ++*ppos;
+ if (v == SEQ_START_TOKEN)
+ return rcu_dereference(head->first);
+ else
+ return rcu_dereference(node->next);
+}
+EXPORT_SYMBOL(seq_hlist_next_rcu);
--- a/include/linux/seq_file.h 2010-02-22 09:17:18.382742869 -0800
+++ b/include/linux/seq_file.h 2010-02-22 09:21:10.841180579 -0800
@@ -140,10 +140,17 @@ extern struct list_head *seq_list_next(v
*/
extern struct hlist_node *seq_hlist_start(struct hlist_head *head,
- loff_t pos);
+ loff_t pos);
extern struct hlist_node *seq_hlist_start_head(struct hlist_head *head,
- loff_t pos);
+ loff_t pos);
extern struct hlist_node *seq_hlist_next(void *v, struct hlist_head *head,
- loff_t *ppos);
+ loff_t *ppos);
+extern struct hlist_node *seq_hlist_start_rcu(struct hlist_head *head,
+ loff_t pos);
+extern struct hlist_node *seq_hlist_start_head_rcu(struct hlist_head *head,
+ loff_t pos);
+extern struct hlist_node *seq_hlist_next_rcu(void *v,
+ struct hlist_head *head,
+ loff_t *ppos);
#endif
--- a/include/linux/rculist.h 2010-02-22 09:17:18.372742980 -0800
+++ b/include/linux/rculist.h 2010-02-22 09:17:26.771810143 -0800
@@ -406,6 +406,11 @@ static inline void hlist_add_after_rcu(s
n->next->pprev = &n->next;
}
+#define __hlist_for_each_rcu(pos, head) \
+ for (pos = rcu_dereference((head)->first); \
+ pos && ({ prefetch(pos->next); 1; }); \
+ pos = rcu_dereference(pos->next))
+
/**
* hlist_for_each_entry_rcu - iterate over rcu list of given type
* @tpos: the type * to use as a loop cursor.
--
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 2/3] packet: convert socket list to RCU (v3)
[not found] <20100222175716.900955428@vyatta.com>
2010-02-22 17:57 ` [PATCH 1/3] seq_file: add RCU versions of new hlist/list iterators (v3) Stephen Hemminger
@ 2010-02-22 17:57 ` Stephen Hemminger
2010-02-22 23:46 ` David Miller
2010-02-22 17:57 ` [PATCH 3/3] af_key: locking change Stephen Hemminger
2 siblings, 1 reply; 6+ messages in thread
From: Stephen Hemminger @ 2010-02-22 17:57 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev
[-- Attachment #1: packet-list-rcu.patch --]
[-- Type: text/plain, Size: 6199 bytes --]
Convert AF_PACKET to use RCU, eliminating one more reader/writer lock.
There is no need for a real sk_del_node_init_rcu(), because sk_del_node_init
is doing the equivalent thing to hlst_del_init_rcu already; but added
some comments to try and make that obvious.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
---
include/net/netns/packet.h | 4 ++--
include/net/sock.h | 10 ++++++++++
net/packet/af_packet.c | 42 ++++++++++++++++++++----------------------
3 files changed, 32 insertions(+), 24 deletions(-)
--- a/include/net/netns/packet.h 2010-02-22 09:07:28.562430023 -0800
+++ b/include/net/netns/packet.h 2010-02-22 09:11:00.042269423 -0800
@@ -4,11 +4,11 @@
#ifndef __NETNS_PACKET_H__
#define __NETNS_PACKET_H__
-#include <linux/list.h>
+#include <linux/rculist.h>
#include <linux/spinlock.h>
struct netns_packet {
- rwlock_t sklist_lock;
+ spinlock_t sklist_lock;
struct hlist_head sklist;
};
--- a/net/packet/af_packet.c 2010-02-22 09:07:28.551185873 -0800
+++ b/net/packet/af_packet.c 2010-02-22 09:11:00.063367114 -0800
@@ -1262,24 +1262,22 @@ static int packet_release(struct socket
net = sock_net(sk);
po = pkt_sk(sk);
- write_lock_bh(&net->packet.sklist_lock);
- sk_del_node_init(sk);
+ spin_lock_bh(&net->packet.sklist_lock);
+ sk_del_node_init_rcu(sk);
sock_prot_inuse_add(net, sk->sk_prot, -1);
- write_unlock_bh(&net->packet.sklist_lock);
-
- /*
- * Unhook packet receive handler.
- */
+ spin_unlock_bh(&net->packet.sklist_lock);
+ spin_lock(&po->bind_lock);
if (po->running) {
/*
- * Remove the protocol hook
+ * Remove from protocol table
*/
- dev_remove_pack(&po->prot_hook);
po->running = 0;
po->num = 0;
+ __dev_remove_pack(&po->prot_hook);
__sock_put(sk);
}
+ spin_unlock(&po->bind_lock);
packet_flush_mclist(sk);
@@ -1291,10 +1289,10 @@ static int packet_release(struct socket
if (po->tx_ring.pg_vec)
packet_set_ring(sk, &req, 1, 1);
+ synchronize_net();
/*
* Now the socket is dead. No more input will appear.
*/
-
sock_orphan(sk);
sock->sk = NULL;
@@ -1478,10 +1476,11 @@ static int packet_create(struct net *net
po->running = 1;
}
- write_lock_bh(&net->packet.sklist_lock);
- sk_add_node(sk, &net->packet.sklist);
+ spin_lock_bh(&net->packet.sklist_lock);
+ sk_add_node_rcu(sk, &net->packet.sklist);
sock_prot_inuse_add(net, &packet_proto, 1);
- write_unlock_bh(&net->packet.sklist_lock);
+ spin_unlock_bh(&net->packet.sklist_lock);
+
return 0;
out:
return err;
@@ -2075,8 +2074,8 @@ static int packet_notifier(struct notifi
struct net_device *dev = data;
struct net *net = dev_net(dev);
- read_lock(&net->packet.sklist_lock);
- sk_for_each(sk, node, &net->packet.sklist) {
+ rcu_read_lock();
+ sk_for_each_rcu(sk, node, &net->packet.sklist) {
struct packet_sock *po = pkt_sk(sk);
switch (msg) {
@@ -2104,18 +2103,19 @@ static int packet_notifier(struct notifi
}
break;
case NETDEV_UP:
- spin_lock(&po->bind_lock);
- if (dev->ifindex == po->ifindex && po->num &&
- !po->running) {
- dev_add_pack(&po->prot_hook);
- sock_hold(sk);
- po->running = 1;
+ if (dev->ifindex == po->ifindex) {
+ spin_lock(&po->bind_lock);
+ if (po->num && !po->running) {
+ dev_add_pack(&po->prot_hook);
+ sock_hold(sk);
+ po->running = 1;
+ }
+ spin_unlock(&po->bind_lock);
}
- spin_unlock(&po->bind_lock);
break;
}
}
- read_unlock(&net->packet.sklist_lock);
+ rcu_read_unlock();
return NOTIFY_DONE;
}
@@ -2512,24 +2512,24 @@ static struct notifier_block packet_netd
#ifdef CONFIG_PROC_FS
static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(seq_file_net(seq)->packet.sklist_lock)
+ __acquires(RCU)
{
struct net *net = seq_file_net(seq);
- read_lock(&net->packet.sklist_lock);
- return seq_hlist_start_head(&net->packet.sklist, *pos);
+
+ rcu_read_lock();
+ return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
}
static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct net *net = seq_file_net(seq);
- return seq_hlist_next(v, &net->packet.sklist, pos);
+ return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
}
static void packet_seq_stop(struct seq_file *seq, void *v)
- __releases(seq_file_net(seq)->packet.sklist_lock)
+ __releases(RCU)
{
- struct net *net = seq_file_net(seq);
- read_unlock(&net->packet.sklist_lock);
+ rcu_read_unlock();
}
static int packet_seq_show(struct seq_file *seq, void *v)
@@ -2581,7 +2581,7 @@ static const struct file_operations pack
static int __net_init packet_net_init(struct net *net)
{
- rwlock_init(&net->packet.sklist_lock);
+ spin_lock_init(&net->packet.sklist_lock);
INIT_HLIST_HEAD(&net->packet.sklist);
if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
--- a/include/net/sock.h 2010-02-22 09:07:28.572429848 -0800
+++ b/include/net/sock.h 2010-02-22 09:11:00.063367114 -0800
@@ -381,6 +381,7 @@ static __inline__ void __sk_del_node(str
__hlist_del(&sk->sk_node);
}
+/* NB: equivalent to hlist_del_init_rcu */
static __inline__ int __sk_del_node_init(struct sock *sk)
{
if (sk_hashed(sk)) {
@@ -421,6 +422,7 @@ static __inline__ int sk_del_node_init(s
}
return rc;
}
+#define sk_del_node_init_rcu(sk) sk_del_node_init(sk)
static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk)
{
@@ -454,6 +456,12 @@ static __inline__ void sk_add_node(struc
__sk_add_node(sk, list);
}
+static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
+{
+ sock_hold(sk);
+ hlist_add_head_rcu(&sk->sk_node, list);
+}
+
static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
@@ -478,6 +486,8 @@ static __inline__ void sk_add_bind_node(
#define sk_for_each(__sk, node, list) \
hlist_for_each_entry(__sk, node, list, sk_node)
+#define sk_for_each_rcu(__sk, node, list) \
+ hlist_for_each_entry_rcu(__sk, node, list, sk_node)
#define sk_nulls_for_each(__sk, node, list) \
hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
#define sk_nulls_for_each_rcu(__sk, node, list) \
--
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 3/3] af_key: locking change
[not found] <20100222175716.900955428@vyatta.com>
2010-02-22 17:57 ` [PATCH 1/3] seq_file: add RCU versions of new hlist/list iterators (v3) Stephen Hemminger
2010-02-22 17:57 ` [PATCH 2/3] packet: convert socket list to RCU (v3) Stephen Hemminger
@ 2010-02-22 17:57 ` Stephen Hemminger
2010-02-22 23:46 ` David Miller
2 siblings, 1 reply; 6+ messages in thread
From: Stephen Hemminger @ 2010-02-22 17:57 UTC (permalink / raw)
To: David S. Miller; +Cc: netdev
[-- Attachment #1: pfkey-rcu.patch --]
[-- Type: text/plain, Size: 4076 bytes --]
Get rid of custom locking that was using wait queue, lock, and atomic
to basically build a queued mutex. Use RCU for read side.
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
--- a/net/key/af_key.c 2010-02-22 09:19:03.242742827 -0800
+++ b/net/key/af_key.c 2010-02-22 09:22:53.921492802 -0800
@@ -41,9 +41,7 @@ struct netns_pfkey {
struct hlist_head table;
atomic_t socks_nr;
};
-static DECLARE_WAIT_QUEUE_HEAD(pfkey_table_wait);
-static DEFINE_RWLOCK(pfkey_table_lock);
-static atomic_t pfkey_table_users = ATOMIC_INIT(0);
+static DEFINE_MUTEX(pfkey_mutex);
struct pfkey_sock {
/* struct sock must be the first member of struct pfkey_sock */
@@ -108,50 +106,6 @@ static void pfkey_sock_destruct(struct s
atomic_dec(&net_pfkey->socks_nr);
}
-static void pfkey_table_grab(void)
-{
- write_lock_bh(&pfkey_table_lock);
-
- if (atomic_read(&pfkey_table_users)) {
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue_exclusive(&pfkey_table_wait, &wait);
- for(;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (atomic_read(&pfkey_table_users) == 0)
- break;
- write_unlock_bh(&pfkey_table_lock);
- schedule();
- write_lock_bh(&pfkey_table_lock);
- }
-
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&pfkey_table_wait, &wait);
- }
-}
-
-static __inline__ void pfkey_table_ungrab(void)
-{
- write_unlock_bh(&pfkey_table_lock);
- wake_up(&pfkey_table_wait);
-}
-
-static __inline__ void pfkey_lock_table(void)
-{
- /* read_lock() synchronizes us to pfkey_table_grab */
-
- read_lock(&pfkey_table_lock);
- atomic_inc(&pfkey_table_users);
- read_unlock(&pfkey_table_lock);
-}
-
-static __inline__ void pfkey_unlock_table(void)
-{
- if (atomic_dec_and_test(&pfkey_table_users))
- wake_up(&pfkey_table_wait);
-}
-
-
static const struct proto_ops pfkey_ops;
static void pfkey_insert(struct sock *sk)
@@ -159,16 +113,16 @@ static void pfkey_insert(struct sock *sk
struct net *net = sock_net(sk);
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
- pfkey_table_grab();
- sk_add_node(sk, &net_pfkey->table);
- pfkey_table_ungrab();
+ mutex_lock(&pfkey_mutex);
+ sk_add_node_rcu(sk, &net_pfkey->table);
+ mutex_unlock(&pfkey_mutex);
}
static void pfkey_remove(struct sock *sk)
{
- pfkey_table_grab();
- sk_del_node_init(sk);
- pfkey_table_ungrab();
+ mutex_lock(&pfkey_mutex);
+ sk_del_node_init_rcu(sk);
+ mutex_unlock(&pfkey_mutex);
}
static struct proto key_proto = {
@@ -223,6 +177,8 @@ static int pfkey_release(struct socket *
sock_orphan(sk);
sock->sk = NULL;
skb_queue_purge(&sk->sk_write_queue);
+
+ synchronize_rcu();
sock_put(sk);
return 0;
@@ -277,8 +233,8 @@ static int pfkey_broadcast(struct sk_buf
if (!skb)
return -ENOMEM;
- pfkey_lock_table();
- sk_for_each(sk, node, &net_pfkey->table) {
+ rcu_read_lock();
+ sk_for_each_rcu(sk, node, &net_pfkey->table) {
struct pfkey_sock *pfk = pfkey_sk(sk);
int err2;
@@ -309,7 +265,7 @@ static int pfkey_broadcast(struct sk_buf
if ((broadcast_flags & BROADCAST_REGISTERED) && err)
err = err2;
}
- pfkey_unlock_table();
+ rcu_read_unlock();
if (one_sk != NULL)
err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
@@ -3702,8 +3658,8 @@ static void *pfkey_seq_start(struct seq_
struct net *net = seq_file_net(f);
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
- read_lock(&pfkey_table_lock);
- return seq_hlist_start_head(&net_pfkey->table, *ppos);
+ rcu_read_lock();
+ return seq_hlist_start_head_rcu(&net_pfkey->table, *ppos);
}
static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos)
@@ -3711,12 +3667,12 @@ static void *pfkey_seq_next(struct seq_f
struct net *net = seq_file_net(f);
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
- return seq_hlist_next(v, &net_pfkey->table, ppos);
+ return seq_hlist_next_rcu(v, &net_pfkey->table, ppos);
}
static void pfkey_seq_stop(struct seq_file *f, void *v)
{
- read_unlock(&pfkey_table_lock);
+ rcu_read_unlock();
}
static const struct seq_operations pfkey_seq_ops = {
--
^ permalink raw reply [flat|nested] 6+ messages in thread