* [RFC 0/7] neighbour table lockless read
@ 2006-08-14 21:20 Stephen Hemminger
2006-08-14 21:20 ` [RFC 1/7] net neighbor: convert top level list to RCU Stephen Hemminger
` (6 more replies)
0 siblings, 7 replies; 12+ messages in thread
From: Stephen Hemminger @ 2006-08-14 21:20 UTC (permalink / raw)
To: David Miller; +Cc: netdev
This set of patches changes the neighbour table used by ARP and
IPv6 neighbour discovery to use RCU and seqlocks. This improves
performance by eliminating locked instructions in the common
case.
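
Roughly, the read paths end up looking like the sketch below (simplified,
not the exact patch code; identifiers follow the patches, see the
individual patches for the real thing):

	struct neighbour *n;
	struct hlist_node *tmp;
	unsigned seq;

	/* lockless lookup of a neighbour entry (cf. patch 4) */
	rcu_read_lock();
	hlist_for_each_entry_rcu(n, tmp, &tbl->hash_buckets[hash_val], hlist) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			neigh_hold(n);
			break;
		}
	}
	rcu_read_unlock();

	/* lockless read of its cached hardware address (cf. patch 5) */
	do {
		seq = read_seqbegin(&n->lock);
		memcpy(ha, n->ha, dev->addr_len);
	} while (read_seqretry(&n->lock, seq));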
--
^ permalink raw reply [flat|nested] 12+ messages in thread
* [RFC 1/7] net neighbor: convert top level list to RCU
2006-08-14 21:20 [RFC 0/7] neighbour table lockless read Stephen Hemminger
@ 2006-08-14 21:20 ` Stephen Hemminger
2006-08-14 21:20 ` [RFC 2/7] neighbour: convert neighbour hash table to hlist Stephen Hemminger
` (5 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Stephen Hemminger @ 2006-08-14 21:20 UTC (permalink / raw)
To: David Miller; +Cc: netdev
[-- Attachment #1: neigh-top-lock.patch --]
[-- Type: text/plain, Size: 6769 bytes --]
Change the top-level list of neighbour tables to use RCU.
Minor behaviour change: BUG() if a neighbour table is registered twice
(previously this only logged an error and dumped the stack).
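
The unregister path now has to wait for concurrent readers before the
table can be torn down; minimal sketch of the pattern used in
neigh_table_clear():

	spin_lock(&neigh_tbl_lock);
	list_del_rcu(&tbl->list);	/* unpublish the table */
	spin_unlock(&neigh_tbl_lock);

	synchronize_rcu();		/* wait for rcu_read_lock() walkers */
	/* now safe to free the hash tables */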
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
---
include/net/neighbour.h | 2 -
net/core/neighbour.c | 89 ++++++++++++++++++++++--------------------------
2 files changed, 42 insertions(+), 49 deletions(-)
--- net-2.6.19.orig/net/core/neighbour.c
+++ net-2.6.19/net/core/neighbour.c
@@ -61,7 +61,7 @@ static void neigh_app_notify(struct neig
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
-static struct neigh_table *neigh_tables;
+static LIST_HEAD(neigh_tables);
#ifdef CONFIG_PROC_FS
static struct file_operations neigh_stat_seq_fops;
#endif
@@ -93,11 +93,11 @@ static struct file_operations neigh_stat
It is supposed, that dev->hard_header is simplistic and does
not make callbacks to neighbour tables.
- The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
- list of neighbour tables. This list is used only in process context,
+ The last lock is neigh_tbl_lock. It controls updates to the
+ set of neighbour tables. Typically used only during initialization.
*/
-static DEFINE_RWLOCK(neigh_tbl_lock);
+static DEFINE_SPINLOCK(neigh_tbl_lock);
static int neigh_blackhole(struct sk_buff *skb)
{
@@ -1391,26 +1391,18 @@ void neigh_table_init(struct neigh_table
struct neigh_table *tmp;
neigh_table_init_no_netlink(tbl);
- write_lock(&neigh_tbl_lock);
- for (tmp = neigh_tables; tmp; tmp = tmp->next) {
- if (tmp->family == tbl->family)
- break;
- }
- tbl->next = neigh_tables;
- neigh_tables = tbl;
- write_unlock(&neigh_tbl_lock);
-
- if (unlikely(tmp)) {
- printk(KERN_ERR "NEIGH: Registering multiple tables for "
- "family %d\n", tbl->family);
- dump_stack();
+
+ spin_lock(&neigh_tbl_lock);
+ list_for_each_entry(tmp, &neigh_tables, list) {
+ BUG_ON(tmp->family == tbl->family);
}
+
+ list_add_rcu(&tbl->list, &neigh_tables);
+ spin_unlock(&neigh_tbl_lock);
}
int neigh_table_clear(struct neigh_table *tbl)
{
- struct neigh_table **tp;
-
/* It is not clean... Fix it to unload IPv6 module safely */
del_timer_sync(&tbl->gc_timer);
del_timer_sync(&tbl->proxy_timer);
@@ -1418,14 +1410,12 @@ int neigh_table_clear(struct neigh_table
neigh_ifdown(tbl, NULL);
if (atomic_read(&tbl->entries))
printk(KERN_CRIT "neighbour leakage\n");
- write_lock(&neigh_tbl_lock);
- for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
- if (*tp == tbl) {
- *tp = tbl->next;
- break;
- }
- }
- write_unlock(&neigh_tbl_lock);
+
+ spin_lock(&neigh_tbl_lock);
+ list_del_rcu(&tbl->list);
+ spin_unlock(&neigh_tbl_lock);
+
+ synchronize_rcu();
neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
tbl->hash_buckets = NULL;
@@ -1460,13 +1450,12 @@ int neigh_delete(struct sk_buff *skb, st
}
}
- read_lock(&neigh_tbl_lock);
- for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(tbl, &neigh_tables, list) {
struct neighbour *neigh;
if (tbl->family != ndm->ndm_family)
continue;
- read_unlock(&neigh_tbl_lock);
if (nla_len(dst_attr) < tbl->key_len)
goto out_dev_put;
@@ -1491,12 +1480,12 @@ int neigh_delete(struct sk_buff *skb, st
neigh_release(neigh);
goto out_dev_put;
}
- read_unlock(&neigh_tbl_lock);
err = -EAFNOSUPPORT;
out_dev_put:
if (dev)
dev_put(dev);
+ rcu_read_unlock();
out:
return err;
}
@@ -1529,15 +1518,14 @@ int neigh_add(struct sk_buff *skb, struc
goto out_dev_put;
}
- read_lock(&neigh_tbl_lock);
- for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(tbl, &neigh_tables, list) {
int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
struct neighbour *neigh;
void *dst, *lladdr;
if (tbl->family != ndm->ndm_family)
continue;
- read_unlock(&neigh_tbl_lock);
if (nla_len(tb[NDA_DST]) < tbl->key_len)
goto out_dev_put;
@@ -1582,12 +1570,12 @@ int neigh_add(struct sk_buff *skb, struc
goto out_dev_put;
}
- read_unlock(&neigh_tbl_lock);
err = -EAFNOSUPPORT;
out_dev_put:
if (dev)
dev_put(dev);
+ rcu_read_unlock();
out:
return err;
}
@@ -1792,8 +1780,8 @@ int neightbl_set(struct sk_buff *skb, st
}
ndtmsg = nlmsg_data(nlh);
- read_lock(&neigh_tbl_lock);
- for (tbl = neigh_tables; tbl; tbl = tbl->next) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(tbl, &neigh_tables, list) {
if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
continue;
@@ -1893,26 +1881,26 @@ int neightbl_set(struct sk_buff *skb, st
errout_tbl_lock:
write_unlock_bh(&tbl->lock);
errout_locked:
- read_unlock(&neigh_tbl_lock);
+ rcu_read_unlock();
errout:
return err;
}
int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
- int family, tidx, nidx = 0;
+ int family, tidx = 0, nidx = 0;
int tbl_skip = cb->args[0];
int neigh_skip = cb->args[1];
struct neigh_table *tbl;
family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
- read_lock(&neigh_tbl_lock);
- for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(tbl, &neigh_tables, list) {
struct neigh_parms *p;
if (tidx < tbl_skip || (family && tbl->family != family))
- continue;
+ goto next_tbl;
if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
@@ -1932,9 +1920,11 @@ int neightbl_dump_info(struct sk_buff *s
}
neigh_skip = 0;
+next_tbl:
+ ++tidx;
}
out:
- read_unlock(&neigh_tbl_lock);
+ rcu_read_unlock();
cb->args[0] = tidx;
cb->args[1] = nidx;
@@ -2024,22 +2014,25 @@ out:
int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
struct neigh_table *tbl;
- int t, family, s_t;
+ int t = 0, family, s_t;
+
- read_lock(&neigh_tbl_lock);
family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
s_t = cb->args[0];
- for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(tbl, &neigh_tables, list) {
if (t < s_t || (family && tbl->family != family))
- continue;
+ goto next;
if (t > s_t)
memset(&cb->args[1], 0, sizeof(cb->args) -
sizeof(cb->args[0]));
if (neigh_dump_table(tbl, skb, cb) < 0)
break;
+next:
+ ++t;
}
- read_unlock(&neigh_tbl_lock);
+ rcu_read_unlock();
cb->args[0] = t;
return skb->len;
--- net-2.6.19.orig/include/net/neighbour.h
+++ net-2.6.19/include/net/neighbour.h
@@ -136,7 +136,7 @@ struct pneigh_entry
struct neigh_table
{
- struct neigh_table *next;
+ struct list_head list;
int family;
int entry_size;
int key_len;
--
^ permalink raw reply [flat|nested] 12+ messages in thread
* [RFC 2/7] neighbour: convert neighbour hash table to hlist
2006-08-14 21:20 [RFC 0/7] neighbour table lockless read Stephen Hemminger
2006-08-14 21:20 ` [RFC 1/7] net neighbor: convert top level list to RCU Stephen Hemminger
@ 2006-08-14 21:20 ` Stephen Hemminger
2006-08-14 21:20 ` [RFC 3/7] neighbour: convert pneigh " Stephen Hemminger
` (4 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Stephen Hemminger @ 2006-08-14 21:20 UTC (permalink / raw)
To: David Miller; +Cc: netdev
[-- Attachment #1: hlist1.patch --]
[-- Type: text/plain, Size: 11819 bytes --]
Change the neighbour table hash chains from open-coded singly-linked
lists to hlist from list.h, to allow for an easier later conversion to RCU.
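
The practical win is on removal: an hlist node can unlink itself, so the
open-coded tracking of the previous pointer goes away. Rough sketch
(should_remove() is only a placeholder predicate, not a real function):

	/* before: walk with a pointer to the previous link */
	struct neighbour *n, **np;

	for (np = &tbl->hash_buckets[i]; (n = *np) != NULL; np = &n->next) {
		if (should_remove(n)) {	/* placeholder condition */
			*np = n->next;
			break;
		}
	}

	/* after: the hlist node knows how to unlink itself */
	struct hlist_node *node, *tmp;

	hlist_for_each_entry_safe(n, node, tmp, &tbl->hash_buckets[i], hlist) {
		if (should_remove(n))	/* placeholder condition */
			hlist_del(&n->hlist);
	}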
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
---
include/net/neighbour.h | 6 -
net/core/neighbour.c | 160 +++++++++++++++++++++++++-----------------------
2 files changed, 88 insertions(+), 78 deletions(-)
--- net-2.6.19.orig/include/net/neighbour.h
+++ net-2.6.19/include/net/neighbour.h
@@ -88,10 +88,10 @@ struct neigh_statistics
struct neighbour
{
- struct neighbour *next;
+ struct hlist_node hlist;
struct neigh_table *tbl;
struct neigh_parms *parms;
- struct net_device *dev;
+ struct net_device *dev;
unsigned long used;
unsigned long confirmed;
unsigned long updated;
@@ -161,7 +161,7 @@ struct neigh_table
unsigned long last_rand;
kmem_cache_t *kmem_cachep;
struct neigh_statistics *stats;
- struct neighbour **hash_buckets;
+ struct hlist_head *hash_buckets;
unsigned int hash_mask;
__u32 hash_rnd;
unsigned int hash_chain_gc;
--- net-2.6.19.orig/net/core/neighbour.c
+++ net-2.6.19/net/core/neighbour.c
@@ -126,10 +126,11 @@ static int neigh_forced_gc(struct neigh_
write_lock_bh(&tbl->lock);
for (i = 0; i <= tbl->hash_mask; i++) {
- struct neighbour *n, **np;
+ struct neighbour *n;
+ struct hlist_node *node, *tmp;
- np = &tbl->hash_buckets[i];
- while ((n = *np) != NULL) {
+ hlist_for_each_entry_safe(n, node, tmp,
+ &tbl->hash_buckets[i], hlist) {
/* Neighbour record may be discarded if:
* - nobody refers to it.
* - it is not permanent
@@ -137,7 +138,7 @@ static int neigh_forced_gc(struct neigh_
write_lock(&n->lock);
if (atomic_read(&n->refcnt) == 1 &&
!(n->nud_state & NUD_PERMANENT)) {
- *np = n->next;
+ hlist_del(&n->hlist);
n->dead = 1;
shrunk = 1;
write_unlock(&n->lock);
@@ -145,7 +146,6 @@ static int neigh_forced_gc(struct neigh_
continue;
}
write_unlock(&n->lock);
- np = &n->next;
}
}
@@ -181,14 +181,15 @@ static void neigh_flush_dev(struct neigh
int i;
for (i = 0; i <= tbl->hash_mask; i++) {
- struct neighbour *n, **np = &tbl->hash_buckets[i];
+ struct hlist_node *node, *tmp;
+ struct neighbour *n;
- while ((n = *np) != NULL) {
- if (dev && n->dev != dev) {
- np = &n->next;
+ hlist_for_each_entry_safe(n, node, tmp,
+ &tbl->hash_buckets[i], hlist) {
+ if (dev && n->dev != dev)
continue;
- }
- *np = n->next;
+
+ hlist_del(&n->hlist);
write_lock(&n->lock);
neigh_del_timer(n);
n->dead = 1;
@@ -279,23 +280,20 @@ out_entries:
goto out;
}
-static struct neighbour **neigh_hash_alloc(unsigned int entries)
+static struct hlist_head *neigh_hash_alloc(unsigned int entries)
{
- unsigned long size = entries * sizeof(struct neighbour *);
- struct neighbour **ret;
+ unsigned long size = entries * sizeof(struct hlist_head);
- if (size <= PAGE_SIZE) {
- ret = kzalloc(size, GFP_ATOMIC);
- } else {
- ret = (struct neighbour **)
+ if (size <= PAGE_SIZE)
+ return kzalloc(size, GFP_ATOMIC);
+ else
+ return (struct hlist_head *)
__get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size));
- }
- return ret;
}
-static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
+static void neigh_hash_free(struct hlist_head *hash, unsigned int entries)
{
- unsigned long size = entries * sizeof(struct neighbour *);
+ unsigned long size = entries * sizeof(struct hlist_head);
if (size <= PAGE_SIZE)
kfree(hash);
@@ -305,7 +303,7 @@ static void neigh_hash_free(struct neigh
static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
{
- struct neighbour **new_hash, **old_hash;
+ struct hlist_head *new_hash, *old_hash;
unsigned int i, new_hash_mask, old_entries;
NEIGH_CACHE_STAT_INC(tbl, hash_grows);
@@ -321,16 +319,15 @@ static void neigh_hash_grow(struct neigh
get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
for (i = 0; i < old_entries; i++) {
- struct neighbour *n, *next;
+ struct neighbour *n;
+ struct hlist_node *node, *tmp;
- for (n = old_hash[i]; n; n = next) {
+ hlist_for_each_entry_safe(n, node, tmp, &old_hash[i], hlist) {
unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
hash_val &= new_hash_mask;
- next = n->next;
-
- n->next = new_hash[hash_val];
- new_hash[hash_val] = n;
+ hlist_del(&n->hlist);
+ hlist_add_head(&n->hlist, &new_hash[hash_val]);
}
}
tbl->hash_buckets = new_hash;
@@ -343,19 +340,22 @@ struct neighbour *neigh_lookup(struct ne
struct net_device *dev)
{
struct neighbour *n;
+ struct hlist_node *tmp;
int key_len = tbl->key_len;
u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
NEIGH_CACHE_STAT_INC(tbl, lookups);
read_lock_bh(&tbl->lock);
- for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
+ hlist_for_each_entry(n, tmp, &tbl->hash_buckets[hash_val], hlist) {
if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
neigh_hold(n);
NEIGH_CACHE_STAT_INC(tbl, hits);
- break;
+ goto found;
}
}
+ n = NULL;
+found:
read_unlock_bh(&tbl->lock);
return n;
}
@@ -363,19 +363,22 @@ struct neighbour *neigh_lookup(struct ne
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
{
struct neighbour *n;
+ struct hlist_node *tmp;
int key_len = tbl->key_len;
u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask;
NEIGH_CACHE_STAT_INC(tbl, lookups);
read_lock_bh(&tbl->lock);
- for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
+ hlist_for_each_entry(n, tmp, &tbl->hash_buckets[hash_val], hlist) {
if (!memcmp(n->primary_key, pkey, key_len)) {
neigh_hold(n);
NEIGH_CACHE_STAT_INC(tbl, hits);
- break;
+ goto found;
}
}
+ n = NULL;
+found:
read_unlock_bh(&tbl->lock);
return n;
}
@@ -387,6 +390,7 @@ struct neighbour *neigh_create(struct ne
int key_len = tbl->key_len;
int error;
struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
+ struct hlist_node *tmp;
if (!n) {
rc = ERR_PTR(-ENOBUFS);
@@ -424,7 +428,7 @@ struct neighbour *neigh_create(struct ne
goto out_tbl_unlock;
}
- for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
+ hlist_for_each_entry(n1, tmp, &tbl->hash_buckets[hash_val], hlist) {
if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
neigh_hold(n1);
rc = n1;
@@ -432,8 +436,7 @@ struct neighbour *neigh_create(struct ne
}
}
- n->next = tbl->hash_buckets[hash_val];
- tbl->hash_buckets[hash_val] = n;
+ hlist_add_head(&n->hlist, &tbl->hash_buckets[hash_val]);
n->dead = 0;
neigh_hold(n);
write_unlock_bh(&tbl->lock);
@@ -635,7 +638,9 @@ static void neigh_connect(struct neighbo
static void neigh_periodic_timer(unsigned long arg)
{
struct neigh_table *tbl = (struct neigh_table *)arg;
- struct neighbour *n, **np;
+ struct neighbour *n;
+ struct hlist_head *head;
+ struct hlist_node *node, *tmp;
unsigned long expire, now = jiffies;
NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
@@ -654,19 +659,17 @@ static void neigh_periodic_timer(unsigne
neigh_rand_reach_time(p->base_reachable_time);
}
- np = &tbl->hash_buckets[tbl->hash_chain_gc];
+ head = &tbl->hash_buckets[tbl->hash_chain_gc];
tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask);
- while ((n = *np) != NULL) {
+ hlist_for_each_entry_safe(n, node, tmp, head, hlist) {
unsigned int state;
write_lock(&n->lock);
state = n->nud_state;
- if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
- write_unlock(&n->lock);
+ if (state & (NUD_PERMANENT | NUD_IN_TIMER))
goto next_elt;
- }
if (time_before(n->used, n->confirmed))
n->used = n->confirmed;
@@ -674,16 +677,14 @@ static void neigh_periodic_timer(unsigne
if (atomic_read(&n->refcnt) == 1 &&
(state == NUD_FAILED ||
time_after(now, n->used + n->parms->gc_staletime))) {
- *np = n->next;
+ hlist_del(&n->hlist);
n->dead = 1;
write_unlock(&n->lock);
neigh_release(n);
continue;
}
+ next_elt:
write_unlock(&n->lock);
-
-next_elt:
- np = &n->next;
}
/* Cycle through all hash buckets every base_reachable_time/2 ticks.
@@ -1980,27 +1981,29 @@ nla_put_failure:
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct neighbour *n;
int rc, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
for (h = 0; h <= tbl->hash_mask; h++) {
+ struct neighbour *n;
+ struct hlist_node *tmp;
+
if (h < s_h)
continue;
if (h > s_h)
s_idx = 0;
read_lock_bh(&tbl->lock);
- for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
- if (idx < s_idx)
- continue;
- if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+ idx = 0;
+ hlist_for_each_entry(n, tmp, &tbl->hash_buckets[h], hlist) {
+ if (idx >= s_idx &&
+ neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH,
- NLM_F_MULTI) <= 0) {
+ RTM_NEWNEIGH, NLM_F_MULTI) <= 0) {
read_unlock_bh(&tbl->lock);
rc = -1;
goto out;
}
+ ++idx;
}
read_unlock_bh(&tbl->lock);
}
@@ -2044,10 +2047,10 @@ void neigh_for_each(struct neigh_table *
read_lock_bh(&tbl->lock);
for (chain = 0; chain <= tbl->hash_mask; chain++) {
- struct neighbour *n;
+ struct hlist_node *p;
- for (n = tbl->hash_buckets[chain]; n; n = n->next)
- cb(n, cookie);
+ hlist_for_each(p, &tbl->hash_buckets[chain])
+ cb(hlist_entry(p, struct neighbour, hlist), cookie);
}
read_unlock_bh(&tbl->lock);
}
@@ -2060,19 +2063,19 @@ void __neigh_for_each_release(struct nei
int chain;
for (chain = 0; chain <= tbl->hash_mask; chain++) {
- struct neighbour *n, **np;
+ struct neighbour *n;
+ struct hlist_node *tmp, *next;
- np = &tbl->hash_buckets[chain];
- while ((n = *np) != NULL) {
+ hlist_for_each_entry_safe(n, tmp, next,
+ &tbl->hash_buckets[chain], hlist) {
int release;
write_lock(&n->lock);
release = cb(n);
if (release) {
- *np = n->next;
+ hlist_del(&n->hlist);
n->dead = 1;
- } else
- np = &n->next;
+ }
write_unlock(&n->lock);
if (release)
neigh_release(n);
@@ -2092,33 +2095,39 @@ static struct neighbour *neigh_get_first
state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
- n = tbl->hash_buckets[bucket];
+ struct hlist_node *tmp;
- while (n) {
+ hlist_for_each_entry(n, tmp, &tbl->hash_buckets[bucket], hlist) {
if (state->neigh_sub_iter) {
loff_t fakep = 0;
void *v;
v = state->neigh_sub_iter(state, n, &fakep);
if (!v)
- goto next;
+ continue;
}
+
if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
- break;
+ goto found;
if (n->nud_state & ~NUD_NOARP)
- break;
- next:
- n = n->next;
+ goto found;
}
-
- if (n)
- break;
+ n = NULL;
}
+found:
state->bucket = bucket;
return n;
}
+static struct neighbour *next_neigh(struct hlist_node *node)
+{
+ if (node)
+ return hlist_entry(node, struct neighbour, hlist);
+ else
+ return NULL;
+}
+
static struct neighbour *neigh_get_next(struct seq_file *seq,
struct neighbour *n,
loff_t *pos)
@@ -2131,7 +2140,8 @@ static struct neighbour *neigh_get_next(
if (v)
return n;
}
- n = n->next;
+
+ n = next_neigh(n->hlist.next);
while (1) {
while (n) {
@@ -2147,7 +2157,7 @@ static struct neighbour *neigh_get_next(
if (n->nud_state & ~NUD_NOARP)
break;
next:
- n = n->next;
+ n = next_neigh(n->hlist.next);
}
if (n)
@@ -2156,7 +2166,7 @@ static struct neighbour *neigh_get_next(
if (++state->bucket > tbl->hash_mask)
break;
- n = tbl->hash_buckets[state->bucket];
+ n = next_neigh(tbl->hash_buckets[state->bucket].first);
}
if (n && pos)
--
^ permalink raw reply [flat|nested] 12+ messages in thread
* [RFC 3/7] neighbour: convert pneigh hash table to hlist
2006-08-14 21:20 [RFC 0/7] neighbour table lockless read Stephen Hemminger
2006-08-14 21:20 ` [RFC 1/7] net neighbor: convert top level list to RCU Stephen Hemminger
2006-08-14 21:20 ` [RFC 2/7] neighbour: convert neighbour hash table to hlist Stephen Hemminger
@ 2006-08-14 21:20 ` Stephen Hemminger
2006-08-14 21:20 ` [RFC 4/7] net neighbour: convert to RCU Stephen Hemminger
` (3 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Stephen Hemminger @ 2006-08-14 21:20 UTC (permalink / raw)
To: David Miller; +Cc: netdev
[-- Attachment #1: hlist2.patch --]
[-- Type: text/plain, Size: 5335 bytes --]
Change the pneigh_entry hash table chains to hlist from list.h,
to allow for an easier later conversion to RCU.
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
---
include/net/neighbour.h | 6 ++--
net/core/neighbour.c | 58 ++++++++++++++++++++++++------------------------
2 files changed, 33 insertions(+), 31 deletions(-)
--- net-2.6.19.orig/include/net/neighbour.h
+++ net-2.6.19/include/net/neighbour.h
@@ -124,8 +124,8 @@ struct neigh_ops
struct pneigh_entry
{
- struct pneigh_entry *next;
- struct net_device *dev;
+ struct hlist_node hlist;
+ struct net_device *dev;
u8 key[0];
};
@@ -165,7 +165,7 @@ struct neigh_table
unsigned int hash_mask;
__u32 hash_rnd;
unsigned int hash_chain_gc;
- struct pneigh_entry **phash_buckets;
+ struct hlist_head *phash_buckets;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *pde;
#endif
--- net-2.6.19.orig/net/core/neighbour.c
+++ net-2.6.19/net/core/neighbour.c
@@ -455,6 +455,7 @@ struct pneigh_entry * pneigh_lookup(stru
struct net_device *dev, int creat)
{
struct pneigh_entry *n;
+ struct hlist_node *tmp;
int key_len = tbl->key_len;
u32 hash_val = *(u32 *)(pkey + key_len - 4);
@@ -465,7 +466,7 @@ struct pneigh_entry * pneigh_lookup(stru
read_lock_bh(&tbl->lock);
- for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
+ hlist_for_each_entry(n, tmp, &tbl->phash_buckets[hash_val], hlist) {
if (!memcmp(n->key, pkey, key_len) &&
(n->dev == dev || !n->dev)) {
read_unlock_bh(&tbl->lock);
@@ -495,8 +496,7 @@ struct pneigh_entry * pneigh_lookup(stru
}
write_lock_bh(&tbl->lock);
- n->next = tbl->phash_buckets[hash_val];
- tbl->phash_buckets[hash_val] = n;
+ hlist_add_head(&n->hlist, &tbl->phash_buckets[hash_val]);
write_unlock_bh(&tbl->lock);
out:
return n;
@@ -506,7 +506,8 @@ out:
int pneigh_delete(struct neigh_table *tbl, const void *pkey,
struct net_device *dev)
{
- struct pneigh_entry *n, **np;
+ struct pneigh_entry *n;
+ struct hlist_node *tmp;
int key_len = tbl->key_len;
u32 hash_val = *(u32 *)(pkey + key_len - 4);
@@ -516,10 +517,9 @@ int pneigh_delete(struct neigh_table *tb
hash_val &= PNEIGH_HASHMASK;
write_lock_bh(&tbl->lock);
- for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
- np = &n->next) {
+ hlist_for_each_entry(n, tmp, &tbl->phash_buckets[hash_val], hlist) {
if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
- *np = n->next;
+ hlist_del(&n->hlist);
write_unlock_bh(&tbl->lock);
if (tbl->pdestructor)
tbl->pdestructor(n);
@@ -535,22 +535,21 @@ int pneigh_delete(struct neigh_table *tb
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
- struct pneigh_entry *n, **np;
u32 h;
for (h = 0; h <= PNEIGH_HASHMASK; h++) {
- np = &tbl->phash_buckets[h];
- while ((n = *np) != NULL) {
+ struct pneigh_entry *n;
+ struct hlist_node *tmp, *nxt;
+
+ hlist_for_each_entry_safe(n, tmp, nxt, &tbl->phash_buckets[h], hlist) {
if (!dev || n->dev == dev) {
- *np = n->next;
+ hlist_del(&n->hlist);
if (tbl->pdestructor)
tbl->pdestructor(n);
if (n->dev)
dev_put(n->dev);
kfree(n);
- continue;
}
- np = &n->next;
}
}
return -ENOENT;
@@ -1332,7 +1331,6 @@ void neigh_parms_destroy(struct neigh_pa
void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
unsigned long now = jiffies;
- unsigned long phsize;
atomic_set(&tbl->parms.refcnt, 1);
INIT_RCU_HEAD(&tbl->parms.rcu_head);
@@ -1363,8 +1361,8 @@ void neigh_table_init_no_netlink(struct
tbl->hash_mask = 1;
tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
- phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
- tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
+ tbl->phash_buckets = kcalloc(PNEIGH_HASHMASK + 1, sizeof(struct hlist_head),
+ GFP_KERNEL);
if (!tbl->hash_buckets || !tbl->phash_buckets)
panic("cannot allocate neighbour cache hashes");
@@ -2192,18 +2190,18 @@ static struct pneigh_entry *pneigh_get_f
{
struct neigh_seq_state *state = seq->private;
struct neigh_table *tbl = state->tbl;
- struct pneigh_entry *pn = NULL;
+ struct hlist_node *pn = NULL;
int bucket = state->bucket;
state->flags |= NEIGH_SEQ_IS_PNEIGH;
for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
- pn = tbl->phash_buckets[bucket];
+ pn = tbl->phash_buckets[bucket].first;
if (pn)
break;
}
state->bucket = bucket;
- return pn;
+ return pn ? hlist_entry(pn, struct pneigh_entry, hlist) : NULL;
}
static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
@@ -2212,20 +2210,24 @@ static struct pneigh_entry *pneigh_get_n
{
struct neigh_seq_state *state = seq->private;
struct neigh_table *tbl = state->tbl;
+ struct hlist_node *tmp = &pn->hlist;
- pn = pn->next;
- while (!pn) {
- if (++state->bucket > PNEIGH_HASHMASK)
- break;
- pn = tbl->phash_buckets[state->bucket];
- if (pn)
- break;
+ tmp = tmp->next;
+ if (tmp)
+ goto found;
+
+ while (++state->bucket <= PNEIGH_HASHMASK) {
+ tmp = tbl->phash_buckets[state->bucket].first;
+ if (tmp)
+ goto found;
}
+ return NULL;
- if (pn && pos)
+found:
+ if (pos)
--(*pos);
- return pn;
+ return hlist_entry(tmp, struct pneigh_entry, hlist);
}
static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
--
^ permalink raw reply [flat|nested] 12+ messages in thread
* [RFC 4/7] net neighbour: convert to RCU
2006-08-14 21:20 [RFC 0/7] neighbour table lockless read Stephen Hemminger
` (2 preceding siblings ...)
2006-08-14 21:20 ` [RFC 3/7] neighbour: convert pneigh " Stephen Hemminger
@ 2006-08-14 21:20 ` Stephen Hemminger
2006-08-14 21:20 ` [RFC 5/7] neighbour: convert lookup to sequence lock Stephen Hemminger
` (2 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Stephen Hemminger @ 2006-08-14 21:20 UTC (permalink / raw)
To: David Miller; +Cc: netdev
[-- Attachment #1: rcu-neigh.patch --]
[-- Type: text/plain, Size: 15234 bytes --]
Use RCU to allow lockless access to the neighbour table.
This should speed up the send path because no atomic operations
will be needed to look up ARP entries, etc.
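
Deletion now unlinks with hlist_del_rcu() under the (now spin) table lock
and defers the final release via call_rcu(), so lookups only need
rcu_read_lock(); condensed sketch of the pattern used throughout the patch:

	/* updater side, e.g. in neigh_forced_gc(): unlink under the table
	 * lock, mark dead, and let RCU drop the last reference later.
	 */
	spin_lock_bh(&tbl->lock);
	hlist_del_rcu(&n->hlist);
	n->dead = 1;
	call_rcu(&n->rcu, neigh_rcu_release);
	spin_unlock_bh(&tbl->lock);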
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
---
include/net/neighbour.h | 4 -
net/core/neighbour.c | 158 +++++++++++++++++++++++++-----------------------
2 files changed, 87 insertions(+), 75 deletions(-)
--- net-2.6.19.orig/include/net/neighbour.h
+++ net-2.6.19/include/net/neighbour.h
@@ -108,6 +108,7 @@ struct neighbour
struct sk_buff_head arp_queue;
struct timer_list timer;
struct neigh_ops *ops;
+ struct rcu_head rcu;
u8 primary_key[0];
};
@@ -126,6 +127,7 @@ struct pneigh_entry
{
struct hlist_node hlist;
struct net_device *dev;
+ struct rcu_head rcu;
u8 key[0];
};
@@ -157,7 +159,7 @@ struct neigh_table
struct timer_list proxy_timer;
struct sk_buff_head proxy_queue;
atomic_t entries;
- rwlock_t lock;
+ spinlock_t lock;
unsigned long last_rand;
kmem_cache_t *kmem_cachep;
struct neigh_statistics *stats;
--- net-2.6.19.orig/net/core/neighbour.c
+++ net-2.6.19/net/core/neighbour.c
@@ -67,9 +67,10 @@ static struct file_operations neigh_stat
#endif
/*
- Neighbour hash table buckets are protected with rwlock tbl->lock.
+ Neighbour hash table buckets are protected with lock tbl->lock.
- - All the scans/updates to hash buckets MUST be made under this lock.
+ - All the scans of hash buckets must be made with RCU read lock (nopreempt)
+ - updates to hash buckets MUST be made under this lock.
- NOTHING clever should be made under this lock: no callbacks
to protocol backends, no attempts to send something to network.
It will result in deadlocks, if backend/driver wants to use neighbour
@@ -117,6 +118,13 @@ unsigned long neigh_rand_reach_time(unsi
}
+static void neigh_rcu_release(struct rcu_head *head)
+{
+ struct neighbour *neigh = container_of(head, struct neighbour, rcu);
+
+ neigh_release(neigh);
+}
+
static int neigh_forced_gc(struct neigh_table *tbl)
{
int shrunk = 0;
@@ -124,7 +132,7 @@ static int neigh_forced_gc(struct neigh_
NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
for (i = 0; i <= tbl->hash_mask; i++) {
struct neighbour *n;
struct hlist_node *node, *tmp;
@@ -138,11 +146,11 @@ static int neigh_forced_gc(struct neigh_
write_lock(&n->lock);
if (atomic_read(&n->refcnt) == 1 &&
!(n->nud_state & NUD_PERMANENT)) {
- hlist_del(&n->hlist);
+ hlist_del_rcu(&n->hlist);
n->dead = 1;
shrunk = 1;
write_unlock(&n->lock);
- neigh_release(n);
+ call_rcu(&n->rcu, neigh_rcu_release);
continue;
}
write_unlock(&n->lock);
@@ -151,7 +159,7 @@ static int neigh_forced_gc(struct neigh_
tbl->last_flush = jiffies;
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
return shrunk;
}
@@ -189,7 +197,7 @@ static void neigh_flush_dev(struct neigh
if (dev && n->dev != dev)
continue;
- hlist_del(&n->hlist);
+ hlist_del_rcu(&n->hlist);
write_lock(&n->lock);
neigh_del_timer(n);
n->dead = 1;
@@ -220,17 +228,17 @@ static void neigh_flush_dev(struct neigh
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
neigh_flush_dev(tbl, dev);
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
}
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
neigh_flush_dev(tbl, dev);
pneigh_ifdown(tbl, dev);
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
del_timer_sync(&tbl->proxy_timer);
pneigh_queue_purge(&tbl->proxy_queue);
@@ -326,8 +334,8 @@ static void neigh_hash_grow(struct neigh
unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
hash_val &= new_hash_mask;
- hlist_del(&n->hlist);
- hlist_add_head(&n->hlist, &new_hash[hash_val]);
+ __hlist_del(&n->hlist);
+ hlist_add_head_rcu(&n->hlist, &new_hash[hash_val]);
}
}
tbl->hash_buckets = new_hash;
@@ -346,8 +354,8 @@ struct neighbour *neigh_lookup(struct ne
NEIGH_CACHE_STAT_INC(tbl, lookups);
- read_lock_bh(&tbl->lock);
- hlist_for_each_entry(n, tmp, &tbl->hash_buckets[hash_val], hlist) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(n, tmp, &tbl->hash_buckets[hash_val], hlist) {
if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
neigh_hold(n);
NEIGH_CACHE_STAT_INC(tbl, hits);
@@ -356,7 +364,7 @@ struct neighbour *neigh_lookup(struct ne
}
n = NULL;
found:
- read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
return n;
}
@@ -369,8 +377,8 @@ struct neighbour *neigh_lookup_nodev(str
NEIGH_CACHE_STAT_INC(tbl, lookups);
- read_lock_bh(&tbl->lock);
- hlist_for_each_entry(n, tmp, &tbl->hash_buckets[hash_val], hlist) {
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(n, tmp, &tbl->hash_buckets[hash_val], hlist) {
if (!memcmp(n->primary_key, pkey, key_len)) {
neigh_hold(n);
NEIGH_CACHE_STAT_INC(tbl, hits);
@@ -379,7 +387,7 @@ struct neighbour *neigh_lookup_nodev(str
}
n = NULL;
found:
- read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
return n;
}
@@ -416,7 +424,7 @@ struct neighbour *neigh_create(struct ne
n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
@@ -436,21 +444,22 @@ struct neighbour *neigh_create(struct ne
}
}
- hlist_add_head(&n->hlist, &tbl->hash_buckets[hash_val]);
n->dead = 0;
neigh_hold(n);
- write_unlock_bh(&tbl->lock);
+ hlist_add_head_rcu(&n->hlist, &tbl->hash_buckets[hash_val]);
+ spin_unlock_bh(&tbl->lock);
NEIGH_PRINTK2("neigh %p is created.\n", n);
rc = n;
out:
return rc;
out_tbl_unlock:
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
out_neigh_release:
neigh_release(n);
goto out;
}
+/* Assumes rcu_read_lock is held */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, int creat)
{
@@ -464,16 +473,14 @@ struct pneigh_entry * pneigh_lookup(stru
hash_val ^= hash_val >> 4;
hash_val &= PNEIGH_HASHMASK;
- read_lock_bh(&tbl->lock);
-
- hlist_for_each_entry(n, tmp, &tbl->phash_buckets[hash_val], hlist) {
+ hlist_for_each_entry_rcu(n, tmp, &tbl->phash_buckets[hash_val], hlist) {
if (!memcmp(n->key, pkey, key_len) &&
(n->dev == dev || !n->dev)) {
- read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
goto out;
}
}
- read_unlock_bh(&tbl->lock);
+
n = NULL;
if (!creat)
goto out;
@@ -495,13 +502,18 @@ struct pneigh_entry * pneigh_lookup(stru
goto out;
}
- write_lock_bh(&tbl->lock);
- hlist_add_head(&n->hlist, &tbl->phash_buckets[hash_val]);
- write_unlock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
+ hlist_add_head_rcu(&n->hlist, &tbl->phash_buckets[hash_val]);
+ spin_unlock_bh(&tbl->lock);
out:
return n;
}
+static void pneigh_destroy(struct rcu_head *head)
+{
+ struct pneigh_entry *n = container_of(head, struct pneigh_entry, rcu);
+ kfree(n);
+}
int pneigh_delete(struct neigh_table *tbl, const void *pkey,
struct net_device *dev)
@@ -516,20 +528,20 @@ int pneigh_delete(struct neigh_table *tb
hash_val ^= hash_val >> 4;
hash_val &= PNEIGH_HASHMASK;
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
hlist_for_each_entry(n, tmp, &tbl->phash_buckets[hash_val], hlist) {
if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
- hlist_del(&n->hlist);
- write_unlock_bh(&tbl->lock);
+ hlist_del_rcu(&n->hlist);
+ spin_unlock_bh(&tbl->lock);
if (tbl->pdestructor)
tbl->pdestructor(n);
if (n->dev)
dev_put(n->dev);
- kfree(n);
+ call_rcu(&n->rcu, pneigh_destroy);
return 0;
}
}
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
return -ENOENT;
}
@@ -543,7 +555,7 @@ static int pneigh_ifdown(struct neigh_ta
hlist_for_each_entry_safe(n, tmp, nxt, &tbl->phash_buckets[h], hlist) {
if (!dev || n->dev == dev) {
- hlist_del(&n->hlist);
+ hlist_del_rcu(&n->hlist);
if (tbl->pdestructor)
tbl->pdestructor(n);
if (n->dev)
@@ -644,7 +656,7 @@ static void neigh_periodic_timer(unsigne
NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
- write_lock(&tbl->lock);
+ spin_lock(&tbl->lock);
/*
* periodically recompute ReachableTime from random function
@@ -676,7 +688,7 @@ static void neigh_periodic_timer(unsigne
if (atomic_read(&n->refcnt) == 1 &&
(state == NUD_FAILED ||
time_after(now, n->used + n->parms->gc_staletime))) {
- hlist_del(&n->hlist);
+ hlist_del_rcu(&n->hlist);
n->dead = 1;
write_unlock(&n->lock);
neigh_release(n);
@@ -697,7 +709,7 @@ static void neigh_periodic_timer(unsigne
mod_timer(&tbl->gc_timer, now + expire);
- write_unlock(&tbl->lock);
+ spin_unlock(&tbl->lock);
}
static __inline__ int neigh_max_probes(struct neighbour *n)
@@ -1285,10 +1297,10 @@ struct neigh_parms *neigh_parms_alloc(st
p->dev = dev;
}
p->sysctl_table = NULL;
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
p->next = tbl->parms.next;
tbl->parms.next = p;
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
}
return p;
}
@@ -1307,19 +1319,19 @@ void neigh_parms_release(struct neigh_ta
if (!parms || parms == &tbl->parms)
return;
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
for (p = &tbl->parms.next; *p; p = &(*p)->next) {
if (*p == parms) {
*p = parms->next;
parms->dead = 1;
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
if (parms->dev)
dev_put(parms->dev);
call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
return;
}
}
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
@@ -1369,7 +1381,7 @@ void neigh_table_init_no_netlink(struct
get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
- rwlock_init(&tbl->lock);
+ spin_lock_init(&tbl->lock);
init_timer(&tbl->gc_timer);
tbl->gc_timer.data = (unsigned long)tbl;
tbl->gc_timer.function = neigh_periodic_timer;
@@ -1624,7 +1636,7 @@ static int neightbl_fill_info(struct sk_
ndtmsg = nlmsg_data(nlh);
- read_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
ndtmsg->ndtm_family = tbl->family;
ndtmsg->ndtm_pad1 = 0;
ndtmsg->ndtm_pad2 = 0;
@@ -1684,11 +1696,11 @@ static int neightbl_fill_info(struct sk_
if (neightbl_fill_parms(skb, &tbl->parms) < 0)
goto nla_put_failure;
- read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
return nlmsg_end(skb, nlh);
nla_put_failure:
- read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
return nlmsg_cancel(skb, nlh);
}
@@ -1707,7 +1719,7 @@ static int neightbl_fill_param_info(stru
ndtmsg = nlmsg_data(nlh);
- read_lock_bh(&tbl->lock);
+ rcu_read_lock(); /* this may be unnecessary */
ndtmsg->ndtm_family = tbl->family;
ndtmsg->ndtm_pad1 = 0;
ndtmsg->ndtm_pad2 = 0;
@@ -1716,10 +1728,10 @@ static int neightbl_fill_param_info(stru
neightbl_fill_parms(skb, parms) < 0)
goto errout;
- read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
return nlmsg_end(skb, nlh);
errout:
- read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
return nlmsg_cancel(skb, nlh);
}
@@ -1797,7 +1809,7 @@ int neightbl_set(struct sk_buff *skb, st
* We acquire tbl->lock to be nice to the periodic timers and
* make sure they always see a consistent set of values.
*/
- write_lock_bh(&tbl->lock);
+ spin_lock_bh(&tbl->lock);
if (tb[NDTA_PARMS]) {
struct nlattr *tbp[NDTPA_MAX+1];
@@ -1878,7 +1890,7 @@ int neightbl_set(struct sk_buff *skb, st
err = 0;
errout_tbl_lock:
- write_unlock_bh(&tbl->lock);
+ spin_unlock_bh(&tbl->lock);
errout_locked:
rcu_read_unlock();
errout:
@@ -1894,7 +1906,7 @@ int neightbl_dump_info(struct sk_buff *s
family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
- rcu_read_lock();
+ rcu_read_lock_bh();
list_for_each_entry_rcu(tbl, &neigh_tables, list) {
struct neigh_parms *p;
@@ -1990,20 +2002,20 @@ static int neigh_dump_table(struct neigh
continue;
if (h > s_h)
s_idx = 0;
- read_lock_bh(&tbl->lock);
+ rcu_read_lock();
idx = 0;
- hlist_for_each_entry(n, tmp, &tbl->hash_buckets[h], hlist) {
+ hlist_for_each_entry_rcu(n, tmp, &tbl->hash_buckets[h], hlist) {
if (idx >= s_idx &&
neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH, NLM_F_MULTI) <= 0) {
- read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
rc = -1;
goto out;
}
++idx;
}
- read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
}
rc = skb->len;
out:
@@ -2043,14 +2055,15 @@ void neigh_for_each(struct neigh_table *
{
int chain;
- read_lock_bh(&tbl->lock);
+ rcu_read_lock();
for (chain = 0; chain <= tbl->hash_mask; chain++) {
+ struct neighbour *n;
struct hlist_node *p;
- hlist_for_each(p, &tbl->hash_buckets[chain])
- cb(hlist_entry(p, struct neighbour, hlist), cookie);
+ hlist_for_each_entry_rcu(n, p, &tbl->hash_buckets[chain], hlist)
+ cb(n, cookie);
}
- read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
}
EXPORT_SYMBOL(neigh_for_each);
@@ -2071,12 +2084,12 @@ void __neigh_for_each_release(struct nei
write_lock(&n->lock);
release = cb(n);
if (release) {
- hlist_del(&n->hlist);
+ hlist_del_rcu(&n->hlist);
n->dead = 1;
}
write_unlock(&n->lock);
if (release)
- neigh_release(n);
+ call_rcu(&n->rcu, neigh_rcu_release);
}
}
}
@@ -2120,7 +2133,7 @@ found:
static struct neighbour *next_neigh(struct hlist_node *node)
{
- if (node)
+ if (rcu_dereference(node))
return hlist_entry(node, struct neighbour, hlist);
else
return NULL;
@@ -2195,7 +2208,7 @@ static struct pneigh_entry *pneigh_get_f
state->flags |= NEIGH_SEQ_IS_PNEIGH;
for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
- pn = tbl->phash_buckets[bucket].first;
+ pn = rcu_dereference(tbl->phash_buckets[bucket].first);
if (pn)
break;
}
@@ -2212,12 +2225,12 @@ static struct pneigh_entry *pneigh_get_n
struct neigh_table *tbl = state->tbl;
struct hlist_node *tmp = &pn->hlist;
- tmp = tmp->next;
+ tmp = rcu_dereference(tmp->next);
if (tmp)
goto found;
while (++state->bucket < PNEIGH_HASHMASK) {
- tmp = tbl->phash_buckets[state->bucket].first;
+ tmp = rcu_dereference(tbl->phash_buckets[state->bucket].first);
if (tmp)
goto found;
}
@@ -2265,7 +2278,7 @@ void *neigh_seq_start(struct seq_file *s
state->bucket = 0;
state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
- read_lock_bh(&tbl->lock);
+ rcu_read_lock();
pos_minus_one = *pos - 1;
return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN;
@@ -2301,10 +2314,7 @@ EXPORT_SYMBOL(neigh_seq_next);
void neigh_seq_stop(struct seq_file *seq, void *v)
{
- struct neigh_seq_state *state = seq->private;
- struct neigh_table *tbl = state->tbl;
-
- read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
}
EXPORT_SYMBOL(neigh_seq_stop);
--
^ permalink raw reply [flat|nested] 12+ messages in thread
* [RFC 5/7] neighbour: convert lookup to sequence lock
2006-08-14 21:20 [RFC 0/7] neighbour table lockless read Stephen Hemminger
` (3 preceding siblings ...)
2006-08-14 21:20 ` [RFC 4/7] net neighbour: convert to RCU Stephen Hemminger
@ 2006-08-14 21:20 ` Stephen Hemminger
2006-08-14 22:22 ` Thomas Graf
2006-08-14 21:20 ` [RFC 6/7] neighbour: convert hard header cache to sequence number Stephen Hemminger
2006-08-14 21:20 ` [RFC 7/7] neighbour: reduce exports Stephen Hemminger
6 siblings, 1 reply; 12+ messages in thread
From: Stephen Hemminger @ 2006-08-14 21:20 UTC (permalink / raw)
To: David Miller; +Cc: netdev
[-- Attachment #1: neighbour-seqlock.patch --]
[-- Type: text/plain, Size: 17033 bytes --]
The reading of the contents of a neighbour entry can be converted
from a slow reader/writer lock to a fast lockless sequence number check.
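
Readers that used to take read_lock_bh(&neigh->lock) now retry with a
sequence check instead, e.g. when copying out the hardware address
(as in arp_get_neigh_addr() below):

	unsigned seq;

	do {
		seq = read_seqbegin(&neigh->lock);
		memcpy(haddr, neigh->ha, dev->addr_len);
	} while (read_seqretry(&neigh->lock, seq));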
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
---
include/net/neighbour.h | 2
net/core/neighbour.c | 102 +++++++++++++++++++++++++++---------------------
net/ipv4/arp.c | 101 ++++++++++++++++++++++++++++-------------------
net/ipv6/ndisc.c | 16 +++----
net/ipv6/route.c | 12 ++---
net/sched/sch_teql.c | 11 +++--
6 files changed, 142 insertions(+), 102 deletions(-)
--- net-2.6.19.orig/include/net/neighbour.h
+++ net-2.6.19/include/net/neighbour.h
@@ -100,7 +100,7 @@ struct neighbour
__u8 type;
__u8 dead;
atomic_t probes;
- rwlock_t lock;
+ seqlock_t lock;
unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))];
struct hh_cache *hh;
atomic_t refcnt;
--- net-2.6.19.orig/net/core/neighbour.c
+++ net-2.6.19/net/core/neighbour.c
@@ -143,17 +143,17 @@ static int neigh_forced_gc(struct neigh_
* - nobody refers to it.
* - it is not permanent
*/
- write_lock(&n->lock);
+ write_seqlock(&n->lock);
if (atomic_read(&n->refcnt) == 1 &&
!(n->nud_state & NUD_PERMANENT)) {
hlist_del_rcu(&n->hlist);
n->dead = 1;
shrunk = 1;
- write_unlock(&n->lock);
+ write_sequnlock(&n->lock);
call_rcu(&n->rcu, neigh_rcu_release);
continue;
}
- write_unlock(&n->lock);
+ write_sequnlock(&n->lock);
}
}
@@ -198,7 +198,7 @@ static void neigh_flush_dev(struct neigh
continue;
hlist_del_rcu(&n->hlist);
- write_lock(&n->lock);
+ write_seqlock(&n->lock);
neigh_del_timer(n);
n->dead = 1;
@@ -220,7 +220,7 @@ static void neigh_flush_dev(struct neigh
n->nud_state = NUD_NONE;
NEIGH_PRINTK2("neigh %p is stray.\n", n);
}
- write_unlock(&n->lock);
+ write_sequnlock(&n->lock);
neigh_release(n);
}
}
@@ -267,7 +267,7 @@ static struct neighbour *neigh_alloc(str
memset(n, 0, tbl->entry_size);
skb_queue_head_init(&n->arp_queue);
- rwlock_init(&n->lock);
+ seqlock_init(&n->lock);
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
@@ -615,7 +615,7 @@ void neigh_destroy(struct neighbour *nei
/* Neighbour state is suspicious;
disable fast path.
- Called with write_locked neigh.
+ Called with locked neigh.
*/
static void neigh_suspect(struct neighbour *neigh)
{
@@ -632,7 +632,7 @@ static void neigh_suspect(struct neighbo
/* Neighbour state is OK;
enable fast path.
- Called with write_locked neigh.
+ Called with locked neigh.
*/
static void neigh_connect(struct neighbour *neigh)
{
@@ -676,7 +676,7 @@ static void neigh_periodic_timer(unsigne
hlist_for_each_entry_safe(n, node, tmp, head, hlist) {
unsigned int state;
- write_lock(&n->lock);
+ write_seqlock(&n->lock);
state = n->nud_state;
if (state & (NUD_PERMANENT | NUD_IN_TIMER))
@@ -690,12 +690,12 @@ static void neigh_periodic_timer(unsigne
time_after(now, n->used + n->parms->gc_staletime))) {
hlist_del_rcu(&n->hlist);
n->dead = 1;
- write_unlock(&n->lock);
+ write_sequnlock(&n->lock);
neigh_release(n);
continue;
}
next_elt:
- write_unlock(&n->lock);
+ write_sequnlock(&n->lock);
}
/* Cycle through all hash buckets every base_reachable_time/2 ticks.
@@ -738,7 +738,7 @@ static void neigh_timer_handler(unsigned
unsigned state;
int notify = 0;
- write_lock(&neigh->lock);
+ write_seqlock(&neigh->lock);
state = neigh->nud_state;
now = jiffies;
@@ -748,6 +748,7 @@ static void neigh_timer_handler(unsigned
#ifndef CONFIG_SMP
printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
#endif
+ write_sequnlock(&neigh->lock);
goto out;
}
@@ -808,9 +809,9 @@ static void neigh_timer_handler(unsigned
*/
while (neigh->nud_state == NUD_FAILED &&
(skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
- write_unlock(&neigh->lock);
+ write_sequnlock(&neigh->lock);
neigh->ops->error_report(neigh, skb);
- write_lock(&neigh->lock);
+ write_seqlock(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
}
@@ -821,20 +822,22 @@ static void neigh_timer_handler(unsigned
if (!mod_timer(&neigh->timer, next))
neigh_hold(neigh);
}
+
if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
struct sk_buff *skb = skb_peek(&neigh->arp_queue);
/* keep skb alive even if arp_queue overflows */
if (skb)
skb_get(skb);
- write_unlock(&neigh->lock);
+ write_sequnlock(&neigh->lock);
neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
if (skb)
kfree_skb(skb);
} else {
-out:
- write_unlock(&neigh->lock);
+ write_sequnlock(&neigh->lock);
}
+
+out:
if (notify)
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
@@ -850,11 +853,11 @@ int __neigh_event_send(struct neighbour
int rc;
unsigned long now;
- write_lock_bh(&neigh->lock);
+ write_seqlock_bh(&neigh->lock);
rc = 0;
if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
- goto out_unlock_bh;
+ goto out;
now = jiffies;
@@ -868,7 +871,7 @@ int __neigh_event_send(struct neighbour
} else {
neigh->nud_state = NUD_FAILED;
neigh->updated = jiffies;
- write_unlock_bh(&neigh->lock);
+ write_sequnlock_bh(&neigh->lock);
if (skb)
kfree_skb(skb);
@@ -896,8 +899,8 @@ int __neigh_event_send(struct neighbour
}
rc = 1;
}
-out_unlock_bh:
- write_unlock_bh(&neigh->lock);
+out:
+ write_sequnlock_bh(&neigh->lock);
return rc;
}
@@ -948,7 +951,7 @@ int neigh_update(struct neighbour *neigh
struct net_device *dev;
int update_isrouter = 0;
- write_lock_bh(&neigh->lock);
+ write_seqlock_bh(&neigh->lock);
dev = neigh->dev;
old = neigh->nud_state;
@@ -1052,22 +1055,23 @@ int neigh_update(struct neighbour *neigh
while (neigh->nud_state & NUD_VALID &&
(skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
struct neighbour *n1 = neigh;
- write_unlock_bh(&neigh->lock);
+ write_sequnlock_bh(&neigh->lock);
/* On shaper/eql skb->dst->neighbour != neigh :( */
if (skb->dst && skb->dst->neighbour)
n1 = skb->dst->neighbour;
n1->output(skb);
- write_lock_bh(&neigh->lock);
+ write_seqlock_bh(&neigh->lock);
}
skb_queue_purge(&neigh->arp_queue);
}
-out:
+
if (update_isrouter) {
neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
(neigh->flags | NTF_ROUTER) :
(neigh->flags & ~NTF_ROUTER);
}
- write_unlock_bh(&neigh->lock);
+out:
+ write_sequnlock_bh(&neigh->lock);
if (notify)
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
@@ -1160,19 +1164,24 @@ int neigh_resolve_output(struct sk_buff
if (!neigh_event_send(neigh, skb)) {
int err;
struct net_device *dev = neigh->dev;
+ unsigned short type = ntohs(skb->protocol);
+
if (dev->hard_header_cache && !dst->hh) {
- write_lock_bh(&neigh->lock);
+ write_seqlock_bh(&neigh->lock);
if (!dst->hh)
neigh_hh_init(neigh, dst, dst->ops->protocol);
- err = dev->hard_header(skb, dev, ntohs(skb->protocol),
+ err = dev->hard_header(skb, dev, type,
neigh->ha, NULL, skb->len);
- write_unlock_bh(&neigh->lock);
+ write_sequnlock_bh(&neigh->lock);
} else {
- read_lock_bh(&neigh->lock);
- err = dev->hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
- read_unlock_bh(&neigh->lock);
+ unsigned seq;
+ do {
+ seq = read_seqbegin(&neigh->lock);
+ err = dev->hard_header(skb, dev, type,
+ neigh->ha, NULL, skb->len);
+ } while (read_seqretry(&neigh->lock, seq));
}
+
if (err >= 0)
rc = neigh->ops->queue_xmit(skb);
else
@@ -1197,13 +1206,16 @@ int neigh_connected_output(struct sk_buf
struct dst_entry *dst = skb->dst;
struct neighbour *neigh = dst->neighbour;
struct net_device *dev = neigh->dev;
+ unsigned seq;
__skb_pull(skb, skb->nh.raw - skb->data);
- read_lock_bh(&neigh->lock);
- err = dev->hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, NULL, skb->len);
- read_unlock_bh(&neigh->lock);
+ do {
+ seq = read_seqbegin(&neigh->lock);
+ err = dev->hard_header(skb, dev, ntohs(skb->protocol),
+ neigh->ha, NULL, skb->len);
+ } while (read_seqretry(&neigh->lock, seq));
+
if (err >= 0)
err = neigh->ops->queue_xmit(skb);
else {
@@ -1964,11 +1976,15 @@ static int neigh_fill_info(struct sk_buf
NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
- read_lock_bh(&neigh->lock);
ndm->ndm_state = neigh->nud_state;
+
+ /* Not really updating this neighbour but don't want to
+ * deal with the unwind case when seqlock needs retry
+ */
+ write_seqlock_bh(&neigh->lock);
if ((neigh->nud_state & NUD_VALID) &&
nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
- read_unlock_bh(&neigh->lock);
+ write_sequnlock_bh(&neigh->lock);
goto nla_put_failure;
}
@@ -1976,7 +1992,7 @@ static int neigh_fill_info(struct sk_buf
ci.ndm_confirmed = now - neigh->confirmed;
ci.ndm_updated = now - neigh->updated;
ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
- read_unlock_bh(&neigh->lock);
+ write_sequnlock_bh(&neigh->lock);
NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
@@ -2081,13 +2097,13 @@ void __neigh_for_each_release(struct nei
&tbl->hash_buckets[chain], hlist) {
int release;
- write_lock(&n->lock);
+ write_seqlock(&n->lock);
release = cb(n);
if (release) {
hlist_del_rcu(&n->hlist);
n->dead = 1;
}
- write_unlock(&n->lock);
+ write_sequnlock(&n->lock);
if (release)
call_rcu(&n->rcu, neigh_rcu_release);
}
--- net-2.6.19.orig/net/ipv4/arp.c
+++ net-2.6.19/net/ipv4/arp.c
@@ -328,6 +328,31 @@ static void arp_error_report(struct neig
kfree_skb(skb);
}
+
+static unsigned arp_state_to_flags(const struct neighbour *neigh)
+{
+ unsigned flags = 0;
+ if (neigh->nud_state&NUD_PERMANENT)
+ flags = ATF_PERM|ATF_COM;
+ else if (neigh->nud_state&NUD_VALID)
+ flags = ATF_COM;
+ return flags;
+}
+
+static void arp_get_neigh_addr(u8 *ha, const struct neighbour *neigh,
+ unsigned len, unsigned *flags)
+{
+ unsigned seq;
+
+ do {
+ seq = read_seqbegin(&neigh->lock);
+ memcpy(ha, neigh->ha, len);
+ if (flags)
+ *flags = arp_state_to_flags(neigh);
+ } while (read_seqretry(&neigh->lock, seq));
+
+}
+
static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
u32 saddr = 0;
@@ -369,8 +394,12 @@ static void arp_solicit(struct neighbour
if ((probes -= neigh->parms->ucast_probes) < 0) {
if (!(neigh->nud_state&NUD_VALID))
printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n");
- dst_ha = neigh->ha;
- read_lock_bh(&neigh->lock);
+
+ dst_ha = kmalloc(MAX_ADDR_LEN, GFP_ATOMIC);
+ if (!dst_ha)
+ return;
+
+ arp_get_neigh_addr(dst_ha, neigh, MAX_ADDR_LEN, NULL);
} else if ((probes -= neigh->parms->app_probes) < 0) {
#ifdef CONFIG_ARPD
neigh_app_ns(neigh);
@@ -380,8 +409,9 @@ static void arp_solicit(struct neighbour
arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
dst_ha, dev->dev_addr, NULL);
+
if (dst_ha)
- read_unlock_bh(&neigh->lock);
+ kfree(dst_ha);
}
static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
@@ -489,10 +519,7 @@ int arp_find(unsigned char *haddr, struc
if (n) {
n->used = jiffies;
if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) {
- read_lock_bh(&n->lock);
- memcpy(haddr, n->ha, dev->addr_len);
- read_unlock_bh(&n->lock);
- neigh_release(n);
+ arp_get_neigh_addr(haddr, n, dev->addr_len, NULL);
return 0;
}
neigh_release(n);
@@ -1047,16 +1074,6 @@ static int arp_req_set(struct arpreq *r,
return err;
}
-static unsigned arp_state_to_flags(struct neighbour *neigh)
-{
- unsigned flags = 0;
- if (neigh->nud_state&NUD_PERMANENT)
- flags = ATF_PERM|ATF_COM;
- else if (neigh->nud_state&NUD_VALID)
- flags = ATF_COM;
- return flags;
-}
-
/*
* Get an ARP cache entry.
*/
@@ -1069,10 +1086,8 @@ static int arp_req_get(struct arpreq *r,
neigh = neigh_lookup(&arp_tbl, &ip, dev);
if (neigh) {
- read_lock_bh(&neigh->lock);
- memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
- r->arp_flags = arp_state_to_flags(neigh);
- read_unlock_bh(&neigh->lock);
+ arp_get_neigh_addr(r->arp_ha.sa_data, neigh, dev->addr_len,
+ &r->arp_flags);
r->arp_ha.sa_family = dev->type;
strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev));
neigh_release(neigh);
@@ -1258,7 +1273,7 @@ void __init arp_init(void)
/*
* ax25 -> ASCII conversion
*/
-static char *ax2asc2(ax25_address *a, char *buf)
+static char *ax2asc2(const ax25_address *a, char *buf)
{
char c, *s;
int n;
@@ -1290,35 +1305,41 @@ static char *ax2asc2(ax25_address *a, ch
#define HBUFFERLEN 30
static void arp_format_neigh_entry(struct seq_file *seq,
- struct neighbour *n)
+ const struct neighbour *n)
{
char hbuffer[HBUFFERLEN];
const char hexbuf[] = "0123456789ABCDEF";
int k, j;
+ unsigned hflags, seqno;
char tbuf[16];
struct net_device *dev = n->dev;
int hatype = dev->type;
- read_lock(&n->lock);
- /* Convert hardware address to XX:XX:XX:XX ... form. */
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
- if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM)
- ax2asc2((ax25_address *)n->ha, hbuffer);
- else {
-#endif
- for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) {
- hbuffer[k++] = hexbuf[(n->ha[j] >> 4) & 15];
- hbuffer[k++] = hexbuf[n->ha[j] & 15];
- hbuffer[k++] = ':';
- }
- hbuffer[--k] = 0;
+ do {
+ seqno = read_seqbegin(&n->lock);
+
+ /* Convert hardware address to XX:XX:XX:XX ... form. */
#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
- }
+ if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM)
+ ax2asc2((const ax25_address *)n->ha, hbuffer);
+ else
#endif
- sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->primary_key));
+ {
+ for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) {
+ hbuffer[k++] = hexbuf[(n->ha[j] >> 4) & 15];
+ hbuffer[k++] = hexbuf[n->ha[j] & 15];
+ hbuffer[k++] = ':';
+ }
+ hbuffer[--k] = 0;
+ }
+
+ sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(*(u32*)n->primary_key));
+ hflags = arp_state_to_flags(n);
+ } while (read_seqretry(&n->lock, seqno));
+
seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
- tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name);
- read_unlock(&n->lock);
+ tbuf, hatype, hflags, hbuffer, dev->name);
+
}
static void arp_format_pneigh_entry(struct seq_file *seq,
--- net-2.6.19.orig/net/ipv6/ndisc.c
+++ net-2.6.19/net/ipv6/ndisc.c
@@ -1405,15 +1405,15 @@ void ndisc_send_redirect(struct sk_buff
return;
}
- if (dev->addr_len) {
- read_lock_bh(&neigh->lock);
- if (neigh->nud_state & NUD_VALID) {
+ if (dev->addr_len && (neigh->nud_state & NUD_VALID)) {
+ unsigned seq;
+ do {
+ seq = read_seqbegin(&neigh->lock);
memcpy(ha_buf, neigh->ha, dev->addr_len);
- read_unlock_bh(&neigh->lock);
- ha = ha_buf;
- len += ndisc_opt_addr_space(dev);
- } else
- read_unlock_bh(&neigh->lock);
+ } while (read_seqretry(&neigh->lock, seq));
+
+ ha = ha_buf;
+ len += ndisc_opt_addr_space(dev);
}
rd_len = min_t(unsigned int,
--- net-2.6.19.orig/net/ipv6/route.c
+++ net-2.6.19/net/ipv6/route.c
@@ -280,20 +280,19 @@ static void rt6_probe(struct rt6_info *r
*/
if (!neigh || (neigh->nud_state & NUD_VALID))
return;
- read_lock_bh(&neigh->lock);
+
if (!(neigh->nud_state & NUD_VALID) &&
- time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
+ time_after(jiffies,
+ neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
struct in6_addr mcaddr;
struct in6_addr *target;
neigh->updated = jiffies;
- read_unlock_bh(&neigh->lock);
target = (struct in6_addr *)&neigh->primary_key;
addrconf_addr_solict_mult(target, &mcaddr);
ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
- } else
- read_unlock_bh(&neigh->lock);
+ }
}
#else
static inline void rt6_probe(struct rt6_info *rt)
@@ -324,10 +323,9 @@ static int inline rt6_check_neigh(struct
!(rt->rt6i_flags & RTF_GATEWAY))
m = 1;
else if (neigh) {
- read_lock_bh(&neigh->lock);
+ smp_rmb();
if (neigh->nud_state & NUD_VALID)
m = 2;
- read_unlock_bh(&neigh->lock);
}
return m;
}
--- net-2.6.19.orig/net/sched/sch_teql.c
+++ net-2.6.19/net/sched/sch_teql.c
@@ -248,9 +248,14 @@ __teql_resolve(struct sk_buff *skb, stru
}
if (neigh_event_send(n, skb_res) == 0) {
int err;
- read_lock(&n->lock);
- err = dev->hard_header(skb, dev, ntohs(skb->protocol), n->ha, NULL, skb->len);
- read_unlock(&n->lock);
+ unsigned seq;
+
+ do {
+ seq = read_seqbegin(&n->lock);
+ err = dev->hard_header(skb, dev, ntohs(skb->protocol),
+ n->ha, NULL, skb->len);
+ } while (read_seqretry(&n->lock, seq));
+
if (err < 0) {
neigh_release(n);
return -EINVAL;
--
^ permalink raw reply [flat|nested] 12+ messages in thread
* [RFC 6/7] neighbour: convert hard header cache to sequence number
2006-08-14 21:20 [RFC 0/7] neighbour table lockless read Stephen Hemminger
` (4 preceding siblings ...)
2006-08-14 21:20 ` [RFC 5/7] neighbour: convert lookup to sequence lock Stephen Hemminger
@ 2006-08-14 21:20 ` Stephen Hemminger
2006-08-14 21:20 ` [RFC 7/7] neighbour: reduce exports Stephen Hemminger
6 siblings, 0 replies; 12+ messages in thread
From: Stephen Hemminger @ 2006-08-14 21:20 UTC (permalink / raw)
To: David Miller; +Cc: netdev
[-- Attachment #1: hardheader-rcu.patch --]
[-- Type: text/plain, Size: 5140 bytes --]
Reading of the hard header cache in the output path can be made
lockless by converting hh_lock to a seqlock.
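For illustration, a minimal sketch of the seqlock pattern this conversion
relies on (the hh_example_* names are made up for this note and are not part
of the patch; the lock must be set up with seqlock_init() before use). The
writer runs rarely, when the link-layer address changes; readers copy the
cached header on the hot path and only retry if a writer raced with them:

#include <linux/seqlock.h>
#include <linux/string.h>

struct hh_example {
	seqlock_t	lock;		/* initialise with seqlock_init() */
	int		len;
	unsigned char	data[32];
};

/* writer: update the cached header under the seqlock */
static void hh_example_update(struct hh_example *hh,
			      const unsigned char *hdr, int len)
{
	write_seqlock_bh(&hh->lock);
	memcpy(hh->data, hdr, len);
	hh->len = len;
	write_sequnlock_bh(&hh->lock);
}

/* reader: copy without taking a lock, retry if a writer raced */
static int hh_example_copy(struct hh_example *hh, unsigned char *buf)
{
	unsigned seq;
	int len;

	do {
		seq = read_seqbegin(&hh->lock);
		len = hh->len;
		memcpy(buf, hh->data, len);	/* buf must hold 32 bytes */
	} while (read_seqretry(&hh->lock, seq));

	return len;
}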
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
---
include/linux/netdevice.h | 3 ++-
include/net/neighbour.h | 2 ++
net/core/neighbour.c | 40 +++++++++++++++++++++++++++++++++++-----
net/ipv4/ip_output.c | 13 +++----------
net/ipv6/ip6_output.c | 13 +++----------
5 files changed, 45 insertions(+), 26 deletions(-)
--- net-2.6.19.orig/include/linux/netdevice.h
+++ net-2.6.19/include/linux/netdevice.h
@@ -193,7 +193,7 @@ struct hh_cache
*/
int hh_len; /* length of header */
int (*hh_output)(struct sk_buff *skb);
- rwlock_t hh_lock;
+ seqlock_t hh_lock;
/* cached hardware header; allow for machine alignment needs. */
#define HH_DATA_MOD 16
@@ -217,6 +217,7 @@ struct hh_cache
#define LL_RESERVED_SPACE_EXTRA(dev,extra) \
((((dev)->hard_header_len+extra)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
+
/* These flag bits are private to the generic network queueing
* layer, they may not be explicitly referenced by any other
* code.
--- net-2.6.19.orig/net/core/neighbour.c
+++ net-2.6.19/net/core/neighbour.c
@@ -591,9 +591,11 @@ void neigh_destroy(struct neighbour *nei
while ((hh = neigh->hh) != NULL) {
neigh->hh = hh->hh_next;
hh->hh_next = NULL;
- write_lock_bh(&hh->hh_lock);
+
+ write_seqlock_bh(&hh->hh_lock);
hh->hh_output = neigh_blackhole;
- write_unlock_bh(&hh->hh_lock);
+ write_sequnlock_bh(&hh->hh_lock);
+
if (atomic_dec_and_test(&hh->hh_refcnt))
kfree(hh);
}
@@ -912,9 +914,9 @@ static __inline__ void neigh_update_hhs(
if (update) {
for (hh = neigh->hh; hh; hh = hh->hh_next) {
- write_lock_bh(&hh->hh_lock);
+ write_seqlock_bh(&hh->hh_lock);
update(hh, neigh->dev, neigh->ha);
- write_unlock_bh(&hh->hh_lock);
+ write_sequnlock_bh(&hh->hh_lock);
}
}
}
@@ -1105,7 +1107,7 @@ static void neigh_hh_init(struct neighbo
break;
if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
- rwlock_init(&hh->hh_lock);
+ seqlock_init(&hh->hh_lock);
hh->hh_type = protocol;
atomic_set(&hh->hh_refcnt, 0);
hh->hh_next = NULL;
@@ -1128,6 +1130,33 @@ static void neigh_hh_init(struct neighbo
}
}
+
+/*
+ * Add header to skb from hard header cache
+ * Handle case where cache gets changed.
+ */
+int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
+{
+ int len, alen;
+ unsigned seq;
+ int (*output)(struct sk_buff *);
+
+ for(;;) {
+ seq = read_seqbegin(&hh->hh_lock);
+ len = hh->hh_len;
+ alen = HH_DATA_ALIGN(len);
+ output = hh->hh_output;
+ memcpy(skb->data - alen, hh->hh_data, alen);
+ skb_push(skb, len);
+
+ if (likely(!read_seqretry(&hh->hh_lock, seq)))
+ return output(skb);
+
+ /* undo and try again */
+ __skb_pull(skb, len);
+ }
+}
+
/* This function can be used in contexts, where only old dev_queue_xmit
worked, f.e. if you want to override normal output path (eql, shaper),
but resolution is not made yet.
@@ -2762,6 +2791,7 @@ EXPORT_SYMBOL(neigh_delete);
EXPORT_SYMBOL(neigh_destroy);
EXPORT_SYMBOL(neigh_dump_info);
EXPORT_SYMBOL(neigh_event_ns);
+EXPORT_SYMBOL(neigh_hh_output);
EXPORT_SYMBOL(neigh_ifdown);
EXPORT_SYMBOL(neigh_lookup);
EXPORT_SYMBOL(neigh_lookup_nodev);
--- net-2.6.19.orig/net/ipv4/ip_output.c
+++ net-2.6.19/net/ipv4/ip_output.c
@@ -182,16 +182,9 @@ static inline int ip_finish_output2(stru
skb = skb2;
}
- if (hh) {
- int hh_alen;
-
- read_lock_bh(&hh->hh_lock);
- hh_alen = HH_DATA_ALIGN(hh->hh_len);
- memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
- read_unlock_bh(&hh->hh_lock);
- skb_push(skb, hh->hh_len);
- return hh->hh_output(skb);
- } else if (dst->neighbour)
+ if (hh)
+ return neigh_hh_output(hh, skb);
+ else if (dst->neighbour)
return dst->neighbour->output(skb);
if (net_ratelimit())
--- net-2.6.19.orig/net/ipv6/ip6_output.c
+++ net-2.6.19/net/ipv6/ip6_output.c
@@ -76,16 +76,9 @@ static inline int ip6_output_finish(stru
struct dst_entry *dst = skb->dst;
struct hh_cache *hh = dst->hh;
- if (hh) {
- int hh_alen;
-
- read_lock_bh(&hh->hh_lock);
- hh_alen = HH_DATA_ALIGN(hh->hh_len);
- memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
- read_unlock_bh(&hh->hh_lock);
- skb_push(skb, hh->hh_len);
- return hh->hh_output(skb);
- } else if (dst->neighbour)
+ if (hh)
+ return neigh_hh_output(hh, skb);
+ else if (dst->neighbour)
return dst->neighbour->output(skb);
IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
--- net-2.6.19.orig/include/net/neighbour.h
+++ net-2.6.19/include/net/neighbour.h
@@ -193,6 +193,8 @@ extern struct neighbour * neigh_create(s
struct net_device *dev);
extern void neigh_destroy(struct neighbour *neigh);
extern int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb);
+extern int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb);
+
extern int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
u32 flags);
extern void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
--
^ permalink raw reply [flat|nested] 12+ messages in thread
* [RFC 7/7] neighbour: reduce exports
2006-08-14 21:20 [RFC 0/7] neighbour table lockless read Stephen Hemminger
` (5 preceding siblings ...)
2006-08-14 21:20 ` [RFC 6/7] neighbour: convert hard header cache to sequence number Stephen Hemminger
@ 2006-08-14 21:20 ` Stephen Hemminger
6 siblings, 0 replies; 12+ messages in thread
From: Stephen Hemminger @ 2006-08-14 21:20 UTC (permalink / raw)
To: David Miller; +Cc: netdev
[-- Attachment #1: neighbour-exports.patch --]
[-- Type: text/plain, Size: 1359 bytes --]
The function neigh_update_hhs is used only in neighbour.c, so drop the
__inline__ hint and let the compiler decide whether to inline it.
Several other symbols are used only by rtnetlink; since rtnetlink cannot
be built as a module, there is no reason to export them.
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
---
net/core/neighbour.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
--- net-2.6.19.orig/net/core/neighbour.c
+++ net-2.6.19/net/core/neighbour.c
@@ -906,7 +906,7 @@ out:
return rc;
}
-static __inline__ void neigh_update_hhs(struct neighbour *neigh)
+static void neigh_update_hhs(struct neighbour *neigh)
{
struct hh_cache *hh;
void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
@@ -2782,7 +2782,6 @@ void neigh_sysctl_unregister(struct neig
#endif /* CONFIG_SYSCTL */
EXPORT_SYMBOL(__neigh_event_send);
-EXPORT_SYMBOL(neigh_add);
EXPORT_SYMBOL(neigh_changeaddr);
EXPORT_SYMBOL(neigh_compat_output);
EXPORT_SYMBOL(neigh_connected_output);
@@ -2803,11 +2802,8 @@ EXPORT_SYMBOL(neigh_table_clear);
EXPORT_SYMBOL(neigh_table_init);
EXPORT_SYMBOL(neigh_table_init_no_netlink);
EXPORT_SYMBOL(neigh_update);
-EXPORT_SYMBOL(neigh_update_hhs);
EXPORT_SYMBOL(pneigh_enqueue);
EXPORT_SYMBOL(pneigh_lookup);
-EXPORT_SYMBOL(neightbl_dump_info);
-EXPORT_SYMBOL(neightbl_set);
#ifdef CONFIG_ARPD
EXPORT_SYMBOL(neigh_app_ns);
--
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC 5/7] neighbour: convert lookup to sequence lock
2006-08-14 21:20 ` [RFC 5/7] neighbour: convert lookup to sequence lock Stephen Hemminger
@ 2006-08-14 22:22 ` Thomas Graf
2006-08-14 22:37 ` Stephen Hemminger
0 siblings, 1 reply; 12+ messages in thread
From: Thomas Graf @ 2006-08-14 22:22 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: David Miller, netdev
* Stephen Hemminger <shemminger@osdl.org> 2006-08-14 14:20
> --- net-2.6.19.orig/net/core/neighbour.c
> +++ net-2.6.19/net/core/neighbour.c
> @@ -1160,19 +1164,24 @@ int neigh_resolve_output(struct sk_buff
> if (!neigh_event_send(neigh, skb)) {
> int err;
> struct net_device *dev = neigh->dev;
> + unsigned short type = ntohs(skb->protocol);
> +
> if (dev->hard_header_cache && !dst->hh) {
> - write_lock_bh(&neigh->lock);
> + write_seqlock_bh(&neigh->lock);
> if (!dst->hh)
> neigh_hh_init(neigh, dst, dst->ops->protocol);
> - err = dev->hard_header(skb, dev, ntohs(skb->protocol),
> + err = dev->hard_header(skb, dev, type,
> neigh->ha, NULL, skb->len);
> - write_unlock_bh(&neigh->lock);
> + write_sequnlock_bh(&neigh->lock);
> } else {
> - read_lock_bh(&neigh->lock);
> - err = dev->hard_header(skb, dev, ntohs(skb->protocol),
> - neigh->ha, NULL, skb->len);
> - read_unlock_bh(&neigh->lock);
> + unsigned seq;
> + do {
> + seq = read_seqbegin(&neigh->lock);
> + err = dev->hard_header(skb, dev, type,
> + neigh->ha, NULL, skb->len);
> + } while (read_seqretry(&neigh->lock, seq));
Don't you need to pull in order to undo the push that probably
happened in dev->hard_header()?
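For context, a hypothetical Ethernet-style hard_header() (not from the tree,
modelled on eth_header()) showing the behaviour in question: it pushes the
link-layer header with skb_push() and returns the length it added, negated
when it could not fill in the destination, so re-running it in the retry
loop without a matching pull stacks a second header in front of the first:

#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>

/* hypothetical example, modelled on eth_header() */
static int example_hard_header(struct sk_buff *skb, struct net_device *dev,
			       unsigned short type, const void *daddr)
{
	struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);

	eth->h_proto = htons(type);
	memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);

	if (daddr) {
		memcpy(eth->h_dest, daddr, ETH_ALEN);
		return ETH_HLEN;
	}
	return -ETH_HLEN;	/* header written, destination still unknown */
}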
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC 5/7] neighbour: convert lookup to sequence lock
2006-08-14 22:22 ` Thomas Graf
@ 2006-08-14 22:37 ` Stephen Hemminger
2006-08-14 23:17 ` Thomas Graf
0 siblings, 1 reply; 12+ messages in thread
From: Stephen Hemminger @ 2006-08-14 22:37 UTC (permalink / raw)
To: Thomas Graf; +Cc: David Miller, netdev
On Tue, 15 Aug 2006 00:22:20 +0200
Thomas Graf <tgraf@suug.ch> wrote:
> * Stephen Hemminger <shemminger@osdl.org> 2006-08-14 14:20
> > --- net-2.6.19.orig/net/core/neighbour.c
> > +++ net-2.6.19/net/core/neighbour.c
> > @@ -1160,19 +1164,24 @@ int neigh_resolve_output(struct sk_buff
> > if (!neigh_event_send(neigh, skb)) {
> > int err;
> > struct net_device *dev = neigh->dev;
> > + unsigned short type = ntohs(skb->protocol);
> > +
> > if (dev->hard_header_cache && !dst->hh) {
> > - write_lock_bh(&neigh->lock);
> > + write_seqlock_bh(&neigh->lock);
> > if (!dst->hh)
> > neigh_hh_init(neigh, dst, dst->ops->protocol);
> > - err = dev->hard_header(skb, dev, ntohs(skb->protocol),
> > + err = dev->hard_header(skb, dev, type,
> > neigh->ha, NULL, skb->len);
> > - write_unlock_bh(&neigh->lock);
> > + write_sequnlock_bh(&neigh->lock);
> > } else {
> > - read_lock_bh(&neigh->lock);
> > - err = dev->hard_header(skb, dev, ntohs(skb->protocol),
> > - neigh->ha, NULL, skb->len);
> > - read_unlock_bh(&neigh->lock);
> > + unsigned seq;
> > + do {
> > + seq = read_seqbegin(&neigh->lock);
> > + err = dev->hard_header(skb, dev, type,
> > + neigh->ha, NULL, skb->len);
> > + } while (read_seqretry(&neigh->lock, seq));
>
> Don't you need to pull in order to undo the push that probably
> happened in dev->hard_header()?
Probably just record the headroom() and undo that?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC 5/7] neighbour: convert lookup to sequence lock
2006-08-14 22:37 ` Stephen Hemminger
@ 2006-08-14 23:17 ` Thomas Graf
2006-08-15 0:42 ` Alexey Kuznetsov
0 siblings, 1 reply; 12+ messages in thread
From: Thomas Graf @ 2006-08-14 23:17 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: David Miller, netdev
* Stephen Hemminger <shemminger@osdl.org> 2006-08-14 15:37
> On Tue, 15 Aug 2006 00:22:20 +0200
> Thomas Graf <tgraf@suug.ch> wrote:
> > Don't you need to pull in order to undo the push that probably
> > happened in dev->hard_header()?
>
> Probably just record the headroom() and undo that?
That wouldn't work if hard_header() ever expands the head. Fortunately
hard_header() returns the length added even in case of an error so we
can undo the absolute value returned.
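A minimal sketch of that undo (hypothetical helper, not part of the posted
series), assuming neigh->lock has become a seqlock as in patch 5 and that
the absolute value of the return is the number of bytes hard_header() pushed:

#include <linux/netdevice.h>
#include <linux/seqlock.h>
#include <linux/skbuff.h>
#include <net/neighbour.h>

/* hypothetical: build the link-layer header, retrying if the
 * neighbour's address changed while it was being copied */
static int example_resolve_header(struct sk_buff *skb, struct neighbour *n,
				  unsigned short type)
{
	struct net_device *dev = n->dev;
	unsigned seq;
	int err;

	for (;;) {
		seq = read_seqbegin(&n->lock);
		err = dev->hard_header(skb, dev, type,
				       n->ha, NULL, skb->len);
		if (!read_seqretry(&n->lock, seq))
			return err;

		/* a writer raced with us: undo exactly what was pushed */
		__skb_pull(skb, err < 0 ? -err : err);
	}
}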
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC 5/7] neighbour: convert lookup to sequence lock
2006-08-14 23:17 ` Thomas Graf
@ 2006-08-15 0:42 ` Alexey Kuznetsov
0 siblings, 0 replies; 12+ messages in thread
From: Alexey Kuznetsov @ 2006-08-15 0:42 UTC (permalink / raw)
To: Thomas Graf; +Cc: Stephen Hemminger, David Miller, netdev
Hello!
> That wouldn't work if hard_header() ever expands the head. Fortunately
> hard_header() returns the length added even in case of an error so we
> can undo the absolute value returned.
Yes.
Or it is probably safer to undo back to skb->nh. Even if hard_header()
expands the skb, skb->nh still remains valid.
Alexey
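A sketch of that variant (again hypothetical): instead of trusting the
return value, pull skb->data back to the network header, which in 2.6.x is
skb->nh and stays valid even if hard_header() expanded the head:

#include <linux/netdevice.h>
#include <linux/seqlock.h>
#include <linux/skbuff.h>
#include <net/neighbour.h>

/* hypothetical variant: undo by returning skb->data to skb->nh */
static int example_resolve_header_nh(struct sk_buff *skb, struct neighbour *n,
				     unsigned short type)
{
	struct net_device *dev = n->dev;
	unsigned seq;
	int err;

	do {
		seq = read_seqbegin(&n->lock);
		err = dev->hard_header(skb, dev, type,
				       n->ha, NULL, skb->len);
		if (!read_seqretry(&n->lock, seq))
			break;

		/* skb->nh still points at the network header */
		__skb_pull(skb, skb->nh.raw - skb->data);
	} while (1);

	return err;
}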
^ permalink raw reply [flat|nested] 12+ messages in thread