netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Eric Dumazet <dada1@cosmosbay.com>
To: Corey Minyard <minyard@acm.org>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	David Miller <davem@davemloft.net>,
	Stephen Hemminger <shemminger@vyatta.com>,
	benny+usenet@amorsen.dk,
	Linux Netdev List <netdev@vger.kernel.org>,
	Christoph Lameter <cl@linux-foundation.org>,
	Evgeniy Polyakov <zbr@ioremap.net>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Christian Bell <christian@myri.com>
Subject: [PATCH 1/3] rcu: Introduce hlist_nulls variant of hlist
Date: Thu, 13 Nov 2008 14:14:18 +0100	[thread overview]
Message-ID: <491C282A.5050802@cosmosbay.com> (raw)
In-Reply-To: <4909D551.9080309@cosmosbay.com>

[-- Attachment #1: Type: text/plain, Size: 2524 bytes --]

hlist uses NULL value to finish a chain.

hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker.

This allows to store many different end markers, so that some RCU lockless
algos (used in TCP/UDP stack for example) can save some memory barriers in
fast paths.

Two new files are added :

include/linux/list_nulls.h
  - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant

include/linux/rculist_nulls.h
  - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant

   Only four helpers are declared for the moment :

     hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(),
     hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu()

prefetches() were removed, since an end of list is not anymore NULL value.
prefetches() could trigger useless (and possibly dangerous) memory transactions.

Example of use (extracted from __udp4_lib_lookup())

	struct sock *sk, *result;
        struct hlist_nulls_node *node;
        unsigned short hnum = ntohs(dport);
        unsigned int hash = udp_hashfn(net, hnum);
        struct udp_hslot *hslot = &udptable->hash[hash];
        int score, badness;

        rcu_read_lock();
begin:
        result = NULL;
        badness = -1;
        sk_nulls_for_each_rcu(sk, node, &hslot->head) {
                score = compute_score(sk, net, saddr, hnum, sport,
                                      daddr, dport, dif);
                if (score > badness) {
                        result = sk;
                        badness = score;
                }
        }
        /*
         * if the nulls value we got at the end of this lookup is
         * not the expected one, we must restart lookup.
         * We probably met an item that was moved to another chain.
         */
        if (get_nulls_value(node) != hash)
                goto begin;

        if (result) {
                if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
                        result = NULL;
                else if (unlikely(compute_score(result, net, saddr, hnum, sport,
                                  daddr, dport, dif) < badness)) {
                        sock_put(result);
                        goto begin;
                }
        }
        rcu_read_unlock();
        return result;

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
---
 include/linux/list_nulls.h    |   94 +++++++++++++++++++++++++++
 include/linux/rculist_nulls.h |  110 ++++++++++++++++++++++++++++++++
 2 files changed, 204 insertions(+)

[-- Attachment #2: nulls.patch --]
[-- Type: text/plain, Size: 7213 bytes --]

diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
new file mode 100644
index 0000000..856dee8
--- /dev/null
+++ b/include/linux/list_nulls.h
@@ -0,0 +1,94 @@
+#ifndef _LINUX_LIST_NULLS_H
+#define _LINUX_LIST_NULLS_H
+
+/*
+ * Special version of lists, where end of list is not a NULL pointer,
+ * but a 'nulls' marker, which can have many different values.
+ * (up to 2^31 different values guaranteed on all platforms)
+ *
+ * In the standard hlist, termination of a list is the NULL pointer.
+ * In this special 'nulls' variant, we use the fact that objects stored in
+ * a list are aligned on a word (4 or 8 bytes alignment).
+ * We therefore use the last significant bit of 'ptr' :
+ * Set to 1 : This is a 'nulls' end-of-list marker (ptr >> 1)
+ * Set to 0 : This is a pointer to some object (ptr)
+ */
+
+struct hlist_nulls_head {
+	struct hlist_nulls_node *first;
+};
+
+struct hlist_nulls_node {
+	struct hlist_nulls_node *next, **pprev;
+};
+#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \
+	((ptr)->first = (struct hlist_nulls_node *) (1UL | (((long)nulls) << 1)))
+
+#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
+/**
+ * ptr_is_a_nulls - Test if a ptr is a nulls
+ * @ptr: ptr to be tested
+ *
+ */
+static inline int is_a_nulls(const struct hlist_nulls_node *ptr)
+{
+	return ((unsigned long)ptr & 1);
+}
+
+/**
+ * get_nulls_value - Get the 'nulls' value of the end of chain
+ * @ptr: end of chain
+ *
+ * Should be called only if is_a_nulls(ptr);
+ */
+static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr)
+{
+	return ((unsigned long)ptr) >> 1;
+}
+
+static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h)
+{
+	return !h->pprev;
+}
+
+static inline int hlist_nulls_empty(const struct hlist_nulls_head *h)
+{
+	return is_a_nulls(h->first);
+}
+
+static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
+{
+	struct hlist_nulls_node *next = n->next;
+	struct hlist_nulls_node **pprev = n->pprev;
+	*pprev = next;
+	if (!is_a_nulls(next))
+		next->pprev = pprev;
+}
+
+/**
+ * hlist_nulls_for_each_entry	- iterate over list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ *
+ */
+#define hlist_nulls_for_each_entry(tpos, pos, head, member)		       \
+	for (pos = (head)->first;					       \
+	     (!is_a_nulls(pos)) &&					       \
+		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+/**
+ * hlist_nulls_for_each_entry_from - iterate over a hlist continuing from current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ *
+ */
+#define hlist_nulls_for_each_entry_from(tpos, pos, member)	\
+	for (; (!is_a_nulls(pos)) && 				\
+		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \
+	     pos = pos->next)
+
+#endif
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
new file mode 100644
index 0000000..b185ac4
--- /dev/null
+++ b/include/linux/rculist_nulls.h
@@ -0,0 +1,110 @@
+#ifndef _LINUX_RCULIST_NULLS_H
+#define _LINUX_RCULIST_NULLS_H
+
+#ifdef __KERNEL__
+
+/*
+ * RCU-protected list version
+ */
+#include <linux/list_nulls.h>
+#include <linux/rcupdate.h>
+
+/**
+ * hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization
+ * @n: the element to delete from the hash list.
+ *
+ * Note: hlist_nulls_unhashed() on the node return true after this. It is
+ * useful for RCU based read lockfree traversal if the writer side
+ * must know if the list entry is still hashed or already unhashed.
+ *
+ * In particular, it means that we can not poison the forward pointers
+ * that may still be used for walking the hash list and we can only
+ * zero the pprev pointer so list_unhashed() will return true after
+ * this.
+ *
+ * The caller must take whatever precautions are necessary (such as
+ * holding appropriate locks) to avoid racing with another
+ * list-mutation primitive, such as hlist_nulls_add_head_rcu() or
+ * hlist_nulls_del_rcu(), running on this same list.  However, it is
+ * perfectly legal to run concurrently with the _rcu list-traversal
+ * primitives, such as hlist_nulls_for_each_entry_rcu().
+ */
+static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
+{
+	if (!hlist_nulls_unhashed(n)) {
+		__hlist_nulls_del(n);
+		n->pprev = NULL;
+	}
+}
+
+/**
+ * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization
+ * @n: the element to delete from the hash list.
+ *
+ * Note: hlist_nulls_unhashed() on entry does not return true after this,
+ * the entry is in an undefined state. It is useful for RCU based
+ * lockfree traversal.
+ *
+ * In particular, it means that we can not poison the forward
+ * pointers that may still be used for walking the hash list.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
+ * or hlist_nulls_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_nulls_for_each_entry().
+ */
+static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n)
+{
+	__hlist_nulls_del(n);
+	n->pprev = LIST_POISON2;
+}
+
+/**
+ * hlist_nulls_add_head_rcu
+ * @n: the element to add to the hash list.
+ * @h: the list to add to.
+ *
+ * Description:
+ * Adds the specified element to the specified hlist_nulls,
+ * while permitting racing traversals.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
+ * or hlist_nulls_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
+ * problems on Alpha CPUs.  Regardless of the type of CPU, the
+ * list-traversal primitive must be guarded by rcu_read_lock().
+ */
+static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
+					struct hlist_nulls_head *h)
+{
+	struct hlist_nulls_node *first = h->first;
+
+	n->next = first;
+	n->pprev = &h->first;
+	rcu_assign_pointer(h->first, n);
+	if (!is_a_nulls(first))
+		first->pprev = &n->next;
+}
+/**
+ * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_nulls_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_nulls_node within the struct.
+ *
+ */
+#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \
+	for (pos = rcu_dereference((head)->first);			 \
+		(!is_a_nulls(pos)) && 			\
+		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
+		pos = rcu_dereference(pos->next))
+
+#endif
+#endif

  parent reply	other threads:[~2008-11-13 13:15 UTC|newest]

Thread overview: 134+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-06 18:50 [PATCH 3/3] Convert the UDP hash lock to RCU Corey Minyard
2008-10-06 21:22 ` Eric Dumazet
2008-10-06 21:40   ` David Miller
2008-10-06 23:08     ` Corey Minyard
2008-10-07  8:37       ` Evgeniy Polyakov
2008-10-07 14:16         ` Christoph Lameter
2008-10-07 14:29           ` Evgeniy Polyakov
2008-10-07 14:38             ` Christoph Lameter
2008-10-07 14:33           ` Paul E. McKenney
2008-10-07 14:45             ` Christoph Lameter
2008-10-07 15:07               ` Eric Dumazet
2008-10-07 15:07               ` Paul E. McKenney
2008-10-07  5:24     ` Eric Dumazet
2008-10-07  8:54       ` Benny Amorsen
2008-10-07 12:59         ` Eric Dumazet
2008-10-07 14:07           ` Stephen Hemminger
2008-10-07 20:55             ` David Miller
2008-10-07 21:20               ` Stephen Hemminger
2008-10-08 13:55               ` Eric Dumazet
2008-10-08 18:45                 ` David Miller
2008-10-28 20:37                   ` [PATCH 1/2] udp: introduce struct udp_table and multiple rwlocks Eric Dumazet
2008-10-28 21:23                     ` Christian Bell
2008-10-28 21:31                       ` Evgeniy Polyakov
2008-10-28 21:48                       ` Eric Dumazet
2008-10-28 21:28                     ` Evgeniy Polyakov
2008-10-28 20:42                   ` [PATCH 2/2] udp: RCU handling for Unicast packets Eric Dumazet
2008-10-28 22:45                     ` Eric Dumazet
2008-10-29  5:05                       ` David Miller
2008-10-29  8:23                         ` Eric Dumazet
2008-10-29  8:56                           ` David Miller
2008-10-29 10:19                             ` Eric Dumazet
2008-10-29 18:19                               ` David Miller
2008-10-29  9:04                           ` Eric Dumazet
2008-10-29  9:17                             ` David Miller
2008-10-29 13:17                             ` Corey Minyard
2008-10-29 14:36                               ` Eric Dumazet
2008-10-29 15:34                                 ` Corey Minyard
2008-10-29 16:09                                   ` Eric Dumazet
2008-10-29 16:37                                     ` Paul E. McKenney
2008-10-29 17:22                                       ` Corey Minyard
2008-10-29 17:45                                         ` Eric Dumazet
2008-10-29 18:28                                           ` Corey Minyard
2008-10-29 18:52                                             ` Paul E. McKenney
2008-10-29 20:00                                               ` Eric Dumazet
2008-10-29 20:17                                                 ` Paul E. McKenney
2008-10-29 21:29                                                   ` Corey Minyard
2008-10-29 21:57                                                     ` Eric Dumazet
2008-10-29 21:58                                                     ` Paul E. McKenney
2008-10-29 22:08                                                   ` Eric Dumazet
2008-10-30  3:22                                                     ` Corey Minyard
2008-10-30  5:50                                                       ` Eric Dumazet
2008-11-02  4:19                                                         ` David Miller
2008-10-30  5:40                                                     ` David Miller
2008-10-30  5:51                                                       ` Eric Dumazet
2008-10-30  7:04                                                         ` Eric Dumazet
2008-10-30  7:05                                                           ` David Miller
2008-10-30 15:40                                                     ` [PATCH] udp: Introduce special NULL pointers for hlist termination Eric Dumazet
2008-10-30 15:51                                                       ` Stephen Hemminger
2008-10-30 16:28                                                         ` Corey Minyard
2008-10-31 14:37                                                           ` Eric Dumazet
2008-10-31 14:55                                                             ` Pavel Emelyanov
2008-11-02  4:22                                                               ` David Miller
2008-10-30 17:12                                                         ` Eric Dumazet
2008-10-31  7:51                                                           ` David Miller
2008-10-30 16:01                                                       ` Peter Zijlstra
2008-10-31  0:14                                                       ` Keith Owens
2008-11-13 13:13                                                       ` [PATCH 0/3] net: RCU lookups for UDP, DCCP and TCP protocol Eric Dumazet
2008-11-13 17:20                                                         ` Andi Kleen
2008-11-17  3:41                                                         ` David Miller
2008-11-19 19:52                                                           ` Christoph Lameter
2008-11-13 13:14                                                       ` Eric Dumazet [this message]
2008-11-13 13:29                                                         ` [PATCH 1/3] rcu: Introduce hlist_nulls variant of hlist Peter Zijlstra
2008-11-13 13:44                                                           ` Eric Dumazet
2008-11-13 16:02                                                             ` [PATCH 4/3] rcu: documents rculist_nulls Eric Dumazet
2008-11-14 15:16                                                               ` Peter Zijlstra
2008-11-17  3:36                                                                 ` David Miller
2008-11-19 17:07                                                               ` Paul E. McKenney
2008-11-14 15:16                                                         ` [PATCH 1/3] rcu: Introduce hlist_nulls variant of hlist Peter Zijlstra
2008-11-19 17:01                                                         ` Paul E. McKenney
2008-11-19 17:53                                                           ` Eric Dumazet
2008-11-19 18:46                                                             ` Paul E. McKenney
2008-11-19 18:53                                                               ` Arnaldo Carvalho de Melo
2008-11-19 21:17                                                                 ` Paul E. McKenney
2008-11-19 20:39                                                               ` Eric Dumazet
2008-11-19 21:21                                                                 ` Paul E. McKenney
2008-11-13 13:15                                                       ` [PATCH 2/3] udp: Use hlist_nulls in UDP RCU code Eric Dumazet
2008-11-19 17:29                                                         ` Paul E. McKenney
2008-11-19 17:53                                                           ` Eric Dumazet
2008-11-13 13:15                                                       ` [PATCH 3/3] net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls Eric Dumazet
2008-11-13 13:34                                                         ` Peter Zijlstra
2008-11-13 13:51                                                           ` Eric Dumazet
2008-11-13 14:08                                                             ` Christoph Lameter
2008-11-13 14:22                                                             ` Peter Zijlstra
2008-11-13 14:27                                                               ` Christoph Lameter
2008-11-19 17:53                                                         ` Paul E. McKenney
2008-11-23  9:33                                                         ` [PATCH] net: Convert TCP/DCCP listening hash tables to use RCU Eric Dumazet
2008-11-23 15:59                                                           ` Paul E. McKenney
2008-11-23 18:42                                                             ` Eric Dumazet
2008-11-23 19:17                                                               ` Paul E. McKenney
2008-11-23 20:18                                                                 ` Eric Dumazet
2008-11-23 22:33                                                                   ` Paul E. McKenney
2008-11-24  1:23                                                           ` David Miller
2008-10-30 11:04                                                 ` [PATCH 2/2] udp: RCU handling for Unicast packets Peter Zijlstra
2008-10-30 11:30                                                   ` Eric Dumazet
2008-10-30 18:25                                                     ` Paul E. McKenney
2008-10-31 16:40                                                       ` Eric Dumazet
2008-11-01  3:10                                                         ` Paul E. McKenney
2008-10-29 17:32                                       ` Eric Dumazet
2008-10-29 18:11                                         ` Paul E. McKenney
2008-10-29 18:29                                           ` David Miller
2008-10-29 18:38                                             ` Paul E. McKenney
2008-10-29 18:36                                           ` Eric Dumazet
2008-10-29 18:20                                 ` David Miller
2008-10-30 11:12                                 ` Peter Zijlstra
2008-10-30 11:29                                   ` Eric Dumazet
2008-10-28 20:37                 ` [PATCH 0/2] udp: Convert the UDP hash lock to RCU Eric Dumazet
2008-10-28 21:28                   ` Stephen Hemminger
2008-10-28 21:50                     ` Eric Dumazet
2008-10-07 16:43           ` [PATCH 3/3] " Corey Minyard
2008-10-07 18:26       ` David Miller
2008-10-08  8:35         ` Eric Dumazet
2008-10-08 16:38           ` David Miller
2008-10-07  8:31     ` Peter Zijlstra
2008-10-07 14:36       ` Paul E. McKenney
2008-10-07 18:29       ` David Miller
2008-10-06 22:07   ` Corey Minyard
2008-10-07  8:17   ` Peter Zijlstra
2008-10-07  9:24     ` Eric Dumazet
2008-10-07 14:15       ` Christoph Lameter
2008-10-07 14:38         ` Paul E. McKenney
2008-10-07 14:50         ` Eric Dumazet
2008-10-07 15:05           ` Paul E. McKenney
2008-10-07 15:09           ` Peter Zijlstra
2008-10-07 15:23           ` Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=491C282A.5050802@cosmosbay.com \
    --to=dada1@cosmosbay.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=benny+usenet@amorsen.dk \
    --cc=christian@myri.com \
    --cc=cl@linux-foundation.org \
    --cc=davem@davemloft.net \
    --cc=minyard@acm.org \
    --cc=netdev@vger.kernel.org \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=shemminger@vyatta.com \
    --cc=zbr@ioremap.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).