All of lore.kernel.org
 help / color / mirror / Atom feed
From: Benjamin LaHaise <bcrl@lhnet.ca>
To: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>,
	Octavian Purdila <opurdila@ixiacom.com>,
	netdev@vger.kernel.org, Cosmin Ratiu <cratiu@ixiacom.com>
Subject: Re: [PATCH] net: allow netdev_wait_allrefs() to run faster
Date: Thu, 29 Oct 2009 19:38:48 -0400	[thread overview]
Message-ID: <20091029233848.GV3141@kvack.org> (raw)
In-Reply-To: <m1d445yf2x.fsf@fess.ebiederm.org>

On Thu, Oct 29, 2009 at 04:07:18PM -0700, Eric W. Biederman wrote:
> Could you keep me in the loop with that.  I have some pending cleanups for
> all of those pieces of code and may be able to help/advice/review.

Here are the sysfs scaling improvements.  I have to break them up, as there 
are 3 separate changes in this patch: 1. use an rbtree for name lookup in 
sysfs, 2. keep track of the number of directories for the purpose of 
generating the link count, as otherwise too much cpu time is spent in 
sysfs_count_nlink when new entries are added, and 3. when adding a new 
sysfs_dirent, walk the list backwards when linking it in, as higher 
numbered inodes tend to be at the end of the list, not the beginning.

		-ben


diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 5fad489..38ad7c8 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -43,10 +43,18 @@ static DEFINE_IDA(sysfs_ino_ida);
 static void sysfs_link_sibling(struct sysfs_dirent *sd)
 {
 	struct sysfs_dirent *parent_sd = sd->s_parent;
-	struct sysfs_dirent **pos;
+	struct sysfs_dirent **pos, *prev = NULL;
+	struct rb_node **new, *parent;
 
 	BUG_ON(sd->s_sibling);
 
+	if (parent_sd->s_dir.children_tail &&
+	    parent_sd->s_dir.children_tail->s_ino < sd->s_ino) {
+		prev = parent_sd->s_dir.children_tail;
+		pos = &prev->s_sibling;
+		goto got_it;
+	}
+
 	/* Store directory entries in order by ino.  This allows
 	 * readdir to properly restart without having to add a
 	 * cursor into the s_dir.children list.
@@ -54,9 +62,36 @@ static void sysfs_link_sibling(struct sysfs_dirent *sd)
 	for (pos = &parent_sd->s_dir.children; *pos; pos = &(*pos)->s_sibling) {
 		if (sd->s_ino < (*pos)->s_ino)
 			break;
+		prev = *pos;
 	}
+got_it:
+	if (prev == parent_sd->s_dir.children_tail)
+		parent_sd->s_dir.children_tail = sd;
 	sd->s_sibling = *pos;
+	sd->s_sibling_prev = prev;
 	*pos = sd;
+	parent_sd->s_nr_children_dir += (sysfs_type(sd) == SYSFS_DIR);
+
+	// rb tree insert
+	new = &(parent_sd->s_dir.child_rb_root.rb_node);
+	parent = NULL;
+
+	while (*new) {
+		struct sysfs_dirent *this =
+			container_of(*new, struct sysfs_dirent, s_rb_node);
+		int result = strcmp(sd->s_name, this->s_name);
+
+		parent = *new;
+		if (result < 0)
+			new = &((*new)->rb_left);
+		else if (result > 0)
+			new = &((*new)->rb_right);
+		else
+			BUG();
+	}
+
+	rb_link_node(&sd->s_rb_node, parent, new);
+	rb_insert_color(&sd->s_rb_node, &parent_sd->s_dir.child_rb_root);
 }
 
 /**
@@ -71,16 +106,22 @@ static void sysfs_link_sibling(struct sysfs_dirent *sd)
  */
 static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
 {
-	struct sysfs_dirent **pos;
+	struct sysfs_dirent **pos, *prev = NULL;
 
-	for (pos = &sd->s_parent->s_dir.children; *pos;
-	     pos = &(*pos)->s_sibling) {
-		if (*pos == sd) {
-			*pos = sd->s_sibling;
-			sd->s_sibling = NULL;
-			break;
-		}
-	}
+	prev = sd->s_sibling_prev;
+	if (prev)
+		pos = &prev->s_sibling;
+	else
+		pos = &sd->s_parent->s_dir.children;
+	if (sd == sd->s_parent->s_dir.children_tail)
+		sd->s_parent->s_dir.children_tail = prev;
+	*pos = sd->s_sibling;
+	if (sd->s_sibling)
+		sd->s_sibling->s_sibling_prev = prev;
+	
+	sd->s_parent->s_nr_children_dir -= (sysfs_type(sd) == SYSFS_DIR);
+	sd->s_sibling_prev = NULL;
+	rb_erase(&sd->s_rb_node, &sd->s_parent->s_dir.child_rb_root);
 }
 
 /**
@@ -331,6 +372,9 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
 	sd->s_mode = mode;
 	sd->s_flags = type;
 
+	if (type == SYSFS_DIR)
+		sd->s_dir.child_rb_root = RB_ROOT;
+
 	return sd;
 
  err_out2:
@@ -630,11 +674,20 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
 struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
 				       const unsigned char *name)
 {
-	struct sysfs_dirent *sd;
-
-	for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling)
-		if (!strcmp(sd->s_name, name))
-			return sd;
+	struct rb_node *node = parent_sd->s_dir.child_rb_root.rb_node;
+
+	while (node) {
+		struct sysfs_dirent *data =
+			container_of(node, struct sysfs_dirent, s_rb_node);
+		int result;
+		result = strcmp(name, data->s_name);
+		if (result < 0)
+			node = node->rb_left;
+		else if (result > 0)
+			node = node->rb_right;
+		else
+			return data;
+	}
 	return NULL;
 }
 
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index e28cecf..ff6e960 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -191,14 +191,7 @@ static struct lock_class_key sysfs_inode_imutex_key;
 
 static int sysfs_count_nlink(struct sysfs_dirent *sd)
 {
-	struct sysfs_dirent *child;
-	int nr = 0;
-
-	for (child = sd->s_dir.children; child; child = child->s_sibling)
-		if (sysfs_type(child) == SYSFS_DIR)
-			nr++;
-
-	return nr + 2;
+	return sd->s_nr_children_dir + 2;
 }
 
 static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index af4c4e7..22fd1bc 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -9,6 +9,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/rbtree.h>
 
 struct sysfs_open_dirent;
 
@@ -16,7 +17,10 @@ struct sysfs_open_dirent;
 struct sysfs_elem_dir {
 	struct kobject		*kobj;
 	/* children list starts here and goes through sd->s_sibling */
+	
 	struct sysfs_dirent	*children;
+	struct sysfs_dirent	*children_tail;
+	struct rb_root		child_rb_root;
 };
 
 struct sysfs_elem_symlink {
@@ -52,6 +56,8 @@ struct sysfs_dirent {
 	atomic_t		s_active;
 	struct sysfs_dirent	*s_parent;
 	struct sysfs_dirent	*s_sibling;
+	struct sysfs_dirent	*s_sibling_prev;
+	struct rb_node		s_rb_node;
 	const char		*s_name;
 
 	union {
@@ -65,6 +71,8 @@ struct sysfs_dirent {
 	ino_t			s_ino;
 	umode_t			s_mode;
 	struct sysfs_inode_attrs *s_iattr;
+
+	int			s_nr_children_dir;
 };
 
 #define SD_DEACTIVATED_BIAS		INT_MIN

  reply	other threads:[~2009-10-29 23:38 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-10-17 22:18 [PATCH/RFC] make unregister_netdev() delete more than 4 interfaces per second Benjamin LaHaise
2009-10-18  4:26 ` Eric Dumazet
2009-10-18 16:13   ` Benjamin LaHaise
2009-10-18 17:51     ` Eric Dumazet
2009-10-18 18:21       ` Benjamin LaHaise
2009-10-18 19:36         ` Eric Dumazet
2009-10-21 12:39         ` Octavian Purdila
2009-10-21 15:40           ` [PATCH] net: allow netdev_wait_allrefs() to run faster Eric Dumazet
2009-10-21 16:09             ` Eric Dumazet
2009-10-21 16:51             ` Benjamin LaHaise
2009-10-21 19:54               ` Eric Dumazet
2009-10-29 23:07               ` Eric W. Biederman
2009-10-29 23:38                 ` Benjamin LaHaise [this message]
2009-10-30  1:45                   ` Eric W. Biederman
2009-10-30 14:35                     ` Benjamin LaHaise
2009-10-30 14:43                       ` Eric Dumazet
2009-10-30 23:25                       ` Eric W. Biederman
2009-10-30 23:53                         ` Benjamin LaHaise
2009-10-31  0:37                           ` Eric W. Biederman
2010-08-09 17:23                   ` Ben Greear
2010-08-09 17:34                     ` Benjamin LaHaise
2010-08-09 17:44                       ` Ben Greear
2010-08-09 17:48                         ` Benjamin LaHaise
2010-08-09 18:03                           ` Ben Greear
2010-08-09 19:59                       ` Eric W. Biederman
2010-08-09 21:03                         ` Benjamin LaHaise
2010-08-09 21:17                           ` Eric W. Biederman
2009-10-21 16:55             ` Octavian Purdila
2009-10-23 21:13             ` Paul E. McKenney
2009-10-24  4:35               ` Eric Dumazet
2009-10-24  5:49                 ` Paul E. McKenney
2009-10-24  8:49                   ` Eric Dumazet
2009-10-24 13:52                     ` Paul E. McKenney
2009-10-24 14:24                       ` Eric Dumazet
2009-10-24 14:46                         ` Paul E. McKenney
2009-10-24 23:49                         ` Octavian Purdila
2009-10-25  4:47                           ` Paul E. McKenney
2009-10-25  8:35                           ` Eric Dumazet
2009-10-25 15:19                             ` Octavian Purdila
2009-10-25 19:28                               ` Eric Dumazet
2009-10-24 20:22                 ` Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091029233848.GV3141@kvack.org \
    --to=bcrl@lhnet.ca \
    --cc=cratiu@ixiacom.com \
    --cc=ebiederm@xmission.com \
    --cc=eric.dumazet@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=opurdila@ixiacom.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.