public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [patch v2] epoll use a single inode ...
@ 2007-03-05 23:41 Davide Libenzi
  2007-03-06  0:00 ` Linus Torvalds
  0 siblings, 1 reply; 29+ messages in thread
From: Davide Libenzi @ 2007-03-05 23:41 UTC (permalink / raw)
  To: Linux Kernel Mailing List; +Cc: Linus Torvalds, Andrew Morton, Eric Dumazet


ChangeLog:

- v2) Properly size the buffer. Avoid extra strlen() (Eric Dumazet)

Epoll does not keep any private data attached to its inode, so there'd be 
no need to allocate one inode per fd. For epoll, the inode is just a 
placeholder for the file operations and could be shared by all instances.
I'd like to use the same optimization even for the upcoming file-based 
objects, so if you see problems let me know.
One issue Al pointed out is that an fstat(2) on any epoll fd would
show the same st_ino. IMO that should be fine, since calling fstat(2) on
an epoll fd and expecting meaningful results is not something you want
to do in any case.
This patch also avoids hashing the dentry associated with the file*
in the global dentry hash table.



Signed-off-by: Davide Libenzi <davidel@xmailserver.org>



- Davide



eventpoll.c |   39 +++++++++++++++++++++++++++++++++------
1 file changed, 33 insertions(+), 6 deletions(-)



Index: linux-2.6.20.ep2/fs/eventpoll.c
===================================================================
--- linux-2.6.20.ep2.orig/fs/eventpoll.c	2007-03-04 14:40:01.000000000 -0800
+++ linux-2.6.20.ep2/fs/eventpoll.c	2007-03-05 15:26:16.000000000 -0800
@@ -258,6 +258,7 @@
 		   int maxevents, long timeout);
 static int eventpollfs_delete_dentry(struct dentry *dentry);
 static struct inode *ep_eventpoll_inode(void);
+static struct inode *ep_create_inode(void);
 static int eventpollfs_get_sb(struct file_system_type *fs_type,
 			      int flags, const char *dev_name,
 			      void *data, struct vfsmount *mnt);
@@ -279,6 +280,9 @@
 /* Virtual fs used to allocate inodes for eventpoll files */
 static struct vfsmount *eventpoll_mnt __read_mostly;
 
+/* Placeholder inode for eventpoll fds */
+static struct inode *eventpoll_inode;
+
 /* File callbacks that implement the eventpoll file behaviour */
 static const struct file_operations eventpoll_fops = {
 	.release	= ep_eventpoll_close,
@@ -733,7 +737,7 @@
 		    struct eventpoll *ep)
 {
 	struct qstr this;
-	char name[32];
+	char name[2 * sizeof(void *) + 8];
 	struct dentry *dentry;
 	struct inode *inode;
 	struct file *file;
@@ -763,15 +767,17 @@
 	 * using the inode number.
 	 */
 	error = -ENOMEM;
-	sprintf(name, "[%lu]", inode->i_ino);
 	this.name = name;
-	this.len = strlen(name);
-	this.hash = inode->i_ino;
+	this.len = sprintf(name, "[%p]", ep);
+	this.hash = 0;
 	dentry = d_alloc(eventpoll_mnt->mnt_sb->s_root, &this);
 	if (!dentry)
 		goto eexit_4;
 	dentry->d_op = &eventpollfs_dentry_operations;
-	d_add(dentry, inode);
+	/* Do not publish this dentry inside the global dentry hash table */
+	dentry->d_flags &= ~DCACHE_UNHASHED;
+	d_instantiate(dentry, inode);
+
 	file->f_path.mnt = mntget(eventpoll_mnt);
 	file->f_path.dentry = dentry;
 	file->f_mapping = inode->i_mapping;
@@ -1555,6 +1561,11 @@
 
 static int eventpollfs_delete_dentry(struct dentry *dentry)
 {
+	/*
+	 * We faked vfs to believe the dentry was hashed when we created it.
+	 * Now we restore the flag so that dput() will work correctly.
+	 */
+	dentry->d_flags |= DCACHE_UNHASHED;
 
 	return 1;
 }
@@ -1562,6 +1573,17 @@
 
 static struct inode *ep_eventpoll_inode(void)
 {
+
+	return igrab(eventpoll_inode);
+}
+
+/*
+ * A single inode exists for all eventpoll files. Unlike pipes, eventpoll
+ * inodes have no per-instance data associated with them, so we can avoid
+ * allocating more than one.
+ */
+static struct inode *ep_create_inode(void)
+{
 	int error = -ENOMEM;
 	struct inode *inode = new_inode(eventpoll_mnt->mnt_sb);
 
@@ -1626,10 +1648,14 @@
 
 	/* Mount the above commented virtual file system */
 	eventpoll_mnt = kern_mount(&eventpoll_fs_type);
-	error = PTR_ERR(eventpoll_mnt);
 	if (IS_ERR(eventpoll_mnt))
 		goto epanic;
 
+	/* Create the single instance of inode for all eventpoll fds */
+	eventpoll_inode = ep_create_inode();
+	if (IS_ERR(eventpoll_inode))
+		goto epanic;
+
 	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: successfully initialized.\n",
 			current));
 	return 0;
@@ -1642,6 +1668,7 @@
 static void __exit eventpoll_exit(void)
 {
 	/* Undo all operations done inside eventpoll_init() */
+	iput(eventpoll_inode);
 	unregister_filesystem(&eventpoll_fs_type);
 	mntput(eventpoll_mnt);
 	kmem_cache_destroy(pwq_cache);

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2007-03-10  8:24 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-03-05 23:41 [patch v2] epoll use a single inode Davide Libenzi
2007-03-06  0:00 ` Linus Torvalds
2007-03-06  0:12   ` Davide Libenzi
2007-03-06  0:20     ` Davide Libenzi
2007-03-06  0:25     ` Linus Torvalds
2007-03-06  2:25       ` H. Peter Anvin
2007-03-06  2:34         ` Davide Libenzi
2007-03-06  2:37           ` H. Peter Anvin
2007-03-06  2:43             ` Davide Libenzi
2007-03-06  6:22               ` Eric Dumazet
2007-03-06  6:31                 ` Davide Libenzi
2007-03-06  6:37                   ` Davide Libenzi
2007-03-06 16:28                 ` H. Peter Anvin
2007-03-06 17:09                   ` Linus Torvalds
2007-03-06 17:14                     ` H. Peter Anvin
2007-03-06 17:12                   ` Eric Dumazet
2007-03-06 17:19                     ` Linus Torvalds
2007-03-06 17:27                       ` H. Peter Anvin
2007-03-06 17:28                       ` Eric Dumazet
2007-03-06 18:10                         ` Eric Dumazet
2007-03-06 20:20                           ` Eric Dumazet
2007-03-07  3:47                             ` Linus Torvalds
2007-03-07  5:40                               ` H. Peter Anvin
2007-03-07  6:57                               ` Eric Dumazet
2007-03-07  7:13                                 ` H. Peter Anvin
2007-03-07 23:46                             ` Sami Farin
2007-03-06 18:10                   ` Davide Libenzi
2007-03-10  8:06     ` Pavel Machek
2007-03-10  8:24       ` Davide Libenzi

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox