All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Dumazet <dada1@cosmosbay.com>
To: Andi Kleen <ak@suse.de>
Cc: Benjamin LaHaise <bcrl@linux.intel.com>,
	"David S. Miller" <davem@davemloft.net>,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH] struct file cleanup : the very large file_ra_state is now allocated only on demand.
Date: Thu, 18 Aug 2005 02:40:46 +0200	[thread overview]
Message-ID: <4303D90E.2030103@cosmosbay.com> (raw)
In-Reply-To: <20050817215357.GU3996@wotan.suse.de>

[-- Attachment #1: Type: text/plain, Size: 977 bytes --]

Andi Kleen a écrit :

> 
>>(because of the insane struct file_ra_state f_ra. I wish this structure 
>>were dynamically allocated only for files that really use it)
> 
> 
> How about you submit a patch for that instead? 
> 
> -Andi

OK, could you please comment this patch ?

The problem of dynamically allocating the readahead state data is that the allocation can fail and should not be fatal.
I made some choices that might be not good.

I also chose not to align "file_ra" slab on SLAB_HWCACHE_ALIGN because the object size is 10*sizeof(long), so alignment would loose 
6*sizeof(long) bytes for each object.


[PATCH]

* struct file cleanup : the very large file_ra_state is now allocated only on demand, using a dedicated "file_ra" slab.
	64bits machines handling lot of sockets can save about 72 bytes per file.
* private_data : The field is moved close to f_count and f_op fields to speedup sockfd_lookups

Thank you

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>




[-- Attachment #2: readahead.patch --]
[-- Type: text/plain, Size: 7652 bytes --]

--- linux-2.6.13-rc6/include/linux/fs.h	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/include/linux/fs.h	2005-08-18 01:33:04.000000000 +0200
@@ -586,20 +586,19 @@
 	struct dentry		*f_dentry;
 	struct vfsmount         *f_vfsmnt;
 	struct file_operations	*f_op;
+	void			*private_data;
 	atomic_t		f_count;
 	unsigned int 		f_flags;
 	mode_t			f_mode;
 	loff_t			f_pos;
 	struct fown_struct	f_owner;
 	unsigned int		f_uid, f_gid;
-	struct file_ra_state	f_ra;
+	struct file_ra_state	*f_rap;
 
 	size_t			f_maxcount;
 	unsigned long		f_version;
 	void			*f_security;
 
-	/* needed for tty driver, and maybe others */
-	void			*private_data;
 
 #ifdef CONFIG_EPOLL
 	/* Used by fs/eventpoll.c to link all the hooks to this file */
@@ -1514,8 +1513,7 @@
 extern void do_generic_mapping_read(struct address_space *mapping,
 				    struct file_ra_state *, struct file *,
 				    loff_t *, read_descriptor_t *, read_actor_t);
-extern void
-file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
+extern struct file_ra_state *file_ra_state_init(struct file *);
 extern ssize_t generic_file_direct_IO(int rw, struct kiocb *iocb,
 	const struct iovec *iov, loff_t offset, unsigned long nr_segs);
 extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, 
@@ -1549,8 +1547,10 @@
 					read_descriptor_t * desc,
 					read_actor_t actor)
 {
+	if (filp->f_rap == NULL)
+		file_ra_state_init(filp);
 	do_generic_mapping_read(filp->f_mapping,
-				&filp->f_ra,
+				filp->f_rap,
 				filp,
 				ppos,
 				desc,
--- linux-2.6.13-rc6/include/linux/slab.h	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/include/linux/slab.h	2005-08-18 00:37:59.000000000 +0200
@@ -125,6 +125,7 @@
 extern kmem_cache_t	*names_cachep;
 extern kmem_cache_t	*files_cachep;
 extern kmem_cache_t	*filp_cachep;
+extern kmem_cache_t	*ra_cachep;
 extern kmem_cache_t	*fs_cachep;
 extern kmem_cache_t	*signal_cachep;
 extern kmem_cache_t	*sighand_cachep;
--- linux-2.6.13-rc6/mm/readahead.c	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/mm/readahead.c	2005-08-18 01:14:11.000000000 +0200
@@ -29,14 +29,20 @@
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
 /*
- * Initialise a struct file's readahead state.  Assumes that the caller has
- * memset *ra to zero.
+ * Initialise a struct file's readahead state.
  */
-void
-file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
+struct file_ra_state *
+file_ra_state_init(struct file *file)
 {
-	ra->ra_pages = mapping->backing_dev_info->ra_pages;
-	ra->prev_page = -1;
+	struct file_ra_state *ra = kmem_cache_alloc(ra_cachep, GFP_KERNEL);
+
+	if (ra != NULL) {
+		file->f_rap = ra;
+		memset(ra, 0, sizeof(*ra));
+		ra->ra_pages = file->f_mapping->host->i_mapping->backing_dev_info->ra_pages;
+		ra->prev_page = -1;
+	}
+	return ra;
 }
 
 /*
--- linux-2.6.13-rc6/mm/filemap.c	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/mm/filemap.c	2005-08-18 01:33:58.000000000 +0200
@@ -711,7 +711,7 @@
  * NULL.
  */
 void do_generic_mapping_read(struct address_space *mapping,
-			     struct file_ra_state *_ra,
+			     struct file_ra_state *_rap,
 			     struct file *filp,
 			     loff_t *ppos,
 			     read_descriptor_t *desc,
@@ -727,8 +727,15 @@
 	loff_t isize;
 	struct page *cached_page;
 	int error;
-	struct file_ra_state ra = *_ra;
+	struct file_ra_state ra;
 
+	if (likely(_rap != NULL))
+		ra = *_rap;
+	else {
+		memset(&ra, 0, sizeof(ra));
+		ra.prev_page = -1;
+		ra.ra_pages = default_backing_dev_info.ra_pages;
+	}
 	cached_page = NULL;
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	next_index = index;
@@ -908,7 +915,8 @@
 	}
 
 out:
-	*_ra = ra;
+	if (_rap != NULL)
+		*_rap = ra;
 
 	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
 	if (cached_page)
@@ -1187,12 +1195,15 @@
 	int error;
 	struct file *file = area->vm_file;
 	struct address_space *mapping = file->f_mapping;
-	struct file_ra_state *ra = &file->f_ra;
+	struct file_ra_state *ra = file->f_rap;
 	struct inode *inode = mapping->host;
 	struct page *page;
 	unsigned long size, pgoff;
 	int did_readaround = 0, majmin = VM_FAULT_MINOR;
 
+	if (ra == NULL)
+		ra = file_ra_state_init(file);
+
 	pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
 
 retry_all:
@@ -1225,14 +1236,16 @@
 			handle_ra_miss(mapping, ra, pgoff);
 			goto no_cached_page;
 		}
-		ra->mmap_miss++;
+		if (ra != NULL) {
+			ra->mmap_miss++;
 
-		/*
-		 * Do we miss much more than hit in this file? If so,
-		 * stop bothering with read-ahead. It will only hurt.
-		 */
-		if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
-			goto no_cached_page;
+			/*
+			 * Do we miss much more than hit in this file? If so,
+			 * stop bothering with read-ahead. It will only hurt.
+			 */
+			if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
+				goto no_cached_page;
+		}
 
 		/*
 		 * To keep the pgmajfault counter straight, we need to
@@ -1243,7 +1256,7 @@
 			inc_page_state(pgmajfault);
 		}
 		did_readaround = 1;
-		ra_pages = max_sane_readahead(file->f_ra.ra_pages);
+		ra_pages = max_sane_readahead(ra != NULL ? ra->ra_pages : default_backing_dev_info.ra_pages);
 		if (ra_pages) {
 			pgoff_t start = 0;
 
@@ -1256,7 +1269,7 @@
 			goto no_cached_page;
 	}
 
-	if (!did_readaround)
+	if (!did_readaround && ra != NULL)
 		ra->mmap_hit++;
 
 	/*
--- linux-2.6.13-rc6/mm/fadvise.c	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/mm/fadvise.c	2005-08-18 01:05:15.000000000 +0200
@@ -28,6 +28,7 @@
 	struct file *file = fget(fd);
 	struct address_space *mapping;
 	struct backing_dev_info *bdi;
+	struct file_ra_state *ra;
 	loff_t endbyte;
 	pgoff_t start_index;
 	pgoff_t end_index;
@@ -54,15 +55,20 @@
 
 	bdi = mapping->backing_dev_info;
 
+	if ((ra = file->f_rap) == NULL)
+		ra = file_ra_state_init(file);
 	switch (advice) {
 	case POSIX_FADV_NORMAL:
-		file->f_ra.ra_pages = bdi->ra_pages;
+		if (ra != NULL)
+			ra->ra_pages = bdi->ra_pages;
 		break;
 	case POSIX_FADV_RANDOM:
-		file->f_ra.ra_pages = 0;
+		if (ra != NULL)
+			ra->ra_pages = 0;
 		break;
 	case POSIX_FADV_SEQUENTIAL:
-		file->f_ra.ra_pages = bdi->ra_pages * 2;
+		if (ra != NULL)
+			ra->ra_pages = bdi->ra_pages * 2;
 		break;
 	case POSIX_FADV_WILLNEED:
 	case POSIX_FADV_NOREUSE:
--- linux-2.6.13-rc6/fs/open.c	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/fs/open.c	2005-08-18 01:05:15.000000000 +0200
@@ -804,7 +804,7 @@
 	}
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
-	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
+	f->f_rap = NULL;
 
 	/* NB: we're sure to have correct a_ops only after f_op->open */
 	if (f->f_flags & O_DIRECT) {
--- linux-2.6.13-rc6/fs/file_table.c	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/fs/file_table.c	2005-08-18 00:37:59.000000000 +0200
@@ -55,6 +55,8 @@
 
 static inline void file_free(struct file *f)
 {
+	if (f->f_rap != NULL)
+		kmem_cache_free(ra_cachep, f->f_rap);
 	kmem_cache_free(filp_cachep, f);
 }
 
--- linux-2.6.13-rc6/fs/dcache.c	2005-08-07 20:18:56.000000000 +0200
+++ linux-2.6.13-rc6-ed/fs/dcache.c	2005-08-18 02:22:56.000000000 +0200
@@ -1705,6 +1705,8 @@
 
 /* SLAB cache for file structures */
 kmem_cache_t *filp_cachep;
+/* SLAB cache for ra structures */
+kmem_cache_t *ra_cachep;
 
 EXPORT_SYMBOL(d_genocide);
 
@@ -1733,6 +1735,9 @@
 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor);
 
+	ra_cachep = kmem_cache_create("file_ra", sizeof(struct file_ra_state), 0,
+			SLAB_PANIC, NULL, NULL);
+
 	dcache_init(mempages);
 	inode_init(mempages);
 	files_init(mempages);

       reply	other threads:[~2005-08-18  0:41 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20050810164655.GB4162@linux.intel.com>
     [not found] ` <20050810.135306.79296985.davem@davemloft.net>
     [not found]   ` <20050810211737.GA21581@linux.intel.com>
     [not found]     ` <430391F1.9080900@cosmosbay.com>
     [not found]       ` <20050817211829.GK27628@wotan.suse.de>
     [not found]         ` <4303AEC4.3060901@cosmosbay.com>
     [not found]           ` <20050817215357.GU3996@wotan.suse.de>
2005-08-18  0:40             ` Eric Dumazet [this message]
2005-08-18  1:05               ` [PATCH] struct file cleanup : the very large file_ra_state is now allocated only on demand Andi Kleen
2005-08-18  2:43                 ` David S. Miller
2005-08-18  7:14                   ` Eric Dumazet
2005-08-18  7:18                     ` David S. Miller
2005-08-18  2:52                 ` Nick Piggin
2005-08-18  2:57                   ` Andi Kleen
2005-08-18  3:00                     ` Nick Piggin
2005-08-18  1:39               ` Coywolf Qi Hunt
2005-08-18  6:51                 ` Eric Dumazet
2005-08-18  9:05               ` [PATCH] Put the very large file_ra_state outside of 'struct file' Eric Dumazet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4303D90E.2030103@cosmosbay.com \
    --to=dada1@cosmosbay.com \
    --cc=ak@suse.de \
    --cc=bcrl@linux.intel.com \
    --cc=davem@davemloft.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.