From: Nick Piggin <npiggin@suse.de>
To: linux-fsdevel@vger.kernel.org
Subject: [patch] fs: avoid buffer_head
Date: Fri, 19 Mar 2010 18:16:23 +1100 [thread overview]
Message-ID: <20100319071623.GK25636@laptop> (raw)
In-Reply-To: <20100319071213.GJ25636@laptop>
This is the more interesting patch. I wonder whether any filesystem people
have particular requests for the callbacks? Also, I didn't put the
equivalent of the invalidate_inode_buffers call (a ->release callback) in
clear_inode because I was hoping filesystems could do that themselves. But
maybe it is required in some cases?
--
Introduce new address space operations, sync and release, which can be used
by a filesystem to synchronize and release per-address_space private metadata.
They generalise the sync_mapping_buffers, invalidate_inode_buffers, and
remove_inode_buffers calls, and bring us another step closer to divorcing
buffer heads from core mm/fs code.
---
fs/buffer.c | 4 ++--
fs/inode.c | 42 ++++++++++++++++++++++++++++++++----------
fs/libfs.c | 7 ++++++-
include/linux/buffer_head.h | 2 --
include/linux/fs.h | 35 +++++++++++++++++++++++++++++++++++
5 files changed, 75 insertions(+), 15 deletions(-)
Index: linux-2.6/fs/buffer.c
===================================================================
--- linux-2.6.orig/fs/buffer.c
+++ linux-2.6/fs/buffer.c
@@ -476,9 +476,9 @@ static void __remove_assoc_queue(struct
bh->b_assoc_map = NULL;
}
-int inode_has_buffers(struct inode *inode)
+static int inode_has_buffers(struct inode *inode)
{
- return !list_empty(&inode->i_data.private_list);
+ return mapping_has_private(&inode->i_data);
}
/*
Index: linux-2.6/include/linux/buffer_head.h
===================================================================
--- linux-2.6.orig/include/linux/buffer_head.h
+++ linux-2.6/include/linux/buffer_head.h
@@ -159,7 +159,6 @@ void end_buffer_async_write(struct buffe
/* Things to do with buffers at mapping->private_list */
void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode);
-int inode_has_buffers(struct inode *);
void invalidate_inode_buffers(struct inode *);
int remove_inode_buffers(struct inode *inode);
int sync_mapping_buffers(struct address_space *mapping);
@@ -335,7 +334,6 @@ extern int __set_page_dirty_buffers(stru
static inline void buffer_init(void) {}
static inline int try_to_free_buffers(struct page *page) { return 1; }
-static inline int inode_has_buffers(struct inode *inode) { return 0; }
static inline void invalidate_inode_buffers(struct inode *inode) {}
static inline int remove_inode_buffers(struct inode *inode) { return 1; }
static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
Index: linux-2.6/fs/inode.c
===================================================================
--- linux-2.6.orig/fs/inode.c
+++ linux-2.6/fs/inode.c
@@ -28,11 +28,11 @@
/*
* This is needed for the following functions:
- * - inode_has_buffers
* - invalidate_inode_buffers
- * - invalidate_bdev
+ * - remove_inode_buffers
*
* FIXME: remove all knowledge of the buffer layer from this file
+ * (by converting filesystems to ->release and ->sync aops)
*/
#include <linux/buffer_head.h>
@@ -224,7 +224,8 @@ static struct inode *alloc_inode(struct
void __destroy_inode(struct inode *inode)
{
- BUG_ON(inode_has_buffers(inode));
+ BUG_ON(mapping_has_private(&inode->i_data));
+ BUG_ON(inode->i_data.nrpages);
security_inode_free(inode);
fsnotify_inode_delete(inode);
#ifdef CONFIG_FS_POSIX_ACL
@@ -306,10 +307,15 @@ void __iget(struct inode *inode)
*/
void clear_inode(struct inode *inode)
{
+ struct address_space *mapping = &inode->i_data;
+
might_sleep();
- invalidate_inode_buffers(inode);
+ /* XXX: filesystems should invalidate this before calling */
+ if (!mapping->a_ops->release)
+ invalidate_inode_buffers(inode);
- BUG_ON(inode->i_data.nrpages);
+ BUG_ON(mapping_has_private(mapping));
+ BUG_ON(mapping->nrpages);
BUG_ON(!(inode->i_state & I_FREEING));
BUG_ON(inode->i_state & I_CLEAR);
inode_sync_wait(inode);
@@ -370,6 +376,7 @@ static int invalidate_list(struct list_h
for (;;) {
struct list_head *tmp = next;
struct inode *inode;
+ struct address_space *mapping;
/*
* We can reschedule here without worrying about the list's
@@ -385,7 +392,12 @@ static int invalidate_list(struct list_h
inode = list_entry(tmp, struct inode, i_sb_list);
if (inode->i_state & I_NEW)
continue;
- invalidate_inode_buffers(inode);
+ mapping = &inode->i_data;
+ if (!mapping->a_ops->release)
+ invalidate_inode_buffers(inode);
+ else
+ mapping->a_ops->release(mapping, AOP_RELEASE_FORCE);
+ BUG_ON(mapping_has_private(mapping));
if (!atomic_read(&inode->i_count)) {
list_move(&inode->i_list, dispose);
WARN_ON(inode->i_state & I_NEW);
@@ -429,13 +441,15 @@ EXPORT_SYMBOL(invalidate_inodes);
static int can_unuse(struct inode *inode)
{
+ struct address_space *mapping = &inode->i_data;
+
if (inode->i_state)
return 0;
- if (inode_has_buffers(inode))
+ if (mapping_has_private(mapping))
return 0;
if (atomic_read(&inode->i_count))
return 0;
- if (inode->i_data.nrpages)
+ if (mapping->nrpages)
return 0;
return 1;
}
@@ -464,6 +478,7 @@ static void prune_icache(int nr_to_scan)
spin_lock(&inode_lock);
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
struct inode *inode;
+ struct address_space *mapping;
if (list_empty(&inode_unused))
break;
@@ -474,10 +489,17 @@ static void prune_icache(int nr_to_scan)
list_move(&inode->i_list, &inode_unused);
continue;
}
- if (inode_has_buffers(inode) || inode->i_data.nrpages) {
+ mapping = &inode->i_data;
+ if (mapping_has_private(mapping) || mapping->nrpages) {
+ int ret;
+
__iget(inode);
spin_unlock(&inode_lock);
- if (remove_inode_buffers(inode))
+ if (mapping->a_ops->release)
+ ret = mapping->a_ops->release(mapping, 0);
+ else
+ ret = !remove_inode_buffers(inode);
+ if (ret)
reap += invalidate_mapping_pages(&inode->i_data,
0, -1);
iput(inode);
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h
+++ linux-2.6/include/linux/fs.h
@@ -569,6 +569,17 @@ typedef struct {
typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
unsigned long, unsigned long);
+/*
+ * Flags for address_space_operations.release operations.
+ */
+#define AOP_RELEASE_FORCE 0x01 /* Release dirty and in-use data */
+
+/*
+ * Flags for address_space_operations.sync operations.
+ */
+#define AOP_SYNC_WRITE 0x01 /* Begin writeout */
+#define AOP_SYNC_WAIT 0x02 /* Wait for started writeout */
+
struct address_space_operations {
int (*writepage)(struct page *page, struct writeback_control *wbc);
int (*readpage)(struct file *, struct page *);
@@ -604,6 +615,22 @@ struct address_space_operations {
int (*launder_page) (struct page *);
int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
unsigned long);
+ /*
+ * release releases any private data on the mapping so that
+ * it may be reclaimed.
+ * Second parameter is flags (see above).
+ * Returns 0 on success, or -errno.
+ */
+ int (*release)(struct address_space *, unsigned int);
+
+ /*
+ * sync writes back and waits for any private data on the mapping,
+ * as a data consistency operation.
+ * Second parameter is flags (see above).
+ * Returns 0 on success, or -errno.
+ */
+ int (*sync)(struct address_space *, unsigned int);
+
int (*error_remove_page)(struct address_space *, struct page *);
};
@@ -688,6 +715,14 @@ struct block_device {
int mapping_tagged(struct address_space *mapping, int tag);
/*
+ * Does this mapping have anything on its private list?
+ */
+static inline int mapping_has_private(struct address_space *mapping)
+{
+ return !list_empty(&mapping->private_list);
+}
+
+/*
* Might pages of this file be mapped into userspace?
*/
static inline int mapping_mapped(struct address_space *mapping)
Index: linux-2.6/fs/libfs.c
===================================================================
--- linux-2.6.orig/fs/libfs.c
+++ linux-2.6/fs/libfs.c
@@ -823,10 +823,15 @@ int simple_fsync(struct file *file, stru
.nr_to_write = 0, /* metadata-only; caller takes care of data */
};
struct inode *inode = dentry->d_inode;
+ struct address_space *mapping = inode->i_mapping;
int err;
int ret;
- ret = sync_mapping_buffers(inode->i_mapping);
+ if (!mapping->a_ops->sync)
+ ret = sync_mapping_buffers(mapping);
+ else
+ ret = mapping->a_ops->sync(mapping, AOP_SYNC_WRITE|AOP_SYNC_WAIT);
+
if (!(inode->i_state & I_DIRTY))
return ret;
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
prev parent reply other threads:[~2010-03-19 7:16 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-03-19 7:12 [patch] fs: move invalidate_bdev Nick Piggin
2010-03-19 7:16 ` Nick Piggin [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100319071623.GK25636@laptop \
--to=npiggin@suse.de \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).