linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 00/17] Shrink inode size
@ 2014-05-02 13:19 Jan Kara
  2014-05-02 13:19 ` [PATCH 01/17] fs: Remove i_devices from inode Jan Kara
                   ` (16 more replies)
  0 siblings, 17 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:19 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

  Hello,

  So I've come across a couple of relatively scarcely used fields in
struct inode and struct address_space and decided it might be time to
do a spring cleaning. This patch series is composed of several relatively
independent parts:

1) Patch 1 - remove i_devices list head from inode (save 2 longs)
2) Patch 2 - better union fields in struct inode   (save 1 long)
3) Patches 3-6 - remove i_mapping.private_data     (save 1 long)
4) Patches 7-15 - move i_mapping.private_list to fs private part of inode
						   (save 2 longs for quite some
						    filesystems)

There are other savings possible (like moving i_dquot fields to fs private
part of inode for filesystems that need it) but for now I'd like to hear
about what people think about the patches I wrote so far. Comments welcome.

								Honza

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH 01/17] fs: Remove i_devices from inode
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
@ 2014-05-02 13:19 ` Jan Kara
  2014-05-02 14:42   ` Christoph Hellwig
  2014-05-02 13:19 ` [PATCH 02/17] fs: Save pointer in struct inode by better unioning Jan Kara
                   ` (15 subsequent siblings)
  16 siblings, 1 reply; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:19 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

Struct inode has an i_devices list head which is used when inode
represents a block device or a character device for tracking all inodes
referencing that device. The only use of that tracking is to remove
references to the device from inodes when the device is released.
However, both the block device and the character device code take a
reference to the device when they reference it from an inode (the block
device code takes an inode reference, the character device code a kobj
reference), so the device cannot be released while any inode still
references it.

Remove the useless code and i_devices from struct inode.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/block_dev.c       | 17 +++--------------
 fs/char_dev.c        | 19 -------------------
 fs/inode.c           |  1 -
 include/linux/cdev.h |  1 -
 include/linux/fs.h   |  2 --
 5 files changed, 3 insertions(+), 37 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1e86823a9cbd..01fc44cd9e7d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -398,7 +398,6 @@ static void init_once(void *foo)
 
 	memset(bdev, 0, sizeof(*bdev));
 	mutex_init(&bdev->bd_mutex);
-	INIT_LIST_HEAD(&bdev->bd_inodes);
 	INIT_LIST_HEAD(&bdev->bd_list);
 #ifdef CONFIG_SYSFS
 	INIT_LIST_HEAD(&bdev->bd_holder_disks);
@@ -408,24 +407,14 @@ static void init_once(void *foo)
 	mutex_init(&bdev->bd_fsfreeze_mutex);
 }
 
-static inline void __bd_forget(struct inode *inode)
-{
-	list_del_init(&inode->i_devices);
-	inode->i_bdev = NULL;
-	inode->i_mapping = &inode->i_data;
-}
-
 static void bdev_evict_inode(struct inode *inode)
 {
 	struct block_device *bdev = &BDEV_I(inode)->bdev;
-	struct list_head *p;
+
 	truncate_inode_pages(&inode->i_data, 0);
 	invalidate_inode_buffers(inode); /* is it needed here? */
 	clear_inode(inode);
 	spin_lock(&bdev_lock);
-	while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
-		__bd_forget(list_entry(p, struct inode, i_devices));
-	}
 	list_del_init(&bdev->bd_list);
 	spin_unlock(&bdev_lock);
 }
@@ -585,7 +574,6 @@ static struct block_device *bd_acquire(struct inode *inode)
 			ihold(bdev->bd_inode);
 			inode->i_bdev = bdev;
 			inode->i_mapping = bdev->bd_inode->i_mapping;
-			list_add(&inode->i_devices, &bdev->bd_inodes);
 		}
 		spin_unlock(&bdev_lock);
 	}
@@ -606,7 +594,8 @@ void bd_forget(struct inode *inode)
 	spin_lock(&bdev_lock);
 	if (!sb_is_blkdev_sb(inode->i_sb))
 		bdev = inode->i_bdev;
-	__bd_forget(inode);
+	inode->i_bdev = NULL;
+	inode->i_mapping = &inode->i_data;
 	spin_unlock(&bdev_lock);
 
 	if (bdev)
diff --git a/fs/char_dev.c b/fs/char_dev.c
index f77f7702fabe..883208f2f211 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -389,7 +389,6 @@ static int chrdev_open(struct inode *inode, struct file *filp)
 		p = inode->i_cdev;
 		if (!p) {
 			inode->i_cdev = p = new;
-			list_add(&inode->i_devices, &p->list);
 			new = NULL;
 		} else if (!cdev_get(p))
 			ret = -ENXIO;
@@ -422,23 +421,10 @@ static int chrdev_open(struct inode *inode, struct file *filp)
 void cd_forget(struct inode *inode)
 {
 	spin_lock(&cdev_lock);
-	list_del_init(&inode->i_devices);
 	inode->i_cdev = NULL;
 	spin_unlock(&cdev_lock);
 }
 
-static void cdev_purge(struct cdev *cdev)
-{
-	spin_lock(&cdev_lock);
-	while (!list_empty(&cdev->list)) {
-		struct inode *inode;
-		inode = container_of(cdev->list.next, struct inode, i_devices);
-		list_del_init(&inode->i_devices);
-		inode->i_cdev = NULL;
-	}
-	spin_unlock(&cdev_lock);
-}
-
 /*
  * Dummy default file-operations: the only thing this does
  * is contain the open that then fills in the correct operations
@@ -509,10 +495,8 @@ void cdev_del(struct cdev *p)
 
 static void cdev_default_release(struct kobject *kobj)
 {
-	struct cdev *p = container_of(kobj, struct cdev, kobj);
 	struct kobject *parent = kobj->parent;
 
-	cdev_purge(p);
 	kobject_put(parent);
 }
 
@@ -521,7 +505,6 @@ static void cdev_dynamic_release(struct kobject *kobj)
 	struct cdev *p = container_of(kobj, struct cdev, kobj);
 	struct kobject *parent = kobj->parent;
 
-	cdev_purge(p);
 	kfree(p);
 	kobject_put(parent);
 }
@@ -543,7 +526,6 @@ struct cdev *cdev_alloc(void)
 {
 	struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL);
 	if (p) {
-		INIT_LIST_HEAD(&p->list);
 		kobject_init(&p->kobj, &ktype_cdev_dynamic);
 	}
 	return p;
@@ -560,7 +542,6 @@ struct cdev *cdev_alloc(void)
 void cdev_init(struct cdev *cdev, const struct file_operations *fops)
 {
 	memset(cdev, 0, sizeof *cdev);
-	INIT_LIST_HEAD(&cdev->list);
 	kobject_init(&cdev->kobj, &ktype_cdev_default);
 	cdev->ops = fops;
 }
diff --git a/fs/inode.c b/fs/inode.c
index 4bcdad3c9361..e89be4c4c9f4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -365,7 +365,6 @@ void inode_init_once(struct inode *inode)
 {
 	memset(inode, 0, sizeof(*inode));
 	INIT_HLIST_NODE(&inode->i_hash);
-	INIT_LIST_HEAD(&inode->i_devices);
 	INIT_LIST_HEAD(&inode->i_wb_list);
 	INIT_LIST_HEAD(&inode->i_lru);
 	address_space_init_once(&inode->i_data);
diff --git a/include/linux/cdev.h b/include/linux/cdev.h
index fb4591977b03..fe00138b5106 100644
--- a/include/linux/cdev.h
+++ b/include/linux/cdev.h
@@ -13,7 +13,6 @@ struct cdev {
 	struct kobject kobj;
 	struct module *owner;
 	const struct file_operations *ops;
-	struct list_head list;
 	dev_t dev;
 	unsigned int count;
 };
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 23b2a35d712e..ac6e06a09980 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -440,7 +440,6 @@ struct block_device {
 	struct inode *		bd_inode;	/* will die */
 	struct super_block *	bd_super;
 	struct mutex		bd_mutex;	/* open/close mutex */
-	struct list_head	bd_inodes;
 	void *			bd_claiming;
 	void *			bd_holder;
 	int			bd_holders;
@@ -595,7 +594,6 @@ struct inode {
 #ifdef CONFIG_QUOTA
 	struct dquot		*i_dquot[MAXQUOTAS];
 #endif
-	struct list_head	i_devices;
 	union {
 		struct pipe_inode_info	*i_pipe;
 		struct block_device	*i_bdev;
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 02/17] fs: Save pointer in struct inode by better unioning
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
  2014-05-02 13:19 ` [PATCH 01/17] fs: Remove i_devices from inode Jan Kara
@ 2014-05-02 13:19 ` Jan Kara
  2014-05-02 13:19 ` [PATCH 03/17] hugetlbfs: Use own list instead of mapping->private_list Jan Kara
                   ` (14 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:19 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

i_rcu (struct rcu_head) has two pointers but it is unioned with i_dentry
(struct hlist_head) which has only one pointer. Union i_rcu with
i_sb_list instead and thus save one pointer in struct inode.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 Documentation/filesystems/porting | 5 -----
 fs/inode.c                        | 3 ++-
 include/linux/fs.h                | 4 ++--
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index fe2b7ae6f962..26ce6cd742c6 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -354,11 +354,6 @@ protects *all* the dcache state of a given dentry.
 via rcu-walk path walk (basically, if the file can have had a path name in the
 vfs namespace).
 
-	Even though i_dentry and i_rcu share storage in a union, we will
-initialize the former in inode_init_always(), so just leave it alone in
-the callback.  It used to be necessary to clean it there, but not anymore
-(starting at 3.2).
-
 --
 [recommended]
 	vfs now tries to do path walking in "rcu-walk mode", which avoids
diff --git a/fs/inode.c b/fs/inode.c
index e89be4c4c9f4..1b888eb524ca 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -183,7 +183,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	}
 	inode->i_private = NULL;
 	inode->i_mapping = mapping;
-	INIT_HLIST_HEAD(&inode->i_dentry);	/* buggered by rcu freeing */
+	INIT_LIST_HEAD(&inode->i_sb_list);	/* buggered by rcu freeing */
 #ifdef CONFIG_FS_POSIX_ACL
 	inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
 #endif
@@ -258,6 +258,7 @@ static void i_callback(struct rcu_head *head)
 static void destroy_inode(struct inode *inode)
 {
 	BUG_ON(!list_empty(&inode->i_lru));
+	BUG_ON(!list_empty(&inode->i_sb_list));
 	__destroy_inode(inode);
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ac6e06a09980..b76720852f5f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -579,9 +579,9 @@ struct inode {
 	struct hlist_node	i_hash;
 	struct list_head	i_wb_list;	/* backing dev IO list */
 	struct list_head	i_lru;		/* inode LRU list */
-	struct list_head	i_sb_list;
+	struct hlist_head	i_dentry;
 	union {
-		struct hlist_head	i_dentry;
+		struct list_head	i_sb_list;
 		struct rcu_head		i_rcu;
 	};
 	u64			i_version;
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 03/17] hugetlbfs: Use own list instead of mapping->private_list
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
  2014-05-02 13:19 ` [PATCH 01/17] fs: Remove i_devices from inode Jan Kara
  2014-05-02 13:19 ` [PATCH 02/17] fs: Save pointer in struct inode by better unioning Jan Kara
@ 2014-05-02 13:19 ` Jan Kara
  2014-05-02 13:19 ` [PATCH 04/17] aio: Use i_private instead of mapping->private_data Jan Kara
                   ` (13 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:19 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

Use list head in hugetlbfs inode instead of mapping->private_list since
that will go away.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/hugetlbfs/inode.c |  8 +++++++-
 mm/hugetlb.c         | 14 +++++++-------
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d19b30ababf1..d345d0f4873a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -52,6 +52,7 @@ struct hugetlbfs_config {
 
 struct hugetlbfs_inode_info {
 	struct shared_policy policy;
+	struct list_head region_list;
 	struct inode vfs_inode;
 };
 
@@ -60,6 +61,11 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
 	return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
 }
 
+struct list_head *hugetlb_region_list(struct inode *inode)
+{
+	return &HUGETLBFS_I(inode)->region_list;
+}
+
 static struct backing_dev_info hugetlbfs_backing_dev_info = {
 	.name		= "hugetlbfs",
 	.ra_pages	= 0,	/* No readahead */
@@ -487,7 +493,6 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
 		inode->i_mapping->a_ops = &hugetlbfs_aops;
 		inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-		INIT_LIST_HEAD(&inode->i_mapping->private_list);
 		info = HUGETLBFS_I(inode);
 		/*
 		 * The policy is initialized here even if we are creating a
@@ -710,6 +715,7 @@ static void init_once(void *foo)
 	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
 
 	inode_init_once(&ei->vfs_inode);
+	INIT_LIST_HEAD(&ei->region_list);
 }
 
 const struct file_operations hugetlbfs_file_operations = {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c01cb9fedb18..046a93e1df23 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -47,6 +47,8 @@ static struct hstate * __initdata parsed_hstate;
 static unsigned long __initdata default_hstate_max_huge_pages;
 static unsigned long __initdata default_hstate_size;
 
+extern struct list_head *hugetlb_region_list(struct inode *inode);
+
 /*
  * Protects updates to hugepage_freelists, hugepage_activelist, nr_huge_pages,
  * free_huge_pages, and surplus_huge_pages.
@@ -1155,9 +1157,7 @@ static long vma_needs_reservation(struct hstate *h,
 
 	if (vma->vm_flags & VM_MAYSHARE) {
 		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
-		return region_chg(&inode->i_mapping->private_list,
-							idx, idx + 1);
-
+		return region_chg(hugetlb_region_list(inode), idx, idx + 1);
 	} else if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
 		return 1;
 
@@ -1180,7 +1180,7 @@ static void vma_commit_reservation(struct hstate *h,
 
 	if (vma->vm_flags & VM_MAYSHARE) {
 		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
-		region_add(&inode->i_mapping->private_list, idx, idx + 1);
+		region_add(hugetlb_region_list(inode), idx, idx + 1);
 
 	} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
 		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
@@ -3177,7 +3177,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	 * called to make the mapping read-write. Assume !vma is a shm mapping
 	 */
 	if (!vma || vma->vm_flags & VM_MAYSHARE)
-		chg = region_chg(&inode->i_mapping->private_list, from, to);
+		chg = region_chg(hugetlb_region_list(inode), from, to);
 	else {
 		struct resv_map *resv_map = resv_map_alloc();
 		if (!resv_map)
@@ -3222,7 +3222,7 @@ int hugetlb_reserve_pages(struct inode *inode,
 	 * else has to be done for private mappings here
 	 */
 	if (!vma || vma->vm_flags & VM_MAYSHARE)
-		region_add(&inode->i_mapping->private_list, from, to);
+		region_add(hugetlb_region_list(inode), from, to);
 	return 0;
 out_err:
 	if (vma)
@@ -3233,7 +3233,7 @@ out_err:
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
 {
 	struct hstate *h = hstate_inode(inode);
-	long chg = region_truncate(&inode->i_mapping->private_list, offset);
+	long chg = region_truncate(hugetlb_region_list(inode), offset);
 	struct hugepage_subpool *spool = subpool_inode(inode);
 
 	spin_lock(&inode->i_lock);
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 04/17] aio: Use i_private instead of mapping->private_data
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
                   ` (2 preceding siblings ...)
  2014-05-02 13:19 ` [PATCH 03/17] hugetlbfs: Use own list instead of mapping->private_list Jan Kara
@ 2014-05-02 13:19 ` Jan Kara
  2014-05-02 13:19 ` [PATCH 05/17] virtio_balloon: Store mapping directly in balloon_dev_info Jan Kara
                   ` (12 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:19 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

Use inode->i_private to store pointer to aio context instead of
inode->i_mapping->private_data as that's going away.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/aio.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 062a5f6a1448..a906e5f4b688 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -168,7 +168,7 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
 		return ERR_CAST(inode);
 
 	inode->i_mapping->a_ops = &aio_ctx_aops;
-	inode->i_mapping->private_data = ctx;
+	inode->i_private = ctx;
 	inode->i_size = PAGE_SIZE * nr_pages;
 
 	path.dentry = d_alloc_pseudo(aio_mnt->mnt_sb, &this);
@@ -231,7 +231,7 @@ static void put_aio_ring_file(struct kioctx *ctx)
 
 		/* Prevent further access to the kioctx from migratepages */
 		spin_lock(&aio_ring_file->f_inode->i_mapping->private_lock);
-		aio_ring_file->f_inode->i_mapping->private_data = NULL;
+		aio_ring_file->f_inode->i_private = NULL;
 		ctx->aio_ring_file = NULL;
 		spin_unlock(&aio_ring_file->f_inode->i_mapping->private_lock);
 
@@ -289,7 +289,7 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 
 	/* Make sure the old page hasn't already been changed */
 	spin_lock(&mapping->private_lock);
-	ctx = mapping->private_data;
+	ctx = mapping->host->i_private;
 	if (ctx) {
 		pgoff_t idx;
 		spin_lock_irqsave(&ctx->completion_lock, flags);
@@ -318,11 +318,10 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 	}
 
 	/* We can potentially race against kioctx teardown here.  Use the
-	 * address_space's private data lock to protect the mapping's
-	 * private_data.
+	 * address_space's private data lock to protect inode's i_private.
 	 */
 	spin_lock(&mapping->private_lock);
-	ctx = mapping->private_data;
+	ctx = mapping->host->i_private;
 	if (ctx) {
 		pgoff_t idx;
 		spin_lock_irqsave(&ctx->completion_lock, flags);
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 05/17] virtio_balloon: Store mapping directly in balloon_dev_info
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
                   ` (3 preceding siblings ...)
  2014-05-02 13:19 ` [PATCH 04/17] aio: Use i_private instead of mapping->private_data Jan Kara
@ 2014-05-02 13:19 ` Jan Kara
  2014-05-02 13:19 ` [PATCH 06/17] fs: Don't store backing dev mapping in inode->i_mapping->private_data Jan Kara
                   ` (11 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:19 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

Currently the mapping for balloon_dev_info is allocated separately. That
saves some memory in case CONFIG_BALLOON_COMPACTION is disabled but
it creates a need to point from struct address_space to
balloon_dev_info. Just embed struct address_space into struct
balloon_dev_info so that we don't have to use mapping->private_data
which is going away.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 drivers/virtio/virtio_balloon.c    | 22 +----------
 include/linux/balloon_compaction.h | 39 ++------------------
 mm/balloon_compaction.c            | 75 +++++++++++---------------------------
 3 files changed, 28 insertions(+), 108 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 34bdabaecbd6..3620df32b71e 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -425,7 +425,6 @@ static const struct address_space_operations virtio_balloon_aops = {
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
-	struct address_space *vb_mapping;
 	struct balloon_dev_info *vb_devinfo;
 	int err;
 
@@ -442,30 +441,16 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	vb->vdev = vdev;
 	vb->need_stats_update = 0;
 
-	vb_devinfo = balloon_devinfo_alloc(vb);
+	vb_devinfo = balloon_devinfo_alloc(vb, &virtio_balloon_aops);
 	if (IS_ERR(vb_devinfo)) {
 		err = PTR_ERR(vb_devinfo);
 		goto out_free_vb;
 	}
-
-	vb_mapping = balloon_mapping_alloc(vb_devinfo,
-					   (balloon_compaction_check()) ?
-					   &virtio_balloon_aops : NULL);
-	if (IS_ERR(vb_mapping)) {
-		/*
-		 * IS_ERR(vb_mapping) && PTR_ERR(vb_mapping) == -EOPNOTSUPP
-		 * This means !CONFIG_BALLOON_COMPACTION, otherwise we get off.
-		 */
-		err = PTR_ERR(vb_mapping);
-		if (err != -EOPNOTSUPP)
-			goto out_free_vb_devinfo;
-	}
-
 	vb->vb_dev_info = vb_devinfo;
 
 	err = init_vqs(vb);
 	if (err)
-		goto out_free_vb_mapping;
+		goto out_free_vb_devinfo;
 
 	vb->thread = kthread_run(balloon, vb, "vballoon");
 	if (IS_ERR(vb->thread)) {
@@ -477,8 +462,6 @@ static int virtballoon_probe(struct virtio_device *vdev)
 
 out_del_vqs:
 	vdev->config->del_vqs(vdev);
-out_free_vb_mapping:
-	balloon_mapping_free(vb_mapping);
 out_free_vb_devinfo:
 	balloon_devinfo_free(vb_devinfo);
 out_free_vb:
@@ -506,7 +489,6 @@ static void virtballoon_remove(struct virtio_device *vdev)
 
 	kthread_stop(vb->thread);
 	remove_common(vb);
-	balloon_mapping_free(vb->vb_dev_info->mapping);
 	balloon_devinfo_free(vb->vb_dev_info);
 	kfree(vb);
 }
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index 089743ade734..a11eb723561b 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -54,8 +54,8 @@
  * balloon driver as a page book-keeper for its registered balloon devices.
  */
 struct balloon_dev_info {
+	struct address_space mapping;	/* balloon special page->mapping */
 	void *balloon_device;		/* balloon device descriptor */
-	struct address_space *mapping;	/* balloon special page->mapping */
 	unsigned long isolated_pages;	/* # of isolated pages for migration */
 	spinlock_t pages_lock;		/* Protection to pages list */
 	struct list_head pages;		/* Pages enqueued & handled to Host */
@@ -64,7 +64,8 @@ struct balloon_dev_info {
 extern struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info);
 extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info);
 extern struct balloon_dev_info *balloon_devinfo_alloc(
-						void *balloon_dev_descriptor);
+				void *balloon_dev_descriptor,
+				const struct address_space_operations *a_ops);
 
 static inline void balloon_devinfo_free(struct balloon_dev_info *b_dev_info)
 {
@@ -98,14 +99,6 @@ extern bool balloon_page_isolate(struct page *page);
 extern void balloon_page_putback(struct page *page);
 extern int balloon_page_migrate(struct page *newpage,
 				struct page *page, enum migrate_mode mode);
-extern struct address_space
-*balloon_mapping_alloc(struct balloon_dev_info *b_dev_info,
-			const struct address_space_operations *a_ops);
-
-static inline void balloon_mapping_free(struct address_space *balloon_mapping)
-{
-	kfree(balloon_mapping);
-}
 
 /*
  * page_flags_cleared - helper to perform balloon @page ->flags tests.
@@ -216,11 +209,7 @@ static inline void balloon_page_delete(struct page *page)
  */
 static inline struct balloon_dev_info *balloon_page_device(struct page *page)
 {
-	struct address_space *mapping = page->mapping;
-	if (likely(mapping))
-		return mapping->private_data;
-
-	return NULL;
+	return container_of(page->mapping, struct balloon_dev_info, mapping);
 }
 
 static inline gfp_t balloon_mapping_gfp_mask(void)
@@ -228,24 +217,8 @@ static inline gfp_t balloon_mapping_gfp_mask(void)
 	return GFP_HIGHUSER_MOVABLE;
 }
 
-static inline bool balloon_compaction_check(void)
-{
-	return true;
-}
-
 #else /* !CONFIG_BALLOON_COMPACTION */
 
-static inline void *balloon_mapping_alloc(void *balloon_device,
-				const struct address_space_operations *a_ops)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline void balloon_mapping_free(struct address_space *balloon_mapping)
-{
-	return;
-}
-
 static inline void balloon_page_insert(struct page *page,
 				       struct address_space *mapping,
 				       struct list_head *head)
@@ -289,9 +262,5 @@ static inline gfp_t balloon_mapping_gfp_mask(void)
 	return GFP_HIGHUSER;
 }
 
-static inline bool balloon_compaction_check(void)
-{
-	return false;
-}
 #endif /* CONFIG_BALLOON_COMPACTION */
 #endif /* _LINUX_BALLOON_COMPACTION_H */
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index 6e45a5074bf0..e5ddd28a6915 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -19,15 +19,30 @@
  * struct balloon_dev_info which will be used to reference a balloon device
  * as well as to keep track of the balloon device page list.
  */
-struct balloon_dev_info *balloon_devinfo_alloc(void *balloon_dev_descriptor)
+struct balloon_dev_info *balloon_devinfo_alloc(void *balloon_dev_descriptor,
+				const struct address_space_operations *a_ops)
 {
 	struct balloon_dev_info *b_dev_info;
 	b_dev_info = kmalloc(sizeof(*b_dev_info), GFP_KERNEL);
 	if (!b_dev_info)
 		return ERR_PTR(-ENOMEM);
 
+	/*
+	 * Give a clean 'zeroed' status to all elements of this special
+	 * balloon page->mapping struct address_space instance.
+	 */
+	address_space_init_once(&b_dev_info->mapping);
+
+	/*
+	 * Set mapping->flags appropriately, to allow balloon pages
+	 * ->mapping identification.
+	 */
+	mapping_set_balloon(&b_dev_info->mapping);
+	mapping_set_gfp_mask(&b_dev_info->mapping, balloon_mapping_gfp_mask());
+
+	/* balloon's page->mapping->a_ops callback descriptor */
+	b_dev_info->mapping.a_ops = a_ops;
 	b_dev_info->balloon_device = balloon_dev_descriptor;
-	b_dev_info->mapping = NULL;
 	b_dev_info->isolated_pages = 0;
 	spin_lock_init(&b_dev_info->pages_lock);
 	INIT_LIST_HEAD(&b_dev_info->pages);
@@ -61,7 +76,7 @@ struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info)
 	 */
 	BUG_ON(!trylock_page(page));
 	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
-	balloon_page_insert(page, b_dev_info->mapping, &b_dev_info->pages);
+	balloon_page_insert(page, &b_dev_info->mapping, &b_dev_info->pages);
 	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
 	unlock_page(page);
 	return page;
@@ -132,60 +147,14 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
 EXPORT_SYMBOL_GPL(balloon_page_dequeue);
 
 #ifdef CONFIG_BALLOON_COMPACTION
-/*
- * balloon_mapping_alloc - allocates a special ->mapping for ballooned pages.
- * @b_dev_info: holds the balloon device information descriptor.
- * @a_ops: balloon_mapping address_space_operations descriptor.
- *
- * Driver must call it to properly allocate and initialize an instance of
- * struct address_space which will be used as the special page->mapping for
- * balloon device enlisted page instances.
- */
-struct address_space *balloon_mapping_alloc(struct balloon_dev_info *b_dev_info,
-				const struct address_space_operations *a_ops)
+static inline struct balloon_dev_info *page_balloon_dev(struct page *page)
 {
-	struct address_space *mapping;
-
-	mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
-	if (!mapping)
-		return ERR_PTR(-ENOMEM);
-
-	/*
-	 * Give a clean 'zeroed' status to all elements of this special
-	 * balloon page->mapping struct address_space instance.
-	 */
-	address_space_init_once(mapping);
-
-	/*
-	 * Set mapping->flags appropriately, to allow balloon pages
-	 * ->mapping identification.
-	 */
-	mapping_set_balloon(mapping);
-	mapping_set_gfp_mask(mapping, balloon_mapping_gfp_mask());
-
-	/* balloon's page->mapping->a_ops callback descriptor */
-	mapping->a_ops = a_ops;
-
-	/*
-	 * Establish a pointer reference back to the balloon device descriptor
-	 * this particular page->mapping will be servicing.
-	 * This is used by compaction / migration procedures to identify and
-	 * access the balloon device pageset while isolating / migrating pages.
-	 *
-	 * As some balloon drivers can register multiple balloon devices
-	 * for a single guest, this also helps compaction / migration to
-	 * properly deal with multiple balloon pagesets, when required.
-	 */
-	mapping->private_data = b_dev_info;
-	b_dev_info->mapping = mapping;
-
-	return mapping;
+	return container_of(page->mapping, struct balloon_dev_info, mapping);
 }
-EXPORT_SYMBOL_GPL(balloon_mapping_alloc);
 
 static inline void __isolate_balloon_page(struct page *page)
 {
-	struct balloon_dev_info *b_dev_info = page->mapping->private_data;
+	struct balloon_dev_info *b_dev_info = page_balloon_device(page);
 	unsigned long flags;
 	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
 	list_del(&page->lru);
@@ -195,7 +164,7 @@ static inline void __isolate_balloon_page(struct page *page)
 
 static inline void __putback_balloon_page(struct page *page)
 {
-	struct balloon_dev_info *b_dev_info = page->mapping->private_data;
+	struct balloon_dev_info *b_dev_info = page_balloon_device(page);
 	unsigned long flags;
 	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
 	list_add(&page->lru, &b_dev_info->pages);
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 06/17] fs: Don't store backing dev mapping in inode->i_mapping->private_data
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
                   ` (4 preceding siblings ...)
  2014-05-02 13:19 ` [PATCH 05/17] virtio_balloon: Store mapping directly in balloon_dev_info Jan Kara
@ 2014-05-02 13:19 ` Jan Kara
  2014-05-02 13:19 ` [PATCH 07/17] fs: Generic infrastructure for optional inode fields Jan Kara
                   ` (10 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:19 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

There's no need to store the backing device mapping in
inode->i_mapping->private_data since we can easily get to it via
inode->i_sb->s_bdev->bd_inode->i_mapping and it's not really a
performance-critical path.

Also remove address_space->private_data because no one uses it anymore.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/buffer.c        | 23 +++++++++++++----------
 fs/inode.c         |  1 -
 fs/nilfs2/page.c   |  1 -
 include/linux/fs.h |  1 -
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 27265a8b43c1..d7a88a0ab0d4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -583,6 +583,11 @@ void emergency_thaw_all(void)
 	}
 }
 
+static inline struct address_space *inode_buffer_mapping(struct inode *inode)
+{
+	return inode->i_sb->s_bdev->bd_inode->i_mapping;
+}
+
 /**
  * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
  * @mapping: the mapping which wants those buffers written
@@ -596,11 +601,12 @@ void emergency_thaw_all(void)
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
-	struct address_space *buffer_mapping = mapping->private_data;
+	struct address_space *buffer_mapping;
 
-	if (buffer_mapping == NULL || list_empty(&mapping->private_list))
+	if (list_empty(&mapping->private_list))
 		return 0;
 
+	buffer_mapping = inode_buffer_mapping(mapping->host);
 	return fsync_buffers_list(&buffer_mapping->private_lock,
 					&mapping->private_list);
 }
@@ -626,14 +632,9 @@ void write_boundary_block(struct block_device *bdev,
 void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 {
 	struct address_space *mapping = inode->i_mapping;
-	struct address_space *buffer_mapping = bh->b_page->mapping;
+	struct address_space *buffer_mapping = inode_buffer_mapping(inode);
 
 	mark_buffer_dirty(bh);
-	if (!mapping->private_data) {
-		mapping->private_data = buffer_mapping;
-	} else {
-		BUG_ON(mapping->private_data != buffer_mapping);
-	}
 	if (!bh->b_assoc_map) {
 		spin_lock(&buffer_mapping->private_lock);
 		list_move_tail(&bh->b_assoc_buffers,
@@ -831,8 +832,9 @@ void invalidate_inode_buffers(struct inode *inode)
 	if (inode_has_buffers(inode)) {
 		struct address_space *mapping = &inode->i_data;
 		struct list_head *list = &mapping->private_list;
-		struct address_space *buffer_mapping = mapping->private_data;
+		struct address_space *buffer_mapping;
 
+		buffer_mapping = inode_buffer_mapping(inode);
 		spin_lock(&buffer_mapping->private_lock);
 		while (!list_empty(list))
 			__remove_assoc_queue(BH_ENTRY(list->next));
@@ -854,8 +856,9 @@ int remove_inode_buffers(struct inode *inode)
 	if (inode_has_buffers(inode)) {
 		struct address_space *mapping = &inode->i_data;
 		struct list_head *list = &mapping->private_list;
-		struct address_space *buffer_mapping = mapping->private_data;
+		struct address_space *buffer_mapping;
 
+		buffer_mapping = inode_buffer_mapping(inode);
 		spin_lock(&buffer_mapping->private_lock);
 		while (!list_empty(list)) {
 			struct buffer_head *bh = BH_ENTRY(list->next);
diff --git a/fs/inode.c b/fs/inode.c
index 1b888eb524ca..1f22605768cf 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -166,7 +166,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	mapping->host = inode;
 	mapping->flags = 0;
 	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
-	mapping->private_data = NULL;
 	mapping->backing_dev_info = &default_backing_dev_info;
 	mapping->writeback_index = 0;
 
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index da276640f776..a9c69c624bd5 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -467,7 +467,6 @@ void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
 	mapping->host = inode;
 	mapping->flags = 0;
 	mapping_set_gfp_mask(mapping, GFP_NOFS);
-	mapping->private_data = NULL;
 	mapping->backing_dev_info = bdi;
 	mapping->a_ops = &empty_aops;
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b76720852f5f..2fd48bbf756b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -425,7 +425,6 @@ struct address_space {
 	struct backing_dev_info *backing_dev_info; /* device readahead, etc */
 	spinlock_t		private_lock;	/* for use by the address_space */
 	struct list_head	private_list;	/* ditto */
-	void			*private_data;	/* ditto */
 } __attribute__((aligned(sizeof(long))));
 	/*
 	 * On most architectures that alignment is already the case; but
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 07/17] fs: Generic infrastructure for optional inode fields
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
                   ` (5 preceding siblings ...)
  2014-05-02 13:19 ` [PATCH 06/17] fs: Don't store backing dev mapping in inode->i_mapping->private_data Jan Kara
@ 2014-05-02 13:19 ` Jan Kara
  2014-05-02 14:44   ` Christoph Hellwig
  2014-05-02 13:19 ` [PATCH 08/17] fs: Convert i_data.private_list to use optional field infrastructure Jan Kara
                   ` (9 subsequent siblings)
  16 siblings, 1 reply; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:19 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

There are parts of struct inode which are used only by a few filesystems
(e.g. i_dquot pointers, i_mapping->private_list, ...). Thus all the
other filesystems are just wasting memory with these fields. On the
other hand it isn't simple to just move these fields to filesystem
specific part of inode because there is generic code which needs to peek
into the fields and it is cumbersome to provide helpers into which fs
has to stuff the field it is storing elsewhere.

We create a simple infrastructure which allows for optional inode fields
stored in the fs-specific part of the inode. Accessing these fields is
slightly slower, as we have to look up their offset in the offset table
stored in the superblock, but in most cases this is
acceptable.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fs.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2fd48bbf756b..cc811b2f1a39 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -612,6 +612,11 @@ struct inode {
 	void			*i_private; /* fs or device private pointer */
 };
 
+/* Optional inode fields (stored in filesystems inode if the fs needs them) */
+enum {
+	IF_FIELD_NR	/* Number of optional inode fields */
+};
+
 static inline int inode_unhashed(struct inode *inode)
 {
 	return hlist_unhashed(&inode->i_hash);
@@ -1279,6 +1284,11 @@ struct super_block {
 	void 			*s_fs_info;	/* Filesystem private info */
 	unsigned int		s_max_links;
 	fmode_t			s_mode;
+	/*
+	 * We could have here just a pointer to the offsets array but this
+	 * way we save one dereference when looking up field offsets
+	 */
+	int			s_inode_fields[IF_FIELD_NR];
 
 	/* Granularity of c/m/atime in ns.
 	   Cannot be worse than a second */
@@ -1328,6 +1338,20 @@ struct super_block {
 	struct rcu_head		rcu;
 };
 
+static inline void *inode_get_field(struct inode *inode, int field)
+{
+	int offset = inode->i_sb->s_inode_fields[field];
+
+	if (!offset)	/* Field not present? */
+		return NULL;
+	return ((char *)inode) + offset;
+}
+
+static inline void sb_init_inode_fields(struct super_block *sb, int *fields)
+{
+	memcpy(sb->s_inode_fields, fields, sizeof(sb->s_inode_fields));
+}
+
 extern struct timespec current_fs_time(struct super_block *sb);
 
 /*
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 08/17] fs: Convert i_data.private_list to use optional field infrastructure
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
                   ` (6 preceding siblings ...)
  2014-05-02 13:19 ` [PATCH 07/17] fs: Generic infrastructure for optional inode fields Jan Kara
@ 2014-05-02 13:19 ` Jan Kara
  2014-05-02 13:20 ` [PATCH 09/17] ext2: Use own optional list for metadata bhs Jan Kara
                   ` (8 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:19 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

Convert use of i_data.private_list for metadata buffer heads to use
generic optional inode field infrastructure. So far we just set it up so
that it uses i_data.private_list. In later patches we switch filesystems
one by one to use their own fields.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/buffer.c                 | 98 +++++++++++++++++++--------------------------
 fs/super.c                  |  2 +
 include/linux/buffer_head.h | 12 +++++-
 include/linux/fs.h          |  2 +
 4 files changed, 57 insertions(+), 57 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index d7a88a0ab0d4..9441889d8383 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -43,7 +43,7 @@
 #include <linux/bit_spinlock.h>
 #include <trace/events/block.h>
 
-static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
+static int fsync_buffers_list(spinlock_t *lock, struct inode_meta_bhs *mbh);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
@@ -460,44 +460,27 @@ EXPORT_SYMBOL(mark_buffer_async_write);
  *
  * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(),
  * inode_has_buffers() and invalidate_inode_buffers() are provided for the
- * management of a list of dependent buffers at ->i_mapping->private_list.
- *
- * Locking is a little subtle: try_to_free_buffers() will remove buffers
- * from their controlling inode's queue when they are being freed.  But
- * try_to_free_buffers() will be operating against the *blockdev* mapping
- * at the time, not against the S_ISREG file which depends on those buffers.
- * So the locking for private_list is via the private_lock in the address_space
- * which backs the buffers.  Which is different from the address_space 
- * against which the buffers are listed.  So for a particular address_space,
- * mapping->private_lock does *not* protect mapping->private_list!  In fact,
- * mapping->private_list will always be protected by the backing blockdev's
- * ->private_lock.
- *
- * Which introduces a requirement: all buffers on an address_space's
- * ->private_list must be from the same address_space: the blockdev's.
- *
- * address_spaces which do not place buffers at ->private_list via these
- * utility functions are free to use private_lock and private_list for
- * whatever they want.  The only requirement is that list_empty(private_list)
- * be true at clear_inode() time.
- *
- * FIXME: clear_inode should not call invalidate_inode_buffers().  The
- * filesystems should do that.  invalidate_inode_buffers() should just go
- * BUG_ON(!list_empty).
+ * management of a list of dependent buffers anchored at inode_meta_bhs
+ * structure in the filesystem's inode.
+ *
+ * Locking is a little subtle: try_to_free_buffers() will remove buffers from
+ * their controlling inode's queue when they are being freed.  But
+ * try_to_free_buffers() will be operating against the *blockdev* mapping at
+ * the time, not against the S_ISREG file which depends on those buffers.  So
+ * the locking for inode_meta_bhs list is via the private_lock in the
+ * address_space which backs the buffers.  Which is different from the
+ * address_space against which the buffers are listed.  So for a particular
+ * address_space, mapping->private_lock does *not* protect inode's
+ * inode_meta_bhs list!  In fact, inode_meta_bhs list will always be protected
+ * by the backing blockdev's ->private_lock.
+ *
+ * Which introduces a requirement: all buffers on an inode's inode_meta_bhs
+ * list must be from the same address_space: the blockdev's.
  *
  * FIXME: mark_buffer_dirty_inode() is a data-plane operation.  It should
  * take an address_space, not an inode.  And it should be called
  * mark_buffer_dirty_fsync() to clearly define why those buffers are being
  * queued up.
- *
- * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
- * list if it is already on a list.  Because if the buffer is on a list,
- * it *must* already be on the right one.  If not, the filesystem is being
- * silly.  This will save a ton of locking.  But first we have to ensure
- * that buffers are taken *off* the old inode's list when they are freed
- * (presumably in truncate).  That requires careful auditing of all
- * filesystems (do it inside bforget()).  It could also be done by bringing
- * b_inode back.
  */
 
 /*
@@ -514,7 +497,9 @@ static void __remove_assoc_queue(struct buffer_head *bh)
 
 int inode_has_buffers(struct inode *inode)
 {
-	return !list_empty(&inode->i_data.private_list);
+	struct inode_meta_bhs *mbh = inode_get_field(inode, IF_META_BHS);
+
+	return mbh && !list_empty(&mbh->list);
 }
 
 /*
@@ -592,8 +577,8 @@ static inline struct address_space *inode_buffer_mapping(struct inode *inode)
  * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
  * @mapping: the mapping which wants those buffers written
  *
- * Starts I/O against the buffers at mapping->private_list, and waits upon
- * that I/O.
+ * Starts I/O against the buffers at mapping->host's inode_meta_bhs list, and
+ * waits upon that I/O.
  *
  * Basically, this is a convenience function for fsync().
  * @mapping is a file or directory which needs those buffers to be written for
@@ -601,14 +586,15 @@ static inline struct address_space *inode_buffer_mapping(struct inode *inode)
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
+	struct inode *inode = mapping->host;
+	struct inode_meta_bhs *mbh = inode_get_field(inode, IF_META_BHS);
 	struct address_space *buffer_mapping;
 
-	if (list_empty(&mapping->private_list))
+	if (!inode_has_buffers(inode))
 		return 0;
 
-	buffer_mapping = inode_buffer_mapping(mapping->host);
-	return fsync_buffers_list(&buffer_mapping->private_lock,
-					&mapping->private_list);
+	buffer_mapping = inode_buffer_mapping(inode);
+	return fsync_buffers_list(&buffer_mapping->private_lock, mbh);
 }
 EXPORT_SYMBOL(sync_mapping_buffers);
 
@@ -633,12 +619,12 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct address_space *buffer_mapping = inode_buffer_mapping(inode);
+	struct inode_meta_bhs *mbh = inode_get_field(inode, IF_META_BHS);
 
 	mark_buffer_dirty(bh);
 	if (!bh->b_assoc_map) {
 		spin_lock(&buffer_mapping->private_lock);
-		list_move_tail(&bh->b_assoc_buffers,
-				&mapping->private_list);
+		list_move_tail(&bh->b_assoc_buffers, &mbh->list);
 		bh->b_assoc_map = mapping;
 		spin_unlock(&buffer_mapping->private_lock);
 	}
@@ -739,7 +725,7 @@ EXPORT_SYMBOL(__set_page_dirty_buffers);
  * the osync code to catch these locked, dirty buffers without requeuing
  * any newly dirty buffers for write.
  */
-static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
+static int fsync_buffers_list(spinlock_t *lock, struct inode_meta_bhs *mbh)
 {
 	struct buffer_head *bh;
 	struct list_head tmp;
@@ -751,8 +737,8 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 	blk_start_plug(&plug);
 
 	spin_lock(lock);
-	while (!list_empty(list)) {
-		bh = BH_ENTRY(list->next);
+	while (!list_empty(&mbh->list)) {
+		bh = BH_ENTRY(mbh->list.next);
 		mapping = bh->b_assoc_map;
 		__remove_assoc_queue(bh);
 		/* Avoid race with mark_buffer_dirty_inode() which does
@@ -799,7 +785,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 		smp_mb();
 		if (buffer_dirty(bh)) {
 			list_add(&bh->b_assoc_buffers,
-				 &mapping->private_list);
+				 &mbh->list);
 			bh->b_assoc_map = mapping;
 		}
 		spin_unlock(lock);
@@ -811,7 +797,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 	}
 	
 	spin_unlock(lock);
-	err2 = osync_buffers_list(lock, list);
+	err2 = osync_buffers_list(lock, &mbh->list);
 	if (err)
 		return err;
 	else
@@ -830,14 +816,14 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 void invalidate_inode_buffers(struct inode *inode)
 {
 	if (inode_has_buffers(inode)) {
-		struct address_space *mapping = &inode->i_data;
-		struct list_head *list = &mapping->private_list;
+		struct inode_meta_bhs *mbh;
 		struct address_space *buffer_mapping;
 
+		mbh = inode_get_field(inode, IF_META_BHS);
 		buffer_mapping = inode_buffer_mapping(inode);
 		spin_lock(&buffer_mapping->private_lock);
-		while (!list_empty(list))
-			__remove_assoc_queue(BH_ENTRY(list->next));
+		while (!list_empty(&mbh->list))
+			__remove_assoc_queue(BH_ENTRY(mbh->list.next));
 		spin_unlock(&buffer_mapping->private_lock);
 	}
 }
@@ -854,14 +840,14 @@ int remove_inode_buffers(struct inode *inode)
 	int ret = 1;
 
 	if (inode_has_buffers(inode)) {
-		struct address_space *mapping = &inode->i_data;
-		struct list_head *list = &mapping->private_list;
+		struct inode_meta_bhs *mbh;
 		struct address_space *buffer_mapping;
 
+		mbh = inode_get_field(inode, IF_META_BHS);
 		buffer_mapping = inode_buffer_mapping(inode);
 		spin_lock(&buffer_mapping->private_lock);
-		while (!list_empty(list)) {
-			struct buffer_head *bh = BH_ENTRY(list->next);
+		while (!list_empty(&mbh->list)) {
+			struct buffer_head *bh = BH_ENTRY(mbh->list.next);
 			if (buffer_dirty(bh)) {
 				ret = 0;
 				break;
diff --git a/fs/super.c b/fs/super.c
index 80d5cf2ca765..4192e3356e37 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -219,6 +219,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 	s->s_op = &default_op;
 	s->s_time_gran = 1000000000;
 	s->cleancache_poolid = -1;
+	s->s_inode_fields[IF_META_BHS] = offsetof(struct inode,
+						  i_data.private_list);
 
 	s->s_shrink.seeks = DEFAULT_SEEKS;
 	s->s_shrink.scan_objects = super_cache_scan;
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index d77797a52b7b..060acb847240 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -131,6 +131,16 @@ BUFFER_FNS(Meta, meta)
 BUFFER_FNS(Prio, prio)
 BUFFER_FNS(Defer_Completion, defer_completion)
 
+/* List of metadata buffers associated with the inode */
+struct inode_meta_bhs {
+	struct list_head list;
+};
+
+static inline void inode_mbhs_init_once(struct inode_meta_bhs *mbh)
+{
+	INIT_LIST_HEAD(&mbh->list);
+}
+
 #define bh_offset(bh)		((unsigned long)(bh)->b_data & ~PAGE_MASK)
 
 /* If we *know* page->private refers to buffer_heads */
@@ -162,7 +172,7 @@ void end_buffer_read_sync(struct buffer_head *bh, int uptodate);
 void end_buffer_write_sync(struct buffer_head *bh, int uptodate);
 void end_buffer_async_write(struct buffer_head *bh, int uptodate);
 
-/* Things to do with buffers at mapping->private_list */
+/* Things to do with inode's associated metadata buffers */
 void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode);
 int inode_has_buffers(struct inode *);
 void invalidate_inode_buffers(struct inode *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cc811b2f1a39..37bc82f7a02d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -614,6 +614,8 @@ struct inode {
 
 /* Optional inode fields (stored in filesystems inode if the fs needs them) */
 enum {
+	IF_META_BHS,	/* List of metadata buffer heads for inode
+			   (struct inode_meta_bhs) */
 	IF_FIELD_NR	/* Number of optional inode fields */
 };
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 09/17] ext2: Use own optional list for metadata bhs
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
                   ` (7 preceding siblings ...)
  2014-05-02 13:19 ` [PATCH 08/17] fs: Convert i_data.private_list to use optional field infrastructure Jan Kara
@ 2014-05-02 13:20 ` Jan Kara
  2014-05-02 13:20 ` [PATCH 10/17] udf: " Jan Kara
                   ` (7 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:20 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext2/ext2.h  | 2 ++
 fs/ext2/super.c | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index d9a17d0b124d..aa446bb4e5df 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -15,6 +15,7 @@
 #include <linux/blockgroup_lock.h>
 #include <linux/percpu_counter.h>
 #include <linux/rbtree.h>
+#include <linux/buffer_head.h>
 
 /* XXX Here for now... not interested in restructing headers JUST now */
 
@@ -689,6 +690,7 @@ struct ext2_inode_info {
 	struct mutex truncate_mutex;
 	struct inode	vfs_inode;
 	struct list_head i_orphan;	/* unlinked but open inodes */
+	struct inode_meta_bhs i_mbh;	/* indirect block bhs for inode */
 };
 
 /*
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 20d6697bd638..4bfcc1faf431 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -45,6 +45,11 @@ static int ext2_sync_fs(struct super_block *sb, int wait);
 static int ext2_freeze(struct super_block *sb);
 static int ext2_unfreeze(struct super_block *sb);
 
+static int ext2_inode_fields[IF_FIELD_NR] = {
+	[IF_META_BHS] = (int)offsetof(struct ext2_inode_info, i_mbh) -
+			(int)offsetof(struct ext2_inode_info, vfs_inode),
+};
+
 void ext2_error(struct super_block *sb, const char *function,
 		const char *fmt, ...)
 {
@@ -189,6 +194,7 @@ static void init_once(void *foo)
 	init_rwsem(&ei->xattr_sem);
 #endif
 	mutex_init(&ei->truncate_mutex);
+	inode_mbhs_init_once(&ei->i_mbh);
 	inode_init_once(&ei->vfs_inode);
 }
 
@@ -790,6 +796,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	}
 	sb->s_fs_info = sbi;
 	sbi->s_sb_block = sb_block;
+	sb_init_inode_fields(sb, ext2_inode_fields);
 
 	spin_lock_init(&sbi->s_lock);
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 10/17] udf: Use own optional list for metadata bhs
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
                   ` (8 preceding siblings ...)
  2014-05-02 13:20 ` [PATCH 09/17] ext2: Use own optional list for metadata bhs Jan Kara
@ 2014-05-02 13:20 ` Jan Kara
  2014-05-02 13:20 ` [PATCH 11/17] affs: " Jan Kara
                   ` (6 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:20 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/super.c | 7 +++++++
 fs/udf/udf_i.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/fs/udf/super.c b/fs/udf/super.c
index 3306b9f69bed..5db6af43a45b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -81,6 +81,11 @@
 
 enum { UDF_MAX_LINKS = 0xffff };
 
+static int udf_inode_fields[IF_FIELD_NR] = {
+	[IF_META_BHS] = (int)offsetof(struct udf_inode_info, i_mbh) -
+			(int)offsetof(struct udf_inode_info, vfs_inode),
+};
+
 /* These are the "meat" - everything else is stuffing */
 static int udf_fill_super(struct super_block *, void *, int);
 static void udf_put_super(struct super_block *);
@@ -172,6 +177,7 @@ static void init_once(void *foo)
 	struct udf_inode_info *ei = (struct udf_inode_info *)foo;
 
 	ei->i_ext.i_data = NULL;
+	inode_mbhs_init_once(&ei->i_mbh);
 	inode_init_once(&ei->vfs_inode);
 }
 
@@ -2062,6 +2068,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
 	sb->s_fs_info = sbi;
 
 	mutex_init(&sbi->s_alloc_mutex);
+	sb_init_inode_fields(sb, udf_inode_fields);
 
 	if (!udf_parse_options((char *)options, &uopt, false))
 		goto error_out;
diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h
index b5cd8ed2aa12..e75cf7d42765 100644
--- a/fs/udf/udf_i.h
+++ b/fs/udf/udf_i.h
@@ -51,6 +51,7 @@ struct udf_inode_info {
 	struct udf_ext_cache cached_extent;
 	/* Spinlock for protecting extent cache */
 	spinlock_t i_extent_cache_lock;
+	struct inode_meta_bhs i_mbh;
 	struct inode vfs_inode;
 };
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 11/17] affs: Use own optional list for metadata bhs
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
                   ` (9 preceding siblings ...)
  2014-05-02 13:20 ` [PATCH 10/17] udf: " Jan Kara
@ 2014-05-02 13:20 ` Jan Kara
  2014-05-02 13:20 ` [PATCH 12/17] bfs: " Jan Kara
                   ` (5 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:20 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/affs/affs.h  | 2 ++
 fs/affs/super.c | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 3952121f2f28..776c80259def 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -59,6 +59,8 @@ struct affs_inode_info {
 	u32	 i_protect;			/* unused attribute bits */
 	u32	 i_lastalloc;			/* last allocated block */
 	int	 i_pa_cnt;			/* number of preallocated blocks */
+	struct inode_meta_bhs i_mbh;		/* list of metadata buffers
+						   associated with the inode */
 	struct inode vfs_inode;
 };
 
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d098731b82ff..425c5a33f63f 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -25,6 +25,11 @@ extern struct timezone sys_tz;
 static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int affs_remount (struct super_block *sb, int *flags, char *data);
 
+static int affs_inode_fields[IF_FIELD_NR] = {
+	[IF_META_BHS] = (int)offsetof(struct affs_inode_info, i_mbh) -
+			(int)offsetof(struct affs_inode_info, vfs_inode),
+};
+
 static void
 affs_commit_super(struct super_block *sb, int wait)
 {
@@ -125,6 +130,7 @@ static void init_once(void *foo)
 
 	sema_init(&ei->i_link_lock, 1);
 	sema_init(&ei->i_ext_lock, 1);
+	inode_mbhs_init_once(&ei->i_mbh);
 	inode_init_once(&ei->vfs_inode);
 }
 
@@ -347,6 +353,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_uid     = uid;
 	sbi->s_gid     = gid;
 	sbi->s_reserved= reserved;
+	sb_init_inode_fields(sb, affs_inode_fields);
 
 	/* Get the size of the device in 512-byte blocks.
 	 * If we later see that the partition uses bigger
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 12/17] bfs: Use own optional list for metadata bhs
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
                   ` (10 preceding siblings ...)
  2014-05-02 13:20 ` [PATCH 11/17] affs: " Jan Kara
@ 2014-05-02 13:20 ` Jan Kara
  2014-05-02 13:20 ` [PATCH 13/17] fat: " Jan Kara
                   ` (4 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:20 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/bfs/bfs.h   | 2 ++
 fs/bfs/inode.c | 8 ++++++++
 2 files changed, 10 insertions(+)

diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index f7f87e233dd9..f86ee8e43705 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -27,6 +27,8 @@ struct bfs_inode_info {
 	unsigned long i_dsk_ino; /* inode number from the disk, can be 0 */
 	unsigned long i_sblock;
 	unsigned long i_eblock;
+	struct inode_meta_bhs i_mbh;	/* list of metadata buffers associated
+					   with the inode */
 	struct inode vfs_inode;
 };
 
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 8defc6b3f9a2..2b3283668bd7 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -30,6 +30,12 @@ MODULE_LICENSE("GPL");
 #define dprintf(x...)
 #endif
 
+static int bfs_inode_fields[IF_FIELD_NR] = {
+	[IF_META_BHS] = (int)offsetof(struct bfs_inode_info, i_mbh) -
+			(int)offsetof(struct bfs_inode_info, vfs_inode),
+};
+
+
 void dump_imap(const char *prefix, struct super_block *s);
 
 struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
@@ -263,6 +269,7 @@ static void init_once(void *foo)
 {
 	struct bfs_inode_info *bi = foo;
 
+	inode_mbhs_init_once(&bi->i_mbh);
 	inode_init_once(&bi->vfs_inode);
 }
 
@@ -333,6 +340,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 		return -ENOMEM;
 	mutex_init(&info->bfs_lock);
 	s->s_fs_info = info;
+	sb_init_inode_fields(s, bfs_inode_fields);
 
 	sb_set_blocksize(s, BFS_BSIZE);
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 13/17] fat: Use own optional list for metadata bhs
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
                   ` (11 preceding siblings ...)
  2014-05-02 13:20 ` [PATCH 12/17] bfs: " Jan Kara
@ 2014-05-02 13:20 ` Jan Kara
  2014-05-02 13:20 ` [PATCH 14/17] minix: " Jan Kara
                   ` (3 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:20 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/fat/fat.h   | 2 ++
 fs/fat/inode.c | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 7c31f4bc74a9..eef6998f8bf7 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -127,6 +127,8 @@ struct msdos_inode_info {
 	struct hlist_node i_fat_hash;	/* hash by i_location */
 	struct hlist_node i_dir_hash;	/* hash by i_logstart */
 	struct rw_semaphore truncate_lock; /* protect bmap against truncate */
+	struct inode_meta_bhs i_mbh;	/* list of metadata buffers associated
+					   with this inode */
 	struct inode vfs_inode;
 };
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 854b578f6695..ba9e662561fc 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -38,6 +38,10 @@
 static int fat_default_codepage = CONFIG_FAT_DEFAULT_CODEPAGE;
 static char fat_default_iocharset[] = CONFIG_FAT_DEFAULT_IOCHARSET;
 
+static int fat_inode_fields[IF_FIELD_NR] = {
+	[IF_META_BHS] = (int)offsetof(struct msdos_inode_info, i_mbh) -
+			(int)offsetof(struct msdos_inode_info, vfs_inode),
+};
 
 static int fat_add_cluster(struct inode *inode)
 {
@@ -604,6 +608,7 @@ static void init_once(void *foo)
 	INIT_LIST_HEAD(&ei->cache_lru);
 	INIT_HLIST_NODE(&ei->i_fat_hash);
 	INIT_HLIST_NODE(&ei->i_dir_hash);
+	inode_mbhs_init_once(&ei->i_mbh);
 	inode_init_once(&ei->vfs_inode);
 }
 
@@ -1281,6 +1286,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
 	mutex_init(&sbi->nfs_build_inode_lock);
 	ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
 			     DEFAULT_RATELIMIT_BURST);
+	sb_init_inode_fields(sb, fat_inode_fields);
 
 	error = parse_options(sb, data, isvfat, silent, &debug, &sbi->options);
 	if (error)
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 14/17] minix: Use own optional list for metadata bhs
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
                   ` (12 preceding siblings ...)
  2014-05-02 13:20 ` [PATCH 13/17] fat: " Jan Kara
@ 2014-05-02 13:20 ` Jan Kara
  2014-05-02 13:20 ` [PATCH 15/17] sysv: " Jan Kara
                   ` (2 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:20 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/minix/inode.c | 7 +++++++
 fs/minix/minix.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 0332109162a5..49c65ae282c8 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -19,6 +19,11 @@
 #include <linux/vfs.h>
 #include <linux/writeback.h>
 
+static int minix_inode_fields[IF_FIELD_NR] = {
+	[IF_META_BHS] = (int)offsetof(struct minix_inode_info, i_mbh) -
+			(int)offsetof(struct minix_inode_info, vfs_inode),
+};
+
 static int minix_write_inode(struct inode *inode,
 		struct writeback_control *wbc);
 static int minix_statfs(struct dentry *dentry, struct kstatfs *buf);
@@ -83,6 +88,7 @@ static void init_once(void *foo)
 {
 	struct minix_inode_info *ei = (struct minix_inode_info *) foo;
 
+	inode_mbhs_init_once(&ei->i_mbh);
 	inode_init_once(&ei->vfs_inode);
 }
 
@@ -169,6 +175,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 	if (!sbi)
 		return -ENOMEM;
 	s->s_fs_info = sbi;
+	sb_init_inode_fields(s, minix_inode_fields);
 
 	BUILD_BUG_ON(32 != sizeof (struct minix_inode));
 	BUILD_BUG_ON(64 != sizeof(struct minix2_inode));
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 1ebd11854622..542d8b58ecd5 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -18,6 +18,7 @@ struct minix_inode_info {
 		__u16 i1_data[16];
 		__u32 i2_data[16];
 	} u;
+	struct inode_meta_bhs i_mbh;
 	struct inode vfs_inode;
 };
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 15/17] sysv: Use own optional list for metadata bhs
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
                   ` (13 preceding siblings ...)
  2014-05-02 13:20 ` [PATCH 14/17] minix: " Jan Kara
@ 2014-05-02 13:20 ` Jan Kara
  2014-05-02 13:20 ` [PATCH 16/17] ext4: " Jan Kara
  2014-05-02 13:20 ` [PATCH 17/17] fs: Remove mapping->private_list Jan Kara
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:20 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/sysv/inode.c | 1 +
 fs/sysv/super.c | 6 ++++++
 fs/sysv/sysv.h  | 1 +
 3 files changed, 8 insertions(+)

diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index c327d4ee1235..d091e37a1d6f 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -333,6 +333,7 @@ static void init_once(void *p)
 {
 	struct sysv_inode_info *si = (struct sysv_inode_info *)p;
 
+	inode_mbhs_init_once(&si->i_mbh);
 	inode_init_once(&si->vfs_inode);
 }
 
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index eda10959714f..2b8618282a97 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -44,6 +44,11 @@ enum {
 	JAN_1_1980 = (10*365 + 2) * 24 * 60 * 60
 };
 
+static int sysv_inode_fields[IF_FIELD_NR] = {
+	[IF_META_BHS] = (int)offsetof(struct sysv_inode_info, i_mbh) -
+			(int)offsetof(struct sysv_inode_info, vfs_inode),
+};
+
 static void detected_xenix(struct sysv_sb_info *sbi, unsigned *max_links)
 {
 	struct buffer_head *bh1 = sbi->s_bh1;
@@ -370,6 +375,7 @@ static int sysv_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_block_base = 0;
 	mutex_init(&sbi->s_lock);
 	sb->s_fs_info = sbi;
+	sb_init_inode_fields(sb, sysv_inode_fields);
 
 	sb_set_blocksize(sb, BLOCK_SIZE);
 
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 69d488986cce..93b7fd8e584a 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -67,6 +67,7 @@ struct sysv_sb_info {
 struct sysv_inode_info {
 	__fs32		i_data[13];
 	u32		i_dir_start_lookup;
+	struct inode_meta_bhs i_mbh;
 	struct inode	vfs_inode;
 };
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 16/17] ext4: Use own optional list for metadata bhs
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
                   ` (14 preceding siblings ...)
  2014-05-02 13:20 ` [PATCH 15/17] sysv: " Jan Kara
@ 2014-05-02 13:20 ` Jan Kara
  2014-05-02 13:20 ` [PATCH 17/17] fs: Remove mapping->private_list Jan Kara
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:20 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/ext4.h  | 2 ++
 fs/ext4/inode.c | 2 +-
 fs/ext4/super.c | 7 +++++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d3a534fdc5ff..9382bf0d5345 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -872,6 +872,8 @@ struct ext4_inode_info {
 	struct rw_semaphore i_data_sem;
 	struct inode vfs_inode;
 	struct jbd2_inode *jinode;
+	struct inode_meta_bhs i_mbh;	/* list of metadata buffers associated
+					   with the inode */
 
 	/*
 	 * File creation time. Its function is same as that of
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6e39895a91b8..78785617736c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1040,7 +1040,7 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
  * We need to pick up the new inode size which generic_commit_write gave us
  * `file' can be NULL - eg, when called from page_symlink().
  *
- * ext4 never places buffers on inode->i_mapping->private_list.  metadata
+ * ext4 never places buffers on inode->i_mbh.list when journalling.  Metadata
  * buffers are managed internally.
  */
 static int ext4_write_end(struct file *file,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 710fed2377d4..d50348a8280d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -115,6 +115,11 @@ MODULE_ALIAS("ext3");
 #define IS_EXT3_SB(sb) (0)
 #endif
 
+static int ext4_inode_fields[IF_FIELD_NR] = {
+	[IF_META_BHS] = (int)offsetof(struct ext4_inode_info, i_mbh) -
+			(int)offsetof(struct ext4_inode_info, vfs_inode),
+};
+
 static int ext4_verify_csum_type(struct super_block *sb,
 				 struct ext4_super_block *es)
 {
@@ -937,6 +942,7 @@ static void init_once(void *foo)
 	INIT_LIST_HEAD(&ei->i_orphan);
 	init_rwsem(&ei->xattr_sem);
 	init_rwsem(&ei->i_data_sem);
+	inode_mbhs_init_once(&ei->i_mbh);
 	inode_init_once(&ei->vfs_inode);
 }
 
@@ -3394,6 +3400,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		goto out_free_orig;
 	}
 	sb->s_fs_info = sbi;
+	sb_init_inode_fields(sb, ext4_inode_fields);
 	sbi->s_sb = sb;
 	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
 	sbi->s_sb_block = sb_block;
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH 17/17] fs: Remove mapping->private_list
  2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
                   ` (15 preceding siblings ...)
  2014-05-02 13:20 ` [PATCH 16/17] ext4: " Jan Kara
@ 2014-05-02 13:20 ` Jan Kara
  16 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 13:20 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: Al Viro, Jan Kara

No one uses this list anymore. Remove it.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 Documentation/filesystems/porting | 4 ++++
 fs/ext3/inode.c                   | 3 ---
 fs/inode.c                        | 6 ++----
 fs/ntfs/file.c                    | 6 +++---
 fs/super.c                        | 2 --
 include/linux/fs.h                | 1 -
 6 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 26ce6cd742c6..756fb0eebe60 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -458,3 +458,7 @@ in your dentry operations instead.
 	of the in-tree instances did).  inode_hash_lock is still held,
 	of course, so they are still serialized wrt removal from inode hash,
 	as well as wrt set() callback of iget5_locked().
+--
+[mandatory]
+	inode->i_data.private_list and inode->i_data.private_data are gone.
+	Use fields in filesystem's own inode structure instead.
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 384b6ebb655f..45be4e34ea20 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1365,9 +1365,6 @@ static void update_file_sizes(struct inode *inode, loff_t pos, unsigned copied)
 /*
  * We need to pick up the new inode size which generic_commit_write gave us
  * `file' can be NULL - eg, when called from page_symlink().
- *
- * ext3 never places buffers on inode->i_mapping->private_list.  metadata
- * buffers are managed internally.
  */
 static int ext3_ordered_write_end(struct file *file,
 				struct address_space *mapping,
diff --git a/fs/inode.c b/fs/inode.c
index 1f22605768cf..0ea7964687e2 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -349,7 +349,6 @@ void address_space_init_once(struct address_space *mapping)
 	INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
 	spin_lock_init(&mapping->tree_lock);
 	mutex_init(&mapping->i_mmap_mutex);
-	INIT_LIST_HEAD(&mapping->private_list);
 	spin_lock_init(&mapping->private_lock);
 	mapping->i_mmap = RB_ROOT;
 	INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
@@ -503,7 +502,6 @@ void clear_inode(struct inode *inode)
 	spin_lock_irq(&inode->i_data.tree_lock);
 	BUG_ON(inode->i_data.nrpages);
 	spin_unlock_irq(&inode->i_data.tree_lock);
-	BUG_ON(!list_empty(&inode->i_data.private_list));
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(inode->i_state & I_CLEAR);
 	/* don't need i_lock here, no concurrent mods to i_state */
@@ -670,8 +668,8 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
  * Isolate the inode from the LRU in preparation for freeing it.
  *
  * Any inodes which are pinned purely because of attached pagecache have their
- * pagecache removed.  If the inode has metadata buffers attached to
- * mapping->private_list then try to remove them.
+ * pagecache removed.  If the inode has metadata buffers attached to it, then
+ * try to remove them.
  *
  * If the inode has the I_REFERENCED flag set, then it means that it has been
  * used recently - the flag is set in iput_final(). When we encounter such an
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index db9bd8a31725..c210eaf1b0a8 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2183,9 +2183,9 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
 		ret = __ntfs_write_inode(vi, 1);
 	write_inode_now(vi, !datasync);
 	/*
-	 * NOTE: If we were to use mapping->private_list (see ext2 and
-	 * fs/buffer.c) for dirty blocks then we could optimize the below to be
-	 * sync_mapping_buffers(vi->i_mapping).
+	 * NOTE: If we were to use list of associated metadata buffers (see
+	 * ext2 and fs/buffer.c) for dirty blocks then we could optimize the
+	 * below to be sync_mapping_buffers(vi->i_mapping).
 	 */
 	err = sync_blockdev(vi->i_sb->s_bdev);
 	if (unlikely(err && !ret))
diff --git a/fs/super.c b/fs/super.c
index 4192e3356e37..80d5cf2ca765 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -219,8 +219,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 	s->s_op = &default_op;
 	s->s_time_gran = 1000000000;
 	s->cleancache_poolid = -1;
-	s->s_inode_fields[IF_META_BHS] = offsetof(struct inode,
-						  i_data.private_list);
 
 	s->s_shrink.seeks = DEFAULT_SEEKS;
 	s->s_shrink.scan_objects = super_cache_scan;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 37bc82f7a02d..59eb75503226 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -424,7 +424,6 @@ struct address_space {
 	unsigned long		flags;		/* error bits/gfp mask */
 	struct backing_dev_info *backing_dev_info; /* device readahead, etc */
 	spinlock_t		private_lock;	/* for use by the address_space */
-	struct list_head	private_list;	/* ditto */
 } __attribute__((aligned(sizeof(long))));
 	/*
 	 * On most architectures that alignment is already the case; but
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH 01/17] fs: Remove i_devices from inode
  2014-05-02 13:19 ` [PATCH 01/17] fs: Remove i_devices from inode Jan Kara
@ 2014-05-02 14:42   ` Christoph Hellwig
  2014-05-20 15:28     ` Jan Kara
  0 siblings, 1 reply; 22+ messages in thread
From: Christoph Hellwig @ 2014-05-02 14:42 UTC (permalink / raw)
  To: Jan Kara; +Cc: linux-fsdevel, Al Viro

On Fri, May 02, 2014 at 03:19:52PM +0200, Jan Kara wrote:
> Struct inode has an i_devices list head which is used when inode
> represents a block device or a character device for tracking all inodes
> referencing that device. The only use of that tracking is to remove
> references to the device from inodes when the device is released.
> However both block device and character device code take reference to
> the device together with referencing it from an inode (block device
> code takes inode reference, character device code kobj reference) thus
> device cannot be released while there are any inodes referencing it.
> 
> Remove the useless code and i_devices from struct inode.


While I like the change I don't think it's quite correct.  i_cdev starts
out NULL and the inode is not added to the list for character devices.
Then chrdev_open comes in and sets up i_cdev, as well as grabbing one
reference per open, and __fput drops this reference.

So far, so good - but for a char device node that was opened before
but isn't open anymore we have the inode pointing to a struct cdev
which doesn't have a reference.  When we now unregister the char
device, that inode is left holding a dangling i_cdev pointer.

I think this can be easily solved by changing the cdev_put call in
__fput into something that will clear i_cdev if it dropped the last
references.

In addition to that I would suggest to split this into 3 patches
for bdev, cdev and final removal so that the steps are better
documented.

The block device side has similar problems because we only hold
references on the block device inode, not the device inode.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 07/17] fs: Generic infrastructure for optional inode fields
  2014-05-02 13:19 ` [PATCH 07/17] fs: Generic infrastructure for optional inode fields Jan Kara
@ 2014-05-02 14:44   ` Christoph Hellwig
  2014-05-02 19:44     ` Jan Kara
  0 siblings, 1 reply; 22+ messages in thread
From: Christoph Hellwig @ 2014-05-02 14:44 UTC (permalink / raw)
  To: Jan Kara; +Cc: linux-fsdevel, Al Viro

On Fri, May 02, 2014 at 03:19:58PM +0200, Jan Kara wrote:
> There are parts of struct inode which are used only by a few filesystems
> (e.g. i_dquot pointers, i_mapping->private_list, ...). Thus all the

Why don't you move the private_list into the fs inode like you did
in the previous few patches and remove the references from the generic
code?  This would involve some heavy lifting in buffer.c, but that's
long overdue anyway.


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 07/17] fs: Generic infrastructure for optional inode fields
  2014-05-02 14:44   ` Christoph Hellwig
@ 2014-05-02 19:44     ` Jan Kara
  0 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-02 19:44 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jan Kara, linux-fsdevel, Al Viro

On Fri 02-05-14 07:44:09, Christoph Hellwig wrote:
> On Fri, May 02, 2014 at 03:19:58PM +0200, Jan Kara wrote:
> > There are parts of struct inode which are used only by a few filesystems
> > (e.g. i_dquot pointers, i_mapping->private_list, ...). Thus all the
> 
> Why don't you move the private_list into the fs inode like you did
> in the previous few patches and remove the references from the generic
> code?  This would involve some heavy lifting in buffer.c, but that's
> long overdue anyway.
  So I was looking into that option as well and even had some preliminary
patch. It is certainly doable but it seemed a bit ugly to pass around
another pointer for the list of metadata buffers. It is especially ugly for
stuff like remove_inode_buffers() or invalidate_inode_buffers(). There we'd
have to provide new filesystem callbacks or something like that...

								Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 01/17] fs: Remove i_devices from inode
  2014-05-02 14:42   ` Christoph Hellwig
@ 2014-05-20 15:28     ` Jan Kara
  0 siblings, 0 replies; 22+ messages in thread
From: Jan Kara @ 2014-05-20 15:28 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jan Kara, linux-fsdevel, Al Viro

On Fri 02-05-14 07:42:14, Christoph Hellwig wrote:
> On Fri, May 02, 2014 at 03:19:52PM +0200, Jan Kara wrote:
> > Struct inode has an i_devices list head which is used when inode
> > represents a block device or a character device for tracking all inodes
> > referencing that device. The only use of that tracking is to remove
> > references to the device from inodes when the device is released.
> > However both block device and character device code take reference to
> > the device together with referencing it from an inode (block device
> > code takes inode reference, character device code kobj reference) thus
> > device cannot be released while there are any inodes referencing it.
> > 
> > Remove the useless code and i_devices from struct inode.
> 
> While I like the change I don't think it's quite correct.  i_cdev starts
> out NULL and the inode is not added to the list for character devices.
> Then chrdev_open comes in and sets up i_cdev, as well as grabbing one
> reference per open, and __fput drops this reference.
> 
> So far, so good - but for a char device node that was opened before
> but isn't open anymore we have the inode pointing to a struct cdev
> which doesn't have a reference.  When we now unregister the char
> 
> I think this can be easily solved by changing the cdev_put call in
> __fput into something that will clear i_cdev if it dropped the last
> references.
  So I finally got to checking this. The trouble I have is that we'd need
to clear i_cdev when we are closing the last open file for the device inode
and we don't track number of openers currently... So it doesn't seem to be
easily possible to get rid of i_devices after all.

								Honza
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2014-05-20 15:28 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-02 13:19 [PATCH 00/17] Shrink inode size Jan Kara
2014-05-02 13:19 ` [PATCH 01/17] fs: Remove i_devices from inode Jan Kara
2014-05-02 14:42   ` Christoph Hellwig
2014-05-20 15:28     ` Jan Kara
2014-05-02 13:19 ` [PATCH 02/17] fs: Save pointer in struct inode by better unioning Jan Kara
2014-05-02 13:19 ` [PATCH 03/17] hugetlbfs: Use own list instead of mapping->private_list Jan Kara
2014-05-02 13:19 ` [PATCH 04/17] aio: Use i_private instead of mapping->private_data Jan Kara
2014-05-02 13:19 ` [PATCH 05/17] virtio_balloon: Store mapping directly in balloon_dev_info Jan Kara
2014-05-02 13:19 ` [PATCH 06/17] fs: Don't store backing dev mapping in inode->i_mapping->private_data Jan Kara
2014-05-02 13:19 ` [PATCH 07/17] fs: Generic infrastructure for optional inode fields Jan Kara
2014-05-02 14:44   ` Christoph Hellwig
2014-05-02 19:44     ` Jan Kara
2014-05-02 13:19 ` [PATCH 08/17] fs: Convert i_data.private_list to use optional field infrastructure Jan Kara
2014-05-02 13:20 ` [PATCH 09/17] ext2: Use own optional list for metadata bhs Jan Kara
2014-05-02 13:20 ` [PATCH 10/17] udf: " Jan Kara
2014-05-02 13:20 ` [PATCH 11/17] affs: " Jan Kara
2014-05-02 13:20 ` [PATCH 12/17] bfs: " Jan Kara
2014-05-02 13:20 ` [PATCH 13/17] fat: " Jan Kara
2014-05-02 13:20 ` [PATCH 14/17] minix: " Jan Kara
2014-05-02 13:20 ` [PATCH 15/17] sysv: " Jan Kara
2014-05-02 13:20 ` [PATCH 16/17] ext4: " Jan Kara
2014-05-02 13:20 ` [PATCH 17/17] fs: Remove mapping->private_list Jan Kara

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).