All of lore.kernel.org
 help / color / mirror / Atom feed
From: Cong Wang <amwang@redhat.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: linux-kernel@vger.kernel.org, akpm@linux-foundation.org,
	Pekka Enberg <penberg@kernel.org>, Christoph Hellwig <hch@lst.de>,
	Hugh Dickins <hughd@google.com>,
	Dave Hansen <dave@linux.vnet.ibm.com>,
	Lennart Poettering <lennart@poettering.net>,
	Kay Sievers <kay.sievers@vrfy.org>,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	linux-mm@kvack.org
Subject: Re: [V4 PATCH 1/2] tmpfs: add fallocate support
Date: Tue, 29 Nov 2011 15:23:58 +0800	[thread overview]
Message-ID: <4ED4888E.9040402@redhat.com> (raw)
In-Reply-To: <20111129150210.ad266dd7.kamezawa.hiroyu@jp.fujitsu.com>

[-- Attachment #1: Type: text/plain, Size: 774 bytes --]

ao? 2011a1'11ae??29ae?JPY 14:02, KAMEZAWA Hiroyuki a??e??:
>
> You can't know whether the 'page' is allocated by alloc_page() in fallocate()
> or just found as exiting one.
> Then, yourwill corrupt existing pages in error path.
> Is it allowed ?
>

According to the comment,

/*
  * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
  *
  * If we allocate a new one we do not mark it dirty. That's up to the
  * vm. If we swap it in we mark it dirty since we also free the swap
  * entry since a page cannot live in both the swap and page cache
  */

so we can know if the page is newly allocated by checking page dirty bit.
Or am I missing something?

But whoops, I sent a wrong version of this patch, the below one is
the correct one. Sorry for this.


[-- Attachment #2: 0001-tmpfs-add-fallocate-support.patch --]
[-- Type: text/plain, Size: 4320 bytes --]

Subject: [V4 PATCH 1/2] tmpfs: add fallocate support

Systemd needs tmpfs to support fallocate [1], to be able
to safely use mmap(), regarding SIGBUS, on files on the
/dev/shm filesystem. The glibc fallback loop for -ENOSYS
on fallocate is just ugly.

This patch adds fallocate support to tmpfs, and as we
already have shmem_truncate_range(), it is also easy to
add FALLOC_FL_PUNCH_HOLE support too.

1. http://lkml.org/lkml/2011/10/20/275

V3->V4:
Handle 'undo' ENOSPC more correctly.

V2->V3:
a) Read i_size directly after holding i_mutex;
b) Call page_cache_release() too after shmem_getpage();
c) Undo previous changes when -ENOSPC.

Cc: Pekka Enberg <penberg@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: WANG Cong <amwang@redhat.com>

---
commit ca055ad343a0d629f8f1fad1df30796d2292f6a2
Author: Cong Wang <amwang@redhat.com>
Date:   Wed Nov 23 13:16:26 2011 +0800

    tmpfs: add fallocate support
---
 mm/shmem.c |   90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 90 insertions(+), 0 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index d672250..6a6fc66 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -30,6 +30,7 @@
 #include <linux/mm.h>
 #include <linux/export.h>
 #include <linux/swap.h>
+#include <linux/falloc.h>
 
 static struct vfsmount *shm_mnt;
 
@@ -1016,6 +1017,35 @@ failed:
 	return error;
 }
 
+static void shmem_putpage_noswap(struct inode *inode, pgoff_t index, bool new)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct shmem_inode_info *info;
+	struct shmem_sb_info *sbinfo;
+	struct page *page;
+
+	page = find_lock_page(mapping, index);
+
+	if (page) {
+		info = SHMEM_I(inode);
+		sbinfo = SHMEM_SB(inode->i_sb);
+		shmem_acct_block(info->flags);
+		if (!new && PageDirty(page)) {
+			ClearPageDirty(page);
+			delete_from_page_cache(page);
+			spin_lock(&info->lock);
+			info->alloced--;
+			inode->i_blocks -= BLOCKS_PER_PAGE;
+			spin_unlock(&info->lock);
+		}
+		if (sbinfo->max_blocks)
+			percpu_counter_add(&sbinfo->used_blocks, -1);
+		shmem_unacct_blocks(info->flags, 1);
+		unlock_page(page);
+		page_cache_release(page);
+	}
+}
+
 static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
@@ -1431,6 +1461,65 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
 	return error;
 }
 
+static long shmem_fallocate(struct file *file, int mode,
+				loff_t offset, loff_t len)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	pgoff_t start = offset >> PAGE_CACHE_SHIFT;
+	pgoff_t end = DIV_ROUND_UP((offset + len), PAGE_CACHE_SIZE);
+	pgoff_t index = start;
+	loff_t i_size;
+	struct page *page = NULL;
+	int ret = 0;
+
+	if (IS_SWAPFILE(inode))
+		return -ETXTBSY;
+
+	mutex_lock(&inode->i_mutex);
+	i_size = inode->i_size;
+	if (mode & FALLOC_FL_PUNCH_HOLE) {
+		if (!(offset > i_size || (end << PAGE_CACHE_SHIFT) > i_size))
+			shmem_truncate_range(inode, offset,
+					     (end << PAGE_CACHE_SHIFT) - 1);
+		goto unlock;
+	}
+
+	if (!(mode & FALLOC_FL_KEEP_SIZE)) {
+		ret = inode_newsize_ok(inode, (offset + len));
+		if (ret)
+			goto unlock;
+	}
+
+	while (index < end) {
+		ret = shmem_getpage(inode, index, &page, SGP_WRITE, NULL);
+		if (ret) {
+			if (ret == -ENOSPC)
+				goto undo;
+			else
+				goto unlock;
+		}
+		if (page) {
+			unlock_page(page);
+			page_cache_release(page);
+		}
+		index++;
+	}
+	if (!(mode & FALLOC_FL_KEEP_SIZE) && (index << PAGE_CACHE_SHIFT) > i_size)
+		i_size_write(inode, index << PAGE_CACHE_SHIFT);
+
+	goto unlock;
+
+undo:
+	while (index > start) {
+		shmem_putpage_noswap(inode, index, true);
+		index--;
+	}
+
+unlock:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
+
 static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
@@ -2286,6 +2375,7 @@ static const struct file_operations shmem_file_operations = {
 	.fsync		= noop_fsync,
 	.splice_read	= shmem_file_splice_read,
 	.splice_write	= generic_file_splice_write,
+	.fallocate	= shmem_fallocate,
 #endif
 };
 

WARNING: multiple messages have this Message-ID (diff)
From: Cong Wang <amwang@redhat.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: linux-kernel@vger.kernel.org, akpm@linux-foundation.org,
	Pekka Enberg <penberg@kernel.org>, Christoph Hellwig <hch@lst.de>,
	Hugh Dickins <hughd@google.com>,
	Dave Hansen <dave@linux.vnet.ibm.com>,
	Lennart Poettering <lennart@poettering.net>,
	Kay Sievers <kay.sievers@vrfy.org>,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	linux-mm@kvack.org
Subject: Re: [V4 PATCH 1/2] tmpfs: add fallocate support
Date: Tue, 29 Nov 2011 15:23:58 +0800	[thread overview]
Message-ID: <4ED4888E.9040402@redhat.com> (raw)
In-Reply-To: <20111129150210.ad266dd7.kamezawa.hiroyu@jp.fujitsu.com>

[-- Attachment #1: Type: text/plain, Size: 770 bytes --]

于 2011年11月29日 14:02, KAMEZAWA Hiroyuki 写道:
>
> You can't know whether the 'page' is allocated by alloc_page() in fallocate()
> or just found as exiting one.
> Then, yourwill corrupt existing pages in error path.
> Is it allowed ?
>

According to the comment,

/*
  * shmem_getpage_gfp - find page in cache, or get from swap, or allocate
  *
  * If we allocate a new one we do not mark it dirty. That's up to the
  * vm. If we swap it in we mark it dirty since we also free the swap
  * entry since a page cannot live in both the swap and page cache
  */

so we can know if the page is newly allocated by checking page dirty bit.
Or am I missing something?

But whoops, I sent a wrong version of this patch, the below one is
the correct one. Sorry for this.


[-- Attachment #2: 0001-tmpfs-add-fallocate-support.patch --]
[-- Type: text/plain, Size: 4320 bytes --]

Subject: [V4 PATCH 1/2] tmpfs: add fallocate support

Systemd needs tmpfs to support fallocate [1], to be able
to safely use mmap(), regarding SIGBUS, on files on the
/dev/shm filesystem. The glibc fallback loop for -ENOSYS
on fallocate is just ugly.

This patch adds fallocate support to tmpfs, and as we
already have shmem_truncate_range(), it is also easy to
add FALLOC_FL_PUNCH_HOLE support too.

1. http://lkml.org/lkml/2011/10/20/275

V3->V4:
Handle 'undo' ENOSPC more correctly.

V2->V3:
a) Read i_size directly after holding i_mutex;
b) Call page_cache_release() too after shmem_getpage();
c) Undo previous changes when -ENOSPC.

Cc: Pekka Enberg <penberg@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Lennart Poettering <lennart@poettering.net>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: WANG Cong <amwang@redhat.com>

---
commit ca055ad343a0d629f8f1fad1df30796d2292f6a2
Author: Cong Wang <amwang@redhat.com>
Date:   Wed Nov 23 13:16:26 2011 +0800

    tmpfs: add fallocate support
---
 mm/shmem.c |   90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 90 insertions(+), 0 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index d672250..6a6fc66 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -30,6 +30,7 @@
 #include <linux/mm.h>
 #include <linux/export.h>
 #include <linux/swap.h>
+#include <linux/falloc.h>
 
 static struct vfsmount *shm_mnt;
 
@@ -1016,6 +1017,35 @@ failed:
 	return error;
 }
 
+static void shmem_putpage_noswap(struct inode *inode, pgoff_t index, bool new)
+{
+	struct address_space *mapping = inode->i_mapping;
+	struct shmem_inode_info *info;
+	struct shmem_sb_info *sbinfo;
+	struct page *page;
+
+	page = find_lock_page(mapping, index);
+
+	if (page) {
+		info = SHMEM_I(inode);
+		sbinfo = SHMEM_SB(inode->i_sb);
+		shmem_acct_block(info->flags);
+		if (!new && PageDirty(page)) {
+			ClearPageDirty(page);
+			delete_from_page_cache(page);
+			spin_lock(&info->lock);
+			info->alloced--;
+			inode->i_blocks -= BLOCKS_PER_PAGE;
+			spin_unlock(&info->lock);
+		}
+		if (sbinfo->max_blocks)
+			percpu_counter_add(&sbinfo->used_blocks, -1);
+		shmem_unacct_blocks(info->flags, 1);
+		unlock_page(page);
+		page_cache_release(page);
+	}
+}
+
 static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
@@ -1431,6 +1461,65 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
 	return error;
 }
 
+static long shmem_fallocate(struct file *file, int mode,
+				loff_t offset, loff_t len)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	pgoff_t start = offset >> PAGE_CACHE_SHIFT;
+	pgoff_t end = DIV_ROUND_UP((offset + len), PAGE_CACHE_SIZE);
+	pgoff_t index = start;
+	loff_t i_size;
+	struct page *page = NULL;
+	int ret = 0;
+
+	if (IS_SWAPFILE(inode))
+		return -ETXTBSY;
+
+	mutex_lock(&inode->i_mutex);
+	i_size = inode->i_size;
+	if (mode & FALLOC_FL_PUNCH_HOLE) {
+		if (!(offset > i_size || (end << PAGE_CACHE_SHIFT) > i_size))
+			shmem_truncate_range(inode, offset,
+					     (end << PAGE_CACHE_SHIFT) - 1);
+		goto unlock;
+	}
+
+	if (!(mode & FALLOC_FL_KEEP_SIZE)) {
+		ret = inode_newsize_ok(inode, (offset + len));
+		if (ret)
+			goto unlock;
+	}
+
+	while (index < end) {
+		ret = shmem_getpage(inode, index, &page, SGP_WRITE, NULL);
+		if (ret) {
+			if (ret == -ENOSPC)
+				goto undo;
+			else
+				goto unlock;
+		}
+		if (page) {
+			unlock_page(page);
+			page_cache_release(page);
+		}
+		index++;
+	}
+	if (!(mode & FALLOC_FL_KEEP_SIZE) && (index << PAGE_CACHE_SHIFT) > i_size)
+		i_size_write(inode, index << PAGE_CACHE_SHIFT);
+
+	goto unlock;
+
+undo:
+	while (index > start) {
+		shmem_putpage_noswap(inode, index, true);
+		index--;
+	}
+
+unlock:
+	mutex_unlock(&inode->i_mutex);
+	return ret;
+}
+
 static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);
@@ -2286,6 +2375,7 @@ static const struct file_operations shmem_file_operations = {
 	.fsync		= noop_fsync,
 	.splice_read	= shmem_file_splice_read,
 	.splice_write	= generic_file_splice_write,
+	.fallocate	= shmem_fallocate,
 #endif
 };
 

  reply	other threads:[~2011-11-29  7:24 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-29  5:33 [V4 PATCH 1/2] tmpfs: add fallocate support Cong Wang
2011-11-29  5:33 ` Cong Wang
2011-11-29  5:33 ` [V2 PATCH 2/2] fs: wire up .truncate_range and .fallocate Cong Wang
2011-11-29  5:33   ` Cong Wang
2011-11-29  6:02 ` [V4 PATCH 1/2] tmpfs: add fallocate support KAMEZAWA Hiroyuki
2011-11-29  6:02   ` KAMEZAWA Hiroyuki
2011-11-29  7:23   ` Cong Wang [this message]
2011-11-29  7:23     ` Cong Wang
2011-11-29  7:57     ` KAMEZAWA Hiroyuki
2011-11-29  7:57       ` KAMEZAWA Hiroyuki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4ED4888E.9040402@redhat.com \
    --to=amwang@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=dave@linux.vnet.ibm.com \
    --cc=hch@lst.de \
    --cc=hughd@google.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=kay.sievers@vrfy.org \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=lennart@poettering.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=penberg@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.