linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Matthias Wirth <matthias.wirth@gmail.com>
To: Matthias Wirth <matthias.wirth@gmail.com>
Cc: Lukas Senger <lukas@fridolin.com>,
	Matthew Wilcox <matthew@wil.cx>, Jeff Layton <jlayton@redhat.com>,
	"J. Bruce Fields" <bfields@fieldses.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@suse.cz>, Rik van Riel <riel@redhat.com>,
	Lisa Du <cldu@marvell.com>, Paul Mackerras <paulus@samba.org>,
	Sasha Levin <sasha.levin@oracle.com>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Fengguang Wu <fengguang.wu@intel.com>,
	Shaohua Li <shli@kernel.org>,
	Alexey Kardashevskiy <aik@ozlabs.ru>,
	Minchan Kim <minchan@kernel.org>,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Al Viro <viro@zeniv.linux.org.uk>,
	Steven Whitehouse <swhiteho@redhat.com>,
	Mel Gorman <mgorman@suse.de>,
	Cody P Schafer <cody@linux.vnet.ibm.com>,
	Jiang Liu <liuj97@gmail.com>,
	David Rientjes <rientjes@google.com>,
	"Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Zhang Yanfei <zhangyanfei@cn.fujitsu.com>,
	Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>,
	Lukas Czerner <lczerner@redhat.com>,
	Damien Ramonda <damien.ramonda@intel.com>,
	Mark Rutland <mark.rutland@arm.com>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Subject: [PATCH] mm: implement POSIX_FADV_NOREUSE
Date: Tue, 11 Mar 2014 11:25:41 +0100	[thread overview]
Message-ID: <1394533550-18485-1-git-send-email-matthias.wirth@gmail.com> (raw)

Backups, log rotation and indexers don't need files they read to remain
in the page cache. Their pages can be reclaimed early and should not
displace useful pages. POSIX specifies the POSIX_FADV_NOREUSE flag for
these use cases but it's currently a no-op.

In our implementation, pages marked with the NoReuse flag are added to
the tail of the LRU list the first time they are read. Therefore they
are the first to be reclaimed.

We needed to add flags to the file and page structs in order to pass
down the hint to the actual call to list_add.

Signed-off-by: Matthias Wirth <matthias.wirth@gmail.com>
Signed-off-by: Lukas Senger <lukas@fridolin.com>
---
 include/linux/fs.h         | 3 +++
 include/linux/mm_inline.h  | 7 ++++++-
 include/linux/page-flags.h | 2 ++
 mm/fadvise.c               | 4 ++++
 mm/filemap.c               | 3 +++
 mm/page_alloc.c            | 1 +
 mm/readahead.c             | 2 ++
 7 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 881accf..3e80149 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -123,6 +123,9 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File is opened with O_PATH; almost nothing can be done with it */
 #define FMODE_PATH		((__force fmode_t)0x4000)
 
+/* Expect one read only (effect on page cache behavior) */
+#define FMODE_NOREUSE		((__force fmode_t)0x8000)
+
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x1000000)
 
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index cf55945..1bed771 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -27,7 +27,12 @@ static __always_inline void add_page_to_lru_list(struct page *page,
 {
 	int nr_pages = hpage_nr_pages(page);
 	mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
-	list_add(&page->lru, &lruvec->lists[lru]);
+	if (unlikely(PageNoReuse(page))) {
+		ClearPageNoReuse(page);
+		list_add_tail(&page->lru, &lruvec->lists[lru]);
+	} else {
+		list_add(&page->lru, &lruvec->lists[lru]);
+	}
 	__mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages);
 }
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d1fe1a7..ee5af4c 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -109,6 +109,7 @@ enum pageflags {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	PG_compound_lock,
 #endif
+	PG_noreuse,		/* page is added to tail of LRU list */
 	__NR_PAGEFLAGS,
 
 	/* Filesystems */
@@ -206,6 +207,7 @@ __PAGEFLAG(Slab, slab)
 PAGEFLAG(Checked, checked)		/* Used by some filesystems */
 PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned)	/* Xen */
 PAGEFLAG(SavePinned, savepinned);			/* Xen */
+PAGEFLAG(NoReuse, noreuse);
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 3bcfd81..387d10a 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -80,6 +80,7 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
 		f.file->f_ra.ra_pages = bdi->ra_pages;
 		spin_lock(&f.file->f_lock);
 		f.file->f_mode &= ~FMODE_RANDOM;
+		f.file->f_mode &= ~FMODE_NOREUSE;
 		spin_unlock(&f.file->f_lock);
 		break;
 	case POSIX_FADV_RANDOM:
@@ -111,6 +112,9 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
 					   nrpages);
 		break;
 	case POSIX_FADV_NOREUSE:
+		spin_lock(&f.file->f_lock);
+		f.file->f_mode |= FMODE_NOREUSE;
+		spin_unlock(&f.file->f_lock);
 		break;
 	case POSIX_FADV_DONTNEED:
 		if (!bdi_write_congested(mapping->backing_dev_info))
diff --git a/mm/filemap.c b/mm/filemap.c
index 97474c1..8f57ca8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1630,6 +1630,9 @@ no_cached_page:
 			desc->error = -ENOMEM;
 			goto out;
 		}
+		if (filp->f_mode & FMODE_NOREUSE)
+			SetPageNoReuse(page);
+
 		error = add_to_page_cache_lru(page, mapping,
 						index, GFP_KERNEL);
 		if (error) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 336ee92..a756165 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6512,6 +6512,7 @@ static const struct trace_print_flags pageflag_names[] = {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	{1UL << PG_compound_lock,	"compound_lock"	},
 #endif
+	{1UL << PG_noreuse,		"noreuse"	},
 };
 
 static void dump_page_flags(unsigned long flags)
diff --git a/mm/readahead.c b/mm/readahead.c
index 29c5e1a..e8d9221 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -189,6 +189,8 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 		list_add(&page->lru, &page_pool);
 		if (page_idx == nr_to_read - lookahead_size)
 			SetPageReadahead(page);
+		if (filp->f_mode & FMODE_NOREUSE)
+			SetPageNoReuse(page);
 		ret++;
 	}
 
-- 
1.8.3.2

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

             reply	other threads:[~2014-03-11 10:27 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-03-11 10:25 Matthias Wirth [this message]
2014-03-11 14:06 ` [PATCH] mm: implement POSIX_FADV_NOREUSE Michal Hocko
2014-03-11 15:24   ` Dave Hansen
2014-03-11 21:27     ` Andrew Morton
2014-03-12 11:59       ` Lukas Senger
2014-03-12 14:46         ` Michal Hocko
2014-03-12 16:05         ` Dave Hansen
2014-03-13 12:40           ` Lukas Senger
2014-03-13 18:43 ` [PATCHv2] " Matthias Wirth
2014-03-13 20:01   ` Andrew Morton
2014-03-14 12:34     ` Lukas Senger
2014-03-14 15:52 ` [PATCHv3] " Matthias Wirth
2014-03-18 15:14   ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1394533550-18485-1-git-send-email-matthias.wirth@gmail.com \
    --to=matthias.wirth@gmail.com \
    --cc=aik@ozlabs.ru \
    --cc=akpm@linux-foundation.org \
    --cc=benh@kernel.crashing.org \
    --cc=bfields@fieldses.org \
    --cc=cldu@marvell.com \
    --cc=cody@linux.vnet.ibm.com \
    --cc=damien.ramonda@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=fengguang.wu@intel.com \
    --cc=hannes@cmpxchg.org \
    --cc=jlayton@redhat.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=lczerner@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=liuj97@gmail.com \
    --cc=lukas@fridolin.com \
    --cc=mark.rutland@arm.com \
    --cc=matthew@wil.cx \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.cz \
    --cc=minchan@kernel.org \
    --cc=paulus@samba.org \
    --cc=raghavendra.kt@linux.vnet.ibm.com \
    --cc=riel@redhat.com \
    --cc=rientjes@google.com \
    --cc=sasha.levin@oracle.com \
    --cc=shli@kernel.org \
    --cc=srivatsa.bhat@linux.vnet.ibm.com \
    --cc=swhiteho@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=zhangyanfei@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).