From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: Linus Torvalds <torvalds@linux-foundation.org>,
Andrew Morton <akpm@linux-foundation.org>,
Mel Gorman <mgorman@suse.de>, Rik van Riel <riel@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>,
Matthew Wilcox <matthew.r.wilcox@intel.com>,
Dave Hansen <dave.hansen@linux.intel.com>,
linux-mm@kvack.org,
"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCH 1/2] mm: extend ->fault interface to fault in few pages around fault address
Date: Tue, 11 Feb 2014 05:05:56 +0200
Message-ID: <1392087957-15730-2-git-send-email-kirill.shutemov@linux.intel.com>
In-Reply-To: <1392087957-15730-1-git-send-email-kirill.shutemov@linux.intel.com>
If FAULT_FLAG_AROUND is set in the fault flags, the fault handler asks
->fault to fill the ->pages array with up to ->nr_pages pages, if they
are ready to be mapped.

If a page is not ready to be mapped, there is no need to wait for it:
just skip it and move on to the next one.

It's okay to have some (or all) elements of the array set to NULL.

Page indexes must be in the range between ->min and ->max, inclusive.
The array must not contain the page with index ->pgoff; that page
belongs in ->page.

->fault must set the VM_FAULT_AROUND bit in its return code if it
fills the array.

All returned pages must be locked.
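
For illustration, here is a minimal sketch of a ->fault implementation
that honours FAULT_FLAG_AROUND (hypothetical code: the real user is
filemap_fault() in the next patch, and error handling, readahead and
truncate races are omitted):

	static int example_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
	{
		struct address_space *mapping = file_inode(vma->vm_file)->i_mapping;
		struct page *page;
		pgoff_t off;
		int i = 0;

		/* the faulting page goes in ->page, as before */
		vmf->page = find_get_page(mapping, vmf->pgoff);
		lock_page(vmf->page);

		if (!(vmf->flags & FAULT_FLAG_AROUND))
			return VM_FAULT_LOCKED;

		memset(vmf->pages, 0, vmf->nr_pages * sizeof(struct page *));
		for (off = vmf->min; off <= vmf->max && i < vmf->nr_pages; off++) {
			if (off == vmf->pgoff)
				continue;	/* belongs in ->page, never in ->pages */
			page = find_get_page(mapping, off);
			if (!page)
				continue;
			/* not ready to map? do not wait, skip to the next one */
			if (!trylock_page(page)) {
				page_cache_release(page);
				continue;
			}
			if (!PageUptodate(page) || page->mapping != mapping) {
				unlock_page(page);
				page_cache_release(page);
				continue;
			}
			/* handed over locked, with the page cache reference held */
			vmf->pages[i++] = page;
		}
		return VM_FAULT_LOCKED | VM_FAULT_AROUND;
	}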
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
include/linux/mm.h | 24 +++++++++++++++++++++
mm/memory.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 79 insertions(+), 6 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f28f46eade6a..fe5629bc9e5b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -191,6 +191,7 @@ extern pgprot_t protection_map[16];
#define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */
#define FAULT_FLAG_TRIED 0x40 /* second try */
#define FAULT_FLAG_USER 0x80 /* The fault originated in userspace */
+#define FAULT_FLAG_AROUND 0x100 /* Try to get a few pages at a time */
/*
* vm_fault is filled by the pagefault handler and passed to the vma's
@@ -210,6 +211,28 @@ struct vm_fault {
* is set (which is also implied by
* VM_FAULT_ERROR).
*/
+
+ /*
+ * If FAULT_FLAG_AROUND is set in ->flags, the fault handler asks
+ * ->fault to fill the ->pages array with up to ->nr_pages pages,
+ * if they are ready to be mapped.
+ *
+ * If a page is not ready to be mapped, there is no need to wait
+ * for it: just skip it and move on to the next one.
+ *
+ * It's okay to have some (or all) elements of the array set to
+ * NULL.
+ *
+ * Page indexes must be in the range between ->min and ->max,
+ * inclusive. The array must not contain the page with index
+ * ->pgoff; that page belongs in ->page.
+ *
+ * ->fault must set the VM_FAULT_AROUND bit in its return code
+ * if it fills the array.
+ *
+ * All returned pages must be locked.
+ */
+ int nr_pages;
+ pgoff_t min, max;
+ struct page **pages;
};
/*
@@ -1004,6 +1027,7 @@ static inline int page_mapped(struct page *page)
#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */
#define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */
#define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */
+#define VM_FAULT_AROUND 0x1000 /* ->pages is filled */
#define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
diff --git a/mm/memory.c b/mm/memory.c
index 68c3dc141059..47ab9d6e1666 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3287,27 +3287,52 @@ oom:
}
static int __do_fault(struct vm_area_struct *vma, unsigned long address,
- pgoff_t pgoff, unsigned int flags, struct page **page)
+ pgoff_t pgoff, unsigned int flags, struct page **page,
+ struct page **pages, int nr_pages)
{
struct vm_fault vmf;
- int ret;
+ int i, ret;
vmf.virtual_address = (void __user *)(address & PAGE_MASK);
vmf.pgoff = pgoff;
vmf.flags = flags;
vmf.page = NULL;
+ if (flags & FAULT_FLAG_AROUND) {
+ vmf.pages = pages;
+ vmf.nr_pages = nr_pages;
+
+ /*
+ * Window: from the fault address aligned down to a FAULT_AROUND_PAGES
+ * boundary, to the end of the page table.
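+ *
+ * E.g. (hypothetical numbers): with nr_pages == 32, PTRS_PER_PTE == 512,
+ * pgoff == 100 and the fault address in pte slot 70 of its page table:
+ * min = 100 - (70 & 31) = 94 and max = 100 + 511 - 70 = 541, before
+ * both are clamped to the vma below.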
+ */
+ vmf.min = pgoff - ((address >> PAGE_SHIFT) & (nr_pages - 1));
+ vmf.min = min(pgoff, vmf.min); /* guard against underflow */
+ vmf.max = pgoff + PTRS_PER_PTE - 1 -
+ ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
+ /* Both should be inside the vma */
+ vmf.min = max(vma->vm_pgoff, vmf.min);
+ vmf.max = min(vma_pages(vma) + vma->vm_pgoff - 1, vmf.max);
+ }
+
ret = vma->vm_ops->fault(vma, &vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
if (unlikely(PageHWPoison(vmf.page))) {
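+ /* drop the extra ->pages; the faulting page itself is released below */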
+ for (i = 0; (ret & VM_FAULT_AROUND) && i < nr_pages; i++) {
+ if (!pages[i])
+ continue;
+ unlock_page(pages[i]);
+ page_cache_release(pages[i]);
+ }
if (ret & VM_FAULT_LOCKED)
unlock_page(vmf.page);
page_cache_release(vmf.page);
return VM_FAULT_HWPOISON;
}
+ /* pages in ->pages are always returned locked */
if (unlikely(!(ret & VM_FAULT_LOCKED)))
lock_page(vmf.page);
else
@@ -3341,16 +3366,21 @@ static void do_set_pte(struct vm_area_struct *vma, unsigned long address,
update_mmu_cache(vma, address, pte);
}
+#define FAULT_AROUND_PAGES 32
static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd,
pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
{
struct page *fault_page;
+ struct page *pages[FAULT_AROUND_PAGES];
spinlock_t *ptl;
pte_t *pte;
- int ret;
+ int i, ret;
- ret = __do_fault(vma, address, pgoff, flags, &fault_page);
+ if (!(flags & FAULT_FLAG_NONLINEAR))
+ flags |= FAULT_FLAG_AROUND;
+ ret = __do_fault(vma, address, pgoff, flags, &fault_page,
+ pages, ARRAY_SIZE(pages));
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
@@ -3362,6 +3392,25 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return ret;
}
do_set_pte(vma, address, fault_page, pte, false, false);
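+ /* map the extra pages ->fault returned, under the same page table lock */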
+ for (i = 0; (ret & VM_FAULT_AROUND) && i < ARRAY_SIZE(pages); i++) {
+ pte_t *_pte;
+ unsigned long addr;
+ if (!pages[i])
+ continue;
+ VM_BUG_ON_PAGE(!PageLocked(pages[i]), pages[i]);
+ if (PageHWPoison(pages[i]))
+ goto skip;
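+ /* pte slot and user address for this page follow from its index */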
+ _pte = pte + pages[i]->index - pgoff;
+ if (!pte_none(*_pte))
+ goto skip;
+ addr = address + PAGE_SIZE * (pages[i]->index - pgoff);
+ do_set_pte(vma, addr, pages[i], _pte, false, false);
+ unlock_page(pages[i]);
+ continue;
+skip:
+ unlock_page(pages[i]);
+ put_page(pages[i]);
+ }
pte_unmap_unlock(pte, ptl);
unlock_page(fault_page);
return ret;
@@ -3388,7 +3437,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_OOM;
}
- ret = __do_fault(vma, address, pgoff, flags, &fault_page);
+ ret = __do_fault(vma, address, pgoff, flags, &fault_page, NULL, 0);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
goto uncharge_out;
@@ -3423,7 +3472,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
int dirtied = 0;
int ret, tmp;
- ret = __do_fault(vma, address, pgoff, flags, &fault_page);
+ ret = __do_fault(vma, address, pgoff, flags, &fault_page, NULL, 0);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
--
1.8.5.2