linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Gleb Natapov <gleb@redhat.com>
To: kvm@vger.kernel.org
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, avi@redhat.com,
	mingo@elte.hu, a.p.zijlstra@chello.nl, tglx@linutronix.de,
	hpa@zytor.com, riel@redhat.com, cl@linux-foundation.org,
	mtosatti@redhat.com
Subject: [PATCH v6 01/12] Add get_user_pages() variant that fails if major fault is required.
Date: Mon,  4 Oct 2010 17:56:23 +0200	[thread overview]
Message-ID: <1286207794-16120-2-git-send-email-gleb@redhat.com> (raw)
In-Reply-To: <1286207794-16120-1-git-send-email-gleb@redhat.com>

This patch adds a get_user_pages() variant that only succeeds if getting
a reference to a page doesn't require a major fault.

Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 fs/ncpfs/mmap.c    |    2 ++
 include/linux/mm.h |    5 +++++
 mm/filemap.c       |    3 +++
 mm/memory.c        |   31 ++++++++++++++++++++++++++++---
 mm/shmem.c         |    8 +++++++-
 5 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 56f5b3a..b9c4f36 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -39,6 +39,8 @@ static int ncp_file_mmap_fault(struct vm_area_struct *area,
 	int bufsize;
 	int pos; /* XXX: loff_t ? */
 
+	if (vmf->flags & FAULT_FLAG_MINOR)
+		return VM_FAULT_MAJOR | VM_FAULT_ERROR;
 	/*
 	 * ncpfs has nothing against high pages as long
 	 * as recvmsg and memset works on it
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 74949fb..da32900 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -144,6 +144,7 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_WRITE	0x01	/* Fault was a write access */
 #define FAULT_FLAG_NONLINEAR	0x02	/* Fault was via a nonlinear mapping */
 #define FAULT_FLAG_MKWRITE	0x04	/* Fault was mkwrite of existing pte */
+#define FAULT_FLAG_MINOR	0x08	/* Do only minor fault */
 
 /*
  * This interface is used by x86 PAT code to identify a pfn mapping that is
@@ -848,6 +849,9 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			unsigned long start, int nr_pages, int write, int force,
 			struct page **pages, struct vm_area_struct **vmas);
+int get_user_pages_noio(struct task_struct *tsk, struct mm_struct *mm,
+			unsigned long start, int nr_pages, int write, int force,
+			struct page **pages, struct vm_area_struct **vmas);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages);
 struct page *get_dump_page(unsigned long addr);
@@ -1394,6 +1398,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_GET	0x04	/* do get_page on page */
 #define FOLL_DUMP	0x08	/* give error on hole if it would be zero */
 #define FOLL_FORCE	0x10	/* get_user_pages read/write w/o permission */
+#define FOLL_MINOR	0x20	/* do only minor page faults */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
diff --git a/mm/filemap.c b/mm/filemap.c
index 3d4df44..ef28b6d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1548,6 +1548,9 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 			goto no_cached_page;
 		}
 	} else {
+		if (vmf->flags & FAULT_FLAG_MINOR)
+			return VM_FAULT_MAJOR | VM_FAULT_ERROR;
+
 		/* No page in the page cache at all */
 		do_sync_mmap_readahead(vma, ra, file, offset);
 		count_vm_event(PGMAJFAULT);
diff --git a/mm/memory.c b/mm/memory.c
index 0e18b4d..b221458 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1441,10 +1441,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			cond_resched();
 			while (!(page = follow_page(vma, start, foll_flags))) {
 				int ret;
+				unsigned int fault_fl =
+					((foll_flags & FOLL_WRITE) ?
+					FAULT_FLAG_WRITE : 0) |
+					((foll_flags & FOLL_MINOR) ?
+					FAULT_FLAG_MINOR : 0);
 
-				ret = handle_mm_fault(mm, vma, start,
-					(foll_flags & FOLL_WRITE) ?
-					FAULT_FLAG_WRITE : 0);
+				ret = handle_mm_fault(mm, vma, start, fault_fl);
 
 				if (ret & VM_FAULT_ERROR) {
 					if (ret & VM_FAULT_OOM)
@@ -1452,6 +1455,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 					if (ret &
 					    (VM_FAULT_HWPOISON|VM_FAULT_SIGBUS))
 						return i ? i : -EFAULT;
+					else if (ret & VM_FAULT_MAJOR)
+						return i ? i : -EFAULT;
 					BUG();
 				}
 				if (ret & VM_FAULT_MAJOR)
@@ -1562,6 +1567,23 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 }
 EXPORT_SYMBOL(get_user_pages);
 
+int get_user_pages_noio(struct task_struct *tsk, struct mm_struct *mm,
+		unsigned long start, int nr_pages, int write, int force,
+		struct page **pages, struct vm_area_struct **vmas)
+{
+	int flags = FOLL_TOUCH | FOLL_MINOR;
+
+	if (pages)
+		flags |= FOLL_GET;
+	if (write)
+		flags |= FOLL_WRITE;
+	if (force)
+		flags |= FOLL_FORCE;
+
+	return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
+}
+EXPORT_SYMBOL(get_user_pages_noio);
+
 /**
  * get_dump_page() - pin user page in memory while writing it to core dump
  * @addr: user address
@@ -2648,6 +2670,9 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
 	page = lookup_swap_cache(entry);
 	if (!page) {
+		if (flags & FAULT_FLAG_MINOR)
+			return VM_FAULT_MAJOR | VM_FAULT_ERROR;
+
 		grab_swap_token(mm); /* Contend for token _before_ read-in */
 		page = swapin_readahead(entry,
 					GFP_HIGHUSER_MOVABLE, vma, address);
diff --git a/mm/shmem.c b/mm/shmem.c
index 080b09a..470d8a7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1228,6 +1228,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
 	swp_entry_t swap;
 	gfp_t gfp;
 	int error;
+	int flags = type ? *type : 0;
 
 	if (idx >= SHMEM_MAX_INDEX)
 		return -EFBIG;
@@ -1287,6 +1288,11 @@ repeat:
 		swappage = lookup_swap_cache(swap);
 		if (!swappage) {
 			shmem_swp_unmap(entry);
+			if (flags & FAULT_FLAG_MINOR) {
+				spin_unlock(&info->lock);
+				*type = VM_FAULT_MAJOR | VM_FAULT_ERROR;
+				goto failed;
+			}
 			/* here we actually do the io */
 			if (type && !(*type & VM_FAULT_MAJOR)) {
 				__count_vm_event(PGMAJFAULT);
@@ -1510,7 +1516,7 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
 	int error;
-	int ret;
+	int ret = (int)vmf->flags;
 
 	if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
 		return VM_FAULT_SIGBUS;
-- 
1.7.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2010-10-04 15:56 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-04 15:56 [PATCH v6 00/12] KVM: Add host swap event notifications for PV guest Gleb Natapov
2010-10-04 15:56 ` Gleb Natapov [this message]
2010-10-04 15:56 ` [PATCH v6 02/12] Halt vcpu if page it tries to access is swapped out Gleb Natapov
2010-10-05  1:20   ` Rik van Riel
2010-10-05 14:59   ` Marcelo Tosatti
2010-10-06 10:50     ` Avi Kivity
2010-10-06 10:52       ` Gleb Natapov
2010-10-07  9:54         ` Avi Kivity
2010-10-07 17:48           ` Gleb Natapov
2010-10-06 11:15     ` Gleb Natapov
2010-10-07  9:50   ` Avi Kivity
2010-10-07  9:52     ` Avi Kivity
2010-10-07 13:24     ` Rik van Riel
2010-10-07 13:29       ` Avi Kivity
2010-10-07 17:47     ` Gleb Natapov
2010-10-09 18:30       ` Avi Kivity
2010-10-09 18:32         ` Avi Kivity
2010-10-10  7:30           ` Gleb Natapov
2010-10-10  7:29         ` Gleb Natapov
2010-10-10 15:55           ` Avi Kivity
2010-10-10 15:56             ` Avi Kivity
2010-10-10 16:17               ` Gleb Natapov
2010-10-10 16:16             ` Gleb Natapov
2010-10-04 15:56 ` [PATCH v6 03/12] Retry fault before vmentry Gleb Natapov
2010-10-05 15:54   ` Marcelo Tosatti
2010-10-06 11:07     ` Gleb Natapov
2010-10-06 14:20       ` Marcelo Tosatti
2010-10-07 18:44         ` Gleb Natapov
2010-10-08 16:07           ` Marcelo Tosatti
2010-10-07 12:29   ` Avi Kivity
2010-10-07 17:21     ` Gleb Natapov
2010-10-09 18:42       ` Avi Kivity
2010-10-10  7:35         ` Gleb Natapov
2010-10-04 15:56 ` [PATCH v6 04/12] Add memory slot versioning and use it to provide fast guest write interface Gleb Natapov
2010-10-05  1:29   ` Rik van Riel
2010-10-05 16:57   ` Marcelo Tosatti
2010-10-06 11:14     ` Gleb Natapov
2010-10-06 14:38       ` Marcelo Tosatti
2010-10-06 20:08         ` Gleb Natapov
2010-10-07 10:00           ` Avi Kivity
2010-10-07 15:42             ` Marcelo Tosatti
2010-10-07 16:03               ` Gleb Natapov
2010-10-07 16:20                 ` Avi Kivity
2010-10-07 17:23                   ` Gleb Natapov
2010-10-10 12:48                     ` Avi Kivity
2010-10-07 12:31   ` Avi Kivity
2010-10-04 15:56 ` [PATCH v6 05/12] Move kvm_smp_prepare_boot_cpu() from kvmclock.c to kvm.c Gleb Natapov
2010-10-04 15:56 ` [PATCH v6 06/12] Add PV MSR to enable asynchronous page faults delivery Gleb Natapov
2010-10-07 12:42   ` Avi Kivity
2010-10-07 17:53     ` Gleb Natapov
2010-10-10 12:47       ` Avi Kivity
2010-10-10 13:27         ` Gleb Natapov
2010-10-07 12:58   ` Avi Kivity
2010-10-07 17:59     ` Gleb Natapov
2010-10-09 18:43       ` Avi Kivity
2010-10-04 15:56 ` [PATCH v6 07/12] Add async PF initialization to PV guest Gleb Natapov
2010-10-05  2:34   ` Rik van Riel
2010-10-05 18:25   ` Marcelo Tosatti
2010-10-06 10:55     ` Gleb Natapov
2010-10-06 14:45       ` Marcelo Tosatti
2010-10-06 20:05         ` Gleb Natapov
2010-10-07 12:50   ` Avi Kivity
2010-10-08  7:54     ` Gleb Natapov
2010-10-09 18:44       ` Avi Kivity
2010-10-04 15:56 ` [PATCH v6 08/12] Handle async PF in a guest Gleb Natapov
2010-10-07 13:10   ` Avi Kivity
2010-10-07 17:14     ` Gleb Natapov
2010-10-07 17:18       ` Avi Kivity
2010-10-07 17:48         ` Rik van Riel
2010-10-07 18:03         ` Gleb Natapov
2010-10-09 18:48           ` Avi Kivity
2010-10-10  7:56             ` Gleb Natapov
2010-10-10 12:40               ` Avi Kivity
2010-10-10 12:32     ` Gleb Natapov
2010-10-10 12:38       ` Avi Kivity
2010-10-10 13:22         ` Gleb Natapov
2010-10-04 15:56 ` [PATCH v6 09/12] Inject asynchronous page fault into a PV guest if page is swapped out Gleb Natapov
2010-10-05  2:36   ` Rik van Riel
2010-10-05 19:00   ` Marcelo Tosatti
2010-10-06 10:42     ` Gleb Natapov
2010-10-04 15:56 ` [PATCH v6 10/12] Handle async PF in non preemptable context Gleb Natapov
2010-10-05 19:51   ` Marcelo Tosatti
2010-10-06 10:41     ` Gleb Natapov
2010-10-10 14:25       ` Gleb Natapov
2010-10-04 15:56 ` [PATCH v6 11/12] Let host know whether the guest can handle async PF in non-userspace context Gleb Natapov
2010-10-07 13:36   ` Avi Kivity
2010-10-04 15:56 ` [PATCH v6 12/12] Send async PF when guest is not in userspace too Gleb Natapov
2010-10-05  2:37   ` Rik van Riel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1286207794-16120-2-git-send-email-gleb@redhat.com \
    --to=gleb@redhat.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=avi@redhat.com \
    --cc=cl@linux-foundation.org \
    --cc=hpa@zytor.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=mtosatti@redhat.com \
    --cc=riel@redhat.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).