From: Matthew Wilcox <willy@linux.intel.com>
To: Boaz Harrosh <boaz@plexistor.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
Dan Williams <dan.j.williams@intel.com>,
Andrew Morton <akpm@linux-foundation.org>,
"linux-mm@kvack.org" <linux-mm@kvack.org>,
Ross Zwisler <ross.zwisler@linux.intel.com>,
Oleg Nesterov <oleg@redhat.com>, Mel Gorman <mgorman@suse.de>,
Johannes Weiner <hannes@cmpxchg.org>
Subject: Re: [PATCHSET 0/2] Allow single pagefault in write access of a VM_MIXEDMAP mapping
Date: Tue, 12 Jan 2016 08:29:57 -0500
Message-ID: <20160112132957.GA21285@linux.intel.com>
In-Reply-To: <20160111193523.GA8945@linux.intel.com>
On Mon, Jan 11, 2016 at 02:35:23PM -0500, Matthew Wilcox wrote:
> I would rather see the memory.c code move in the direction of the
> huge_memory.c code. How about something like this?
Whoops, I missed some bits in the DAX conversion where we'd still return an -errno instead of VM_FAULT flags. Take two.
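To be explicit about the bug class (this is just an illustration, not
part of the patch below; the example_max_pgoff() helper is made up):
a ->fault handler must return VM_FAULT_* bits, which the fault core
tests with masks like VM_FAULT_ERROR, so an -errno such as -EIO has
no meaning there.

	#include <linux/mm.h>

	/* Hypothetical ->fault handler, current (4.5-era) signature. */
	static int example_fault(struct vm_area_struct *vma,
			struct vm_fault *vmf)
	{
		if (vmf->pgoff >= example_max_pgoff(vma))
			return VM_FAULT_SIGBUS;	/* right: a VM_FAULT flag */
			/* not: return -EIO;	 * wrong: an -errno */
		return VM_FAULT_NOPAGE;
	}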
diff --git a/fs/dax.c b/fs/dax.c
index a610cbe..deff70f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -498,6 +498,7 @@ EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
+ const bool write = vmf->flags & FAULT_FLAG_WRITE;
unsigned long vaddr = (unsigned long)vmf->virtual_address;
struct address_space *mapping = inode->i_mapping;
struct block_device *bdev = bh->b_bdev;
@@ -506,7 +507,7 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
.size = bh->b_size,
};
pgoff_t size;
- int error;
+ int result;
i_mmap_lock_read(mapping);
@@ -518,15 +519,11 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
* allocated past the end of the file.
*/
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- if (unlikely(vmf->pgoff >= size)) {
- error = -EIO;
- goto out;
- }
+ if (unlikely(vmf->pgoff >= size))
+ goto sigbus;
- if (dax_map_atomic(bdev, &dax) < 0) {
- error = PTR_ERR(dax.addr);
- goto out;
- }
+ if (dax_map_atomic(bdev, &dax) < 0)
+ goto sigbus;
if (buffer_unwritten(bh) || buffer_new(bh)) {
clear_pmem(dax.addr, PAGE_SIZE);
@@ -534,17 +531,19 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
}
dax_unmap_atomic(bdev, &dax);
- error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
- vmf->flags & FAULT_FLAG_WRITE);
- if (error)
- goto out;
+ if (dax_radix_entry(mapping, vmf->pgoff, dax.sector, false, write))
+ goto sigbus;
- error = vm_insert_mixed(vma, vaddr, dax.pfn);
+ result = vmf_insert_pfn(vma, vaddr, dax.pfn, write);
out:
i_mmap_unlock_read(mapping);
- return error;
+ return result;
+
+ sigbus:
+ result = VM_FAULT_SIGBUS;
+ goto out;
}
static int dax_pte_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
@@ -559,7 +558,7 @@ static int dax_pte_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
unsigned blkbits = inode->i_blkbits;
sector_t block;
pgoff_t size;
- int error;
+ int result, error;
int major = 0;
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -660,13 +659,14 @@ static int dax_pte_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
* indicate what the callback should do via the uptodate variable, same
* as for normal BH based IO completions.
*/
- error = dax_insert_mapping(inode, &bh, vma, vmf);
+ result = dax_insert_mapping(inode, &bh, vma, vmf);
if (buffer_unwritten(&bh)) {
if (complete_unwritten)
- complete_unwritten(&bh, !error);
+ complete_unwritten(&bh, !(result & VM_FAULT_ERROR));
else
WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE));
}
+ return result | major;
out:
if (error == -ENOMEM)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 27dbd1b..a95242c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2170,8 +2170,10 @@ struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t);
int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
-int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+int vm_insert_pfn(struct vm_area_struct *, unsigned long addr,
unsigned long pfn);
+int vmf_insert_pfn(struct vm_area_struct *, unsigned long addr,
+ pfn_t pfn, bool write);
int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
pfn_t pfn);
int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
diff --git a/mm/memory.c b/mm/memory.c
index 708a0c7c..b93bcba 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1505,8 +1505,15 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
}
EXPORT_SYMBOL(vm_insert_page);
+static pte_t maybe_pte_mkwrite(pte_t pte, struct vm_area_struct *vma)
+{
+ if (likely(vma->vm_flags & VM_WRITE))
+ pte = pte_mkwrite(pte);
+ return pte;
+}
+
static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
- pfn_t pfn, pgprot_t prot)
+ pfn_t pfn, pgprot_t prot, bool write)
{
struct mm_struct *mm = vma->vm_mm;
int retval;
@@ -1526,6 +1533,10 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
entry = pte_mkdevmap(pfn_t_pte(pfn, prot));
else
entry = pte_mkspecial(pfn_t_pte(pfn, prot));
+ if (write) {
+ entry = pte_mkyoung(pte_mkdirty(entry));
+ entry = maybe_pte_mkwrite(entry, vma);
+ }
set_pte_at(mm, addr, pte, entry);
update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
@@ -1537,26 +1548,28 @@ out:
}
/**
- * vm_insert_pfn - insert single pfn into user vma
+ * vmf_insert_pfn - insert single pfn into user vma
* @vma: user vma to map to
* @addr: target user address of this page
* @pfn: source kernel pfn
+ * @write: Whether to insert a writable entry
*
* Similar to vm_insert_page, this allows drivers to insert individual pages
* they've allocated into a user vma. Same comments apply.
*
* This function should only be called from a vm_ops->fault handler, and
- * in that case the handler should return NULL.
+ * the return value from this function is suitable for returning from that
+ * handler.
*
* vma cannot be a COW mapping.
*
* As this is called only for pages that do not currently exist, we
* do not need to flush old virtual caches or the TLB.
*/
-int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
- unsigned long pfn)
+int vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+ pfn_t pfn, bool write)
{
- int ret;
+ int error;
pgprot_t pgprot = vma->vm_page_prot;
/*
* Technically, architectures with pte_special can avoid all these
@@ -1568,16 +1581,29 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==
(VM_PFNMAP|VM_MIXEDMAP));
BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
- BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn));
+ BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_t_valid(pfn));
if (addr < vma->vm_start || addr >= vma->vm_end)
- return -EFAULT;
- if (track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)))
- return -EINVAL;
+ return VM_FAULT_SIGBUS;
+ if (track_pfn_insert(vma, &pgprot, pfn))
+ return VM_FAULT_SIGBUS;
- ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot);
+ error = insert_pfn(vma, addr, pfn, pgprot, write);
+ if (error == -EBUSY || !error)
+ return VM_FAULT_NOPAGE;
+ return VM_FAULT_SIGBUS;
+}
+EXPORT_SYMBOL(vmf_insert_pfn);
- return ret;
+/* TODO: Convert users to vmf_insert_pfn */
+int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn)
+{
+ int result = vmf_insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV),
+ false);
+ if (result & VM_FAULT_ERROR)
+ return -EFAULT;
+ return 0;
}
EXPORT_SYMBOL(vm_insert_pfn);
@@ -1602,7 +1628,7 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
page = pfn_t_to_page(pfn);
return insert_page(vma, addr, page, vma->vm_page_prot);
}
- return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+ return insert_pfn(vma, addr, pfn, vma->vm_page_prot, false);
}
EXPORT_SYMBOL(vm_insert_mixed);
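For reference, here's roughly how I'd expect a driver to call the new
interface, per the kerneldoc above. This is only a sketch, not code
from this patch; the mydev_pgoff_to_pfn() lookup is invented:

	/* Hypothetical ->fault handler built on vmf_insert_pfn(). */
	static int mydev_fault(struct vm_area_struct *vma,
			struct vm_fault *vmf)
	{
		pfn_t pfn = mydev_pgoff_to_pfn(vma->vm_private_data,
						vmf->pgoff);

		/*
		 * vmf_insert_pfn() already returns VM_FAULT_* bits
		 * (VM_FAULT_NOPAGE or VM_FAULT_SIGBUS), so the result
		 * can be passed straight back to the fault core, and a
		 * write fault maps the page writable on the first try.
		 */
		return vmf_insert_pfn(vma,
				(unsigned long)vmf->virtual_address,
				pfn, vmf->flags & FAULT_FLAG_WRITE);
	}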
--