* [PATCH 2/7] userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support
  2016-08-04  8:14 [PATCH 0/7] userfaultfd: add support for shared memory Mike Rapoport
@ 2016-08-04  8:14 ` Mike Rapoport
From: Mike Rapoport @ 2016-08-04  8:14 UTC
  To: Andrea Arcangeli
  Cc: Hugh Dickins, Pavel Emelyanov, linux-mm, linux-kernel,
	Mike Rapoport

shmem_mcopy_atomic_pte is the low-level routine that implements
the userfaultfd UFFDIO_COPY command for shmem VMAs.  It is based on
the existing mcopy_atomic_pte routine, with modifications for shared
memory pages.
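
For context: a userfaultfd monitor registers a shmem-backed MAP_SHARED
range and populates its missing pages with the UFFDIO_COPY ioctl, which
is the path that ends up in this routine.  A minimal userspace sketch
of that flow (illustrative only, error handling omitted; it assumes the
whole series is applied so that registering a shmem VMA is accepted):

#define _GNU_SOURCE
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/userfaultfd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);

	/* no glibc wrapper for userfaultfd(2), go through syscall(2) */
	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

	struct uffdio_api api = { .api = UFFD_API };
	ioctl(uffd, UFFDIO_API, &api);

	/* a shmem-backed MAP_SHARED mapping, the case this series enables */
	int memfd = memfd_create("uffd-shmem", 0);
	ftruncate(memfd, page);
	char *dst = mmap(NULL, page, PROT_READ | PROT_WRITE,
			 MAP_SHARED, memfd, 0);

	struct uffdio_register reg = {
		.range = { .start = (unsigned long)dst, .len = page },
		.mode  = UFFDIO_REGISTER_MODE_MISSING,
	};
	ioctl(uffd, UFFDIO_REGISTER, &reg);

	/* source page prepared by the monitor */
	char *src = mmap(NULL, page, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	memset(src, 0xaa, page);

	/* copy it in atomically; this reaches shmem_mcopy_atomic_pte() */
	struct uffdio_copy copy = {
		.dst = (unsigned long)dst,
		.src = (unsigned long)src,
		.len = page,
	};
	ioctl(uffd, UFFDIO_COPY, &copy);

	return dst[0] == (char)0xaa ? 0 : 1;
}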

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
---
 include/linux/shmem_fs.h |  11 +++++
 mm/shmem.c               | 109 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 120 insertions(+)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 4d4780c..8dcbdfd 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -83,4 +83,15 @@ static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a)
 
 #endif
 
+#ifdef CONFIG_SHMEM
+extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
+				  struct vm_area_struct *dst_vma,
+				  unsigned long dst_addr,
+				  unsigned long src_addr,
+				  struct page **pagep);
+#else
+#define shmem_mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, \
+			       src_addr, pagep)        ({ BUG(); 0; })
+#endif
+
 #endif
diff --git a/mm/shmem.c b/mm/shmem.c
index a361449..fcf560c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -69,6 +69,7 @@ static struct vfsmount *shm_mnt;
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
 #include <uapi/linux/memfd.h>
+#include <linux/rmap.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -1548,6 +1549,114 @@ bool shmem_mapping(struct address_space *mapping)
 	return mapping->host->i_sb->s_op == &shmem_ops;
 }
 
+int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
+			   pmd_t *dst_pmd,
+			   struct vm_area_struct *dst_vma,
+			   unsigned long dst_addr,
+			   unsigned long src_addr,
+			   struct page **pagep)
+{
+	struct inode *inode = file_inode(dst_vma->vm_file);
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	struct address_space *mapping = inode->i_mapping;
+	gfp_t gfp = mapping_gfp_mask(mapping);
+	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
+	struct mem_cgroup *memcg;
+	spinlock_t *ptl;
+	void *page_kaddr;
+	struct page *page;
+	pte_t _dst_pte, *dst_pte;
+	int ret;
+
+	if (!*pagep) {
+		ret = -ENOMEM;
+		if (shmem_acct_block(info->flags))
+			goto out;
+		if (sbinfo->max_blocks) {
+			if (percpu_counter_compare(&sbinfo->used_blocks,
+						   sbinfo->max_blocks) >= 0)
+				goto out_unacct_blocks;
+			percpu_counter_inc(&sbinfo->used_blocks);
+		}
+
+		page = shmem_alloc_page(gfp, info, pgoff);
+		if (!page)
+			goto out_dec_used_blocks;
+
+		page_kaddr = kmap_atomic(page);
+		ret = copy_from_user(page_kaddr, (const void __user *)src_addr,
+				     PAGE_SIZE);
+		kunmap_atomic(page_kaddr);
+
+		/* fallback to copy_from_user outside mmap_sem */
+		if (unlikely(ret)) {
+			*pagep = page;
+			/* don't free the page */
+			return -EFAULT;
+		}
+	} else {
+		page = *pagep;
+		*pagep = NULL;
+	}
+
+	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
+	if (dst_vma->vm_flags & VM_WRITE)
+		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
+
+	ret = -EEXIST;
+	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
+	if (!pte_none(*dst_pte))
+		goto out_release_uncharge_unlock;
+
+	__SetPageUptodate(page);
+
+	ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg,
+				    false);
+	if (ret)
+		goto out_release_uncharge_unlock;
+	ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
+	if (!ret) {
+		ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL);
+		radix_tree_preload_end();
+	}
+	if (ret) {
+		mem_cgroup_cancel_charge(page, memcg, false);
+		goto out_release_uncharge_unlock;
+	}
+
+	mem_cgroup_commit_charge(page, memcg, false, false);
+	lru_cache_add_anon(page);
+
+	spin_lock(&info->lock);
+	info->alloced++;
+	inode->i_blocks += BLOCKS_PER_PAGE;
+	shmem_recalc_inode(inode);
+	spin_unlock(&info->lock);
+
+	inc_mm_counter(dst_mm, mm_counter_file(page));
+	page_add_file_rmap(page);
+	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
+
+	/* No need to invalidate - it was non-present before */
+	update_mmu_cache(dst_vma, dst_addr, dst_pte);
+	unlock_page(page);
+	pte_unmap_unlock(dst_pte, ptl);
+	ret = 0;
+out:
+	return ret;
+out_release_uncharge_unlock:
+	pte_unmap_unlock(dst_pte, ptl);
+	mem_cgroup_cancel_charge(page, memcg, false);
+	put_page(page);
+out_dec_used_blocks:
+	if (sbinfo->max_blocks)
+		percpu_counter_add(&sbinfo->used_blocks, -1);
+out_unacct_blocks:
+	shmem_unacct_blocks(info->flags, 1);
+	goto out;
+}
+
 #ifdef CONFIG_TMPFS
 static const struct inode_operations shmem_symlink_inode_operations;
 static const struct inode_operations shmem_short_symlink_operations;
-- 
1.9.1


* Re: [PATCH 2/7] userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support
       [not found] <004301d1ee32$fc583630$f508a290$@alibaba-inc.com>
@ 2016-08-04  9:37 ` Hillf Danton
  2016-08-04 13:18   ` Mike Rapoport
From: Hillf Danton @ 2016-08-04  9:37 UTC
  To: 'Mike Rapoport'; +Cc: linux-mm

> 
> +int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
> +			   pmd_t *dst_pmd,
> +			   struct vm_area_struct *dst_vma,
> +			   unsigned long dst_addr,
> +			   unsigned long src_addr,
> +			   struct page **pagep)
> +{
> +	struct inode *inode = file_inode(dst_vma->vm_file);
> +	struct shmem_inode_info *info = SHMEM_I(inode);
> +	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
> +	struct address_space *mapping = inode->i_mapping;
> +	gfp_t gfp = mapping_gfp_mask(mapping);
> +	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
> +	struct mem_cgroup *memcg;
> +	spinlock_t *ptl;
> +	void *page_kaddr;
> +	struct page *page;
> +	pte_t _dst_pte, *dst_pte;
> +	int ret;
> +
> +	if (!*pagep) {
> +		ret = -ENOMEM;
> +		if (shmem_acct_block(info->flags))
> +			goto out;
> +		if (sbinfo->max_blocks) {
> +			if (percpu_counter_compare(&sbinfo->used_blocks,
> +						   sbinfo->max_blocks) >= 0)
> +				goto out_unacct_blocks;
> +			percpu_counter_inc(&sbinfo->used_blocks);
> +		}
> +
> +		page = shmem_alloc_page(gfp, info, pgoff);
> +		if (!page)
> +			goto out_dec_used_blocks;
> +
> +		page_kaddr = kmap_atomic(page);
> +		ret = copy_from_user(page_kaddr, (const void __user *)src_addr,
> +				     PAGE_SIZE);
> +		kunmap_atomic(page_kaddr);
> +
> +		/* fallback to copy_from_user outside mmap_sem */
> +		if (unlikely(ret)) {
> +			*pagep = page;
> +			/* don't free the page */
> +			return -EFAULT;
> +		}
> +	} else {
> +		page = *pagep;
> +		*pagep = NULL;
> +	}
> +
> +	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
> +	if (dst_vma->vm_flags & VM_WRITE)
> +		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
> +
> +	ret = -EEXIST;
> +	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
> +	if (!pte_none(*dst_pte))
> +		goto out_release_uncharge_unlock;
> +
> +	__SetPageUptodate(page);
> +
> +	ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg,
> +				    false);
> +	if (ret)
> +		goto out_release_uncharge_unlock;
> +	ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);

You have to preload the radix tree without &ptl held.
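
Something along these lines, doing the charge, the preload and the
page cache insertion before pte_offset_map_lock(), should do.  Untested
sketch; the error labels would need matching adjustment, and note that
shmem_add_to_page_cache() expects the page to be locked and swap-backed:

	__SetPageLocked(page);
	__SetPageSwapBacked(page);
	__SetPageUptodate(page);

	ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false);
	if (ret)
		goto out_release;

	/* preload and insert into the page cache with no spinlock held */
	ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
	if (!ret) {
		ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL);
		radix_tree_preload_end();
	}
	if (ret)
		goto out_release_uncharge;

	mem_cgroup_commit_charge(page, memcg, false, false);

	/* only now take the PTE lock */
	ret = -EEXIST;
	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	if (!pte_none(*dst_pte))
		goto out_release_uncharge_unlock;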

Hillf


* Re: [PATCH 2/7] userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support
  2016-08-04  9:37 ` [PATCH 2/7] userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support Hillf Danton
@ 2016-08-04 13:18   ` Mike Rapoport
From: Mike Rapoport @ 2016-08-04 13:18 UTC
  To: Hillf Danton; +Cc: linux-mm

On Thu, Aug 04, 2016 at 05:37:16PM +0800, Hillf Danton wrote:
> > 
> > +int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
> > +			   pmd_t *dst_pmd,
> > +			   struct vm_area_struct *dst_vma,
> > +			   unsigned long dst_addr,
> > +			   unsigned long src_addr,
> > +			   struct page **pagep)
> > +{
> > +	struct inode *inode = file_inode(dst_vma->vm_file);
> > +	struct shmem_inode_info *info = SHMEM_I(inode);
> > +	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
> > +	struct address_space *mapping = inode->i_mapping;
> > +	gfp_t gfp = mapping_gfp_mask(mapping);
> > +	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
> > +	struct mem_cgroup *memcg;
> > +	spinlock_t *ptl;
> > +	void *page_kaddr;
> > +	struct page *page;
> > +	pte_t _dst_pte, *dst_pte;
> > +	int ret;
> > +
> > +	if (!*pagep) {
> > +		ret = -ENOMEM;
> > +		if (shmem_acct_block(info->flags))
> > +			goto out;
> > +		if (sbinfo->max_blocks) {
> > +			if (percpu_counter_compare(&sbinfo->used_blocks,
> > +						   sbinfo->max_blocks) >= 0)
> > +				goto out_unacct_blocks;
> > +			percpu_counter_inc(&sbinfo->used_blocks);
> > +		}
> > +
> > +		page = shmem_alloc_page(gfp, info, pgoff);
> > +		if (!page)
> > +			goto out_dec_used_blocks;
> > +
> > +		page_kaddr = kmap_atomic(page);
> > +		ret = copy_from_user(page_kaddr, (const void __user *)src_addr,
> > +				     PAGE_SIZE);
> > +		kunmap_atomic(page_kaddr);
> > +
> > +		/* fallback to copy_from_user outside mmap_sem */
> > +		if (unlikely(ret)) {
> > +			*pagep = page;
> > +			/* don't free the page */
> > +			return -EFAULT;
> > +		}
> > +	} else {
> > +		page = *pagep;
> > +		*pagep = NULL;
> > +	}
> > +
> > +	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
> > +	if (dst_vma->vm_flags & VM_WRITE)
> > +		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
> > +
> > +	ret = -EEXIST;
> > +	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
> > +	if (!pte_none(*dst_pte))
> > +		goto out_release_uncharge_unlock;
> > +
> > +	__SetPageUptodate(page);
> > +
> > +	ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg,
> > +				    false);
> > +	if (ret)
> > +		goto out_release_uncharge_unlock;
> > +	ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
> 
> You have to preload the radix tree without &ptl held.

Thanks, will fix.
 
> Hillf
> 

--
Sincerely yours,
Mike.

