* [PATCH 2/7] userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support
  In reply to: [PATCH 0/7] userfaultfd: add support for shared memory
From: Mike Rapoport @ 2016-08-04  8:14 UTC
To: Andrea Arcangeli
Cc: Hugh Dickins, Pavel Emelyanov, linux-mm, linux-kernel,
Mike Rapoport
shmem_mcopy_atomic_pte() is the low-level routine that implements the
userfaultfd UFFDIO_COPY command for shared memory (shmem) areas.  It is
based on the existing mcopy_atomic_pte() routine used for anonymous
memory, with modifications to insert the newly allocated page into the
shmem page cache and to update the shmem block accounting.
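
For context, the new path is driven from userspace by the existing
UFFDIO_COPY ioctl on a userfaultfd that has been registered over the
faulting shmem range.  A minimal, illustrative sketch (uffd, fault_addr,
src_buf and page_size are placeholder names, not part of this patch):

	#include <linux/userfaultfd.h>
	#include <sys/ioctl.h>

	/* Resolve a fault reported on a shmem-backed mapping by copying
	 * a prepared page into place; with this series the kernel side
	 * of the copy is handled by shmem_mcopy_atomic_pte().
	 */
	static int resolve_fault(int uffd, unsigned long fault_addr,
				 void *src_buf, size_t page_size)
	{
		struct uffdio_copy copy = {
			.dst  = fault_addr & ~(page_size - 1),
			.src  = (unsigned long)src_buf,
			.len  = page_size,
			.mode = 0,
		};

		return ioctl(uffd, UFFDIO_COPY, &copy);
	}
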
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
---
 include/linux/shmem_fs.h |  11 +++++
 mm/shmem.c               | 109 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 120 insertions(+)
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 4d4780c..8dcbdfd 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -83,4 +83,15 @@ static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a)
 #endif
 
+#ifdef CONFIG_SHMEM
+extern int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
+				  struct vm_area_struct *dst_vma,
+				  unsigned long dst_addr,
+				  unsigned long src_addr,
+				  struct page **pagep);
+#else
+#define shmem_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
+			       src_addr, pagep) ({ BUG(); 0; })
+#endif
+
 #endif
diff --git a/mm/shmem.c b/mm/shmem.c
index a361449..fcf560c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -69,6 +69,7 @@ static struct vfsmount *shm_mnt;
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
 #include <uapi/linux/memfd.h>
+#include <linux/rmap.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -1548,6 +1549,114 @@ bool shmem_mapping(struct address_space *mapping)
 	return mapping->host->i_sb->s_op == &shmem_ops;
 }
 
+int shmem_mcopy_atomic_pte(struct mm_struct *dst_mm,
+			   pmd_t *dst_pmd,
+			   struct vm_area_struct *dst_vma,
+			   unsigned long dst_addr,
+			   unsigned long src_addr,
+			   struct page **pagep)
+{
+	struct inode *inode = file_inode(dst_vma->vm_file);
+	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	struct address_space *mapping = inode->i_mapping;
+	gfp_t gfp = mapping_gfp_mask(mapping);
+	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
+	struct mem_cgroup *memcg;
+	spinlock_t *ptl;
+	void *page_kaddr;
+	struct page *page;
+	pte_t _dst_pte, *dst_pte;
+	int ret;
+
+	if (!*pagep) {
+		ret = -ENOMEM;
+		if (shmem_acct_block(info->flags))
+			goto out;
+		if (sbinfo->max_blocks) {
+			if (percpu_counter_compare(&sbinfo->used_blocks,
+						   sbinfo->max_blocks) >= 0)
+				goto out_unacct_blocks;
+			percpu_counter_inc(&sbinfo->used_blocks);
+		}
+
+		page = shmem_alloc_page(gfp, info, pgoff);
+		if (!page)
+			goto out_dec_used_blocks;
+
+		page_kaddr = kmap_atomic(page);
+		ret = copy_from_user(page_kaddr, (const void __user *)src_addr,
+				     PAGE_SIZE);
+		kunmap_atomic(page_kaddr);
+
+		/* fallback to copy_from_user outside mmap_sem */
+		if (unlikely(ret)) {
+			*pagep = page;
+			/* don't free the page */
+			return -EFAULT;
+		}
+	} else {
+		page = *pagep;
+		*pagep = NULL;
+	}
+
+	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
+	if (dst_vma->vm_flags & VM_WRITE)
+		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
+
+	ret = -EEXIST;
+	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
+	if (!pte_none(*dst_pte))
+		goto out_release_uncharge_unlock;
+
+	__SetPageUptodate(page);
+
+	ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg,
+				    false);
+	if (ret)
+		goto out_release_uncharge_unlock;
+	ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
+	if (!ret) {
+		ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL);
+		radix_tree_preload_end();
+	}
+	if (ret) {
+		mem_cgroup_cancel_charge(page, memcg, false);
+		goto out_release_uncharge_unlock;
+	}
+
+	mem_cgroup_commit_charge(page, memcg, false, false);
+	lru_cache_add_anon(page);
+
+	spin_lock(&info->lock);
+	info->alloced++;
+	inode->i_blocks += BLOCKS_PER_PAGE;
+	shmem_recalc_inode(inode);
+	spin_unlock(&info->lock);
+
+	inc_mm_counter(dst_mm, mm_counter_file(page));
+	page_add_file_rmap(page);
+	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
+
+	/* No need to invalidate - it was non-present before */
+	update_mmu_cache(dst_vma, dst_addr, dst_pte);
+	unlock_page(page);
+	pte_unmap_unlock(dst_pte, ptl);
+	ret = 0;
+out:
+	return ret;
+out_release_uncharge_unlock:
+	pte_unmap_unlock(dst_pte, ptl);
+	mem_cgroup_cancel_charge(page, memcg, false);
+	put_page(page);
+out_dec_used_blocks:
+	if (sbinfo->max_blocks)
+		percpu_counter_add(&sbinfo->used_blocks, -1);
+out_unacct_blocks:
+	shmem_unacct_blocks(info->flags, 1);
+	goto out;
+}
+
 #ifdef CONFIG_TMPFS
 static const struct inode_operations shmem_symlink_inode_operations;
 static const struct inode_operations shmem_short_symlink_operations;
--
1.9.1
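
The -EFAULT/*pagep dance in the code above follows the same fallback
protocol as the anonymous mcopy_atomic_pte(): if the in-atomic
copy_from_user() fails, the still-blank page is handed back to the
caller, which redoes the copy in a context that may sleep and then
calls in again with *pagep set so the atomic copy is skipped.  A hedged
sketch of that caller side, loosely modelled on __mcopy_atomic() in
mm/userfaultfd.c (vma revalidation after retaking mmap_sem and the full
error unwinding are omitted):

	struct page *page = NULL;
	void *page_kaddr;
	int err;

	err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
				     dst_addr, src_addr, &page);
	if (unlikely(err == -EFAULT)) {
		/* fill the page while sleeping is allowed */
		up_read(&dst_mm->mmap_sem);
		page_kaddr = kmap(page);
		err = copy_from_user(page_kaddr,
				     (const void __user *)src_addr,
				     PAGE_SIZE) ? -EFAULT : 0;
		kunmap(page);
		down_read(&dst_mm->mmap_sem);

		if (err)
			put_page(page);	/* the source address really is bad */
		else
			err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd,
						     dst_vma, dst_addr,
						     src_addr, &page);
	}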
* Re: [PATCH 2/7] userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support
From: Hillf Danton @ 2016-08-04  9:37 UTC
To: 'Mike Rapoport'; +Cc: linux-mm
> [...]
> +	ret = -EEXIST;
> +	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
> +	if (!pte_none(*dst_pte))
> +		goto out_release_uncharge_unlock;
> +
> +	__SetPageUptodate(page);
> +
> +	ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg,
> +				    false);
> +	if (ret)
> +		goto out_release_uncharge_unlock;
> +	ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
You have to load the radix tree without &ptl held.
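
A minimal sketch of such a reordering (the charge and the preload move
in front of pte_offset_map_lock(), so nothing that can sleep runs under
the page table spinlock; the out_release* labels and their unwind paths
are only illustrative):

	ret = mem_cgroup_try_charge(page, dst_mm, gfp, &memcg, false);
	if (ret)
		goto out_release;

	ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
	if (!ret) {
		ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL);
		radix_tree_preload_end();
	}
	if (ret)
		goto out_release_uncharge;

	/* only now take the pte lock for the pte_none() check */
	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
	ret = -EEXIST;
	if (!pte_none(*dst_pte))
		goto out_release_uncharge_unlock;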
Hillf
* Re: [PATCH 2/7] userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support
From: Mike Rapoport @ 2016-08-04 13:18 UTC
To: Hillf Danton; +Cc: linux-mm
On Thu, Aug 04, 2016 at 05:37:16PM +0800, Hillf Danton wrote:
> > [...]
> > +	ret = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
>
> You have to load the radix tree without &ptl held.
Thanks, will fix.
> Hillf
>
--
Sincerely yours,
Mike.