* [PATCH] [RFC] proc: Add mmap callback for /proc/<pid>/mem
@ 2024-09-13 17:40 Haider Miraj
2024-09-15 20:45 ` kernel test robot
2024-09-15 20:45 ` kernel test robot
0 siblings, 2 replies; 3+ messages in thread
From: Haider Miraj @ 2024-09-13 17:40 UTC (permalink / raw)
To: Andrew Morton; +Cc: xe-linux-external, linux-kernel, linux-fsdevel
This patch introduces memory mapping (mmap) support for the /proc/<pid>/mem
interface. The new functionality allows users to map the memory of another
process into their own address space, reusing the same pages.
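The idea is to mmap another process's memory by first pinning its pages in
memory and then using `remap_pfn_range` to map them as device memory, reusing
the same pages. A list of the pinned pages is maintained, and the pages are
released in the VMA close callback. This design has certain limitations: it does
not handle reverse mapping (which is required for swapping), and the VMA is not
expected to be split by an unmap call.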
I am seeking comments and advice on the following:
- Given that read access to `/proc/<pid>/mem` is already allowed for
privileged users, are there specific reasons or concerns that have prevented
the implementation of `mmap` for this interface?
- Is there a way to insert anonymous pages into a file-backed VMA so that it
honors reverse mapping, eliminating the need to keep track of pinned pages?
- I plan to implement a page fault handler as well.
I am looking for feedback on how to improve this implementation and what
additional considerations are necessary for it to be accepted by the community.
Cc: xe-linux-external@cisco.com
Signed-off-by: Haider Miraj <hmiraj@cisco.com>
---
fs/proc/base.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 129 insertions(+)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 72a1acd03675..405de47d0c1c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -117,6 +117,17 @@
static u8 nlink_tid __ro_after_init;
static u8 nlink_tgid __ro_after_init;
+/*
+ * Per-VMA bookkeeping for a /proc/<pid>/mem mapping, stored in
+ * vma->vm_private_data.
+ */
+struct vma_info {
+	/* List of struct page_list_item, one per page pinned for this mapping */
+	struct list_head page_list_head;
+	/*
+	 * VMA bounds recorded at mmap time. The close handler compares them with
+	 * the current vm_start/vm_end to detect that the VMA has been split.
+	 * Kept as unsigned long to match the type of vm_start/vm_end.
+	 */
+	unsigned long vma_start_addr;
+	unsigned long vma_end_addr;
+};
+
+/* One pinned page tracked for a /proc/<pid>/mem mapping. */
+struct page_list_item {
+ /* Link in vma_info.page_list_head */
+ struct list_head list;
+ /* The pinned page; the reference is dropped with put_page() on close */
+ struct page *page;
+};
+
struct pid_entry {
const char *name;
unsigned int len;
@@ -926,12 +937,130 @@ static int mem_release(struct inode *inode, struct file *file)
return 0;
}
+/*
+ * VMA close handler for /proc/<pid>/mem mappings.
+ *
+ * Drops the reference on every page recorded for this VMA and frees the
+ * bookkeeping. If the VMA bounds no longer match the ones recorded at mmap
+ * time, the VMA is taken to have been split: nothing is released and a
+ * warning is printed instead.
+ */
+static void mem_vma_close(struct vm_area_struct *vma)
+{
+	struct vma_info *tracking = vma->vm_private_data;
+	struct page_list_item *entry, *next;
+
+	if (!tracking)
+		return;
+
+	/* Bounds differ from the recorded ones: treat as a split and bail out */
+	if (tracking->vma_start_addr != vma->vm_start ||
+	    tracking->vma_end_addr != vma->vm_end) {
+		pr_warn("%s: VMA has been split, operation not supported\n", __func__);
+		return;
+	}
+
+	/* Unlink each entry, drop its page reference and free the entry */
+	list_for_each_entry_safe(entry, next, &tracking->page_list_head, list) {
+		list_del(&entry->list);
+		put_page(entry->page);
+		kfree(entry);
+	}
+
+	kfree(tracking);
+	vma->vm_private_data = NULL;
+}
+
+/*
+ * Refuse to split a /proc/<pid>/mem mapping.
+ *
+ * The close handler only releases the pinned pages when the VMA still has the
+ * bounds recorded at mmap time. After a split no piece would match, so the
+ * pinned pages and the bookkeeping would never be released. Rejecting the
+ * split (e.g. a partial munmap() or mprotect()) enforces that assumption.
+ */
+static int mem_vma_may_split(struct vm_area_struct *vma, unsigned long addr)
+{
+	return -EINVAL;
+}
+
+/* VMA callbacks for mappings created through /proc/<pid>/mem. */
+static const struct vm_operations_struct mem_vm_ops = {
+	.close = mem_vma_close,
+	.may_split = mem_vma_may_split,
+};
+
+/**
+ * mem_mmap - mmap handler for /proc/<pid>/mem
+ * @file: open /proc/<pid>/mem file; ->private_data holds the target mm_struct
+ * @vma: VMA being set up in the caller's address space
+ *
+ * Maps the target address range starting at vm_pgoff << PAGE_SHIFT, of the
+ * same length as @vma, into @vma one page at a time. Each target page that
+ * can be pinned without faulting (FOLL_NOFAULT) is inserted with
+ * remap_pfn_range() and recorded in a per-VMA list so that the close handler
+ * can drop the reference. Addresses for which no page could be pinned are
+ * backed by the zero page.
+ *
+ * Assumptions and Limitations:
+ * - This function does not handle reverse mapping, which is required for swapping.
+ * - The VMA is not expected to be split with an unmap call.
+ * - The caller's own address space cannot be mapped through this interface.
+ *
+ * Return: 0 on success; -EINVAL if there is no target mm, if the target is the
+ * caller's own mm or if the target range wraps; -ENOMEM on allocation
+ * failure; otherwise the error returned by remap_pfn_range().
+ */
+static int mem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	/* Retrieve mm of the target process */
+	struct mm_struct *mm = file->private_data;
+	unsigned long size = vma->vm_end - vma->vm_start;
+	unsigned long target_start_addr, target_end_addr;
+	unsigned long target_addr, map_addr, zero_page_pfn;
+	struct page_list_item *item, *tmp;
+	struct vma_info *info;
+	struct page *page;
+	long pinned;
+	int ret = 0;
+
+	if (!mm)
+		return -EINVAL;
+
+	/*
+	 * The mmap hook runs with the caller's mmap_lock held for write, so
+	 * taking the same lock for read below would self-deadlock.
+	 * NOTE(review): two tasks mapping each other's memory concurrently can
+	 * still deadlock on lock ordering; that needs a design-level answer.
+	 */
+	if (mm == vma->vm_mm)
+		return -EINVAL;
+
+	/* Reject target ranges that wrap around the address space */
+	target_start_addr = vma->vm_pgoff << PAGE_SHIFT;
+	target_end_addr = target_start_addr + size;
+	if ((target_start_addr >> PAGE_SHIFT) != vma->vm_pgoff ||
+	    target_end_addr < target_start_addr)
+		return -EINVAL;
+
+	/*
+	 * NOTE(review): the mapping is installed with vma->vm_page_prot, so a
+	 * shared writable mapping would let the caller write to the global zero
+	 * page and to target pages that were pinned without FOLL_WRITE. Consider
+	 * rejecting VM_WRITE here; confirm the intended policy.
+	 *
+	 * NOTE(review): the existing read/write paths presumably take a
+	 * reference on mm_users before touching the target mm; confirm whether
+	 * the same is needed here.
+	 */
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&info->page_list_head);
+	info->vma_start_addr = vma->vm_start;
+	info->vma_end_addr = vma->vm_end;
+
+	/*
+	 * A child created by fork() would inherit the VMA together with the
+	 * same vm_private_data, and the close handler would then run twice on
+	 * the same bookkeeping. Do not copy this VMA on fork.
+	 */
+	vm_flags_set(vma, VM_DONTCOPY);
+
+	zero_page_pfn = page_to_pfn(ZERO_PAGE(0));
+
+	/* Acquire the mmap_lock before pinning the page (get_user_pages_remote) */
+	down_read(&mm->mmap_lock);
+
+	for (target_addr = target_start_addr, map_addr = vma->vm_start;
+	     target_addr < target_end_addr;
+	     target_addr += PAGE_SIZE, map_addr += PAGE_SIZE) {
+		/* Pin the user page; @locked is NULL as we hold mmap_lock */
+		pinned = get_user_pages_remote(mm, target_addr, 1,
+					       FOLL_GET | FOLL_NOFAULT,
+					       &page, NULL);
+		/*
+		 * Page is not resident (FOLL_NOFAULT): back this address with
+		 * the zero page. Any other GUP failure is treated the same way.
+		 */
+		if (pinned <= 0) {
+			ret = remap_pfn_range(vma, map_addr, zero_page_pfn,
+					      PAGE_SIZE, vma->vm_page_prot);
+			if (ret)
+				break;
+			continue;
+		}
+
+		/* We need to keep track of pages which are pinned */
+		item = kmalloc(sizeof(*item), GFP_KERNEL);
+		if (!item) {
+			/* Not on the list yet, so drop this reference here */
+			put_page(page);
+			ret = -ENOMEM;
+			break;
+		}
+
+		item->page = page;
+		list_add(&item->list, &info->page_list_head);
+
+		/* Remap the page frame under current vma */
+		ret = remap_pfn_range(vma, map_addr, page_to_pfn(page),
+				      PAGE_SIZE, vma->vm_page_prot);
+		if (ret)
+			break;	/* item is on the list; released below */
+	}
+
+	up_read(&mm->mmap_lock);
+
+	if (ret)
+		goto err_release;
+
+	/* Publish the bookkeeping only once the whole range is mapped */
+	vma->vm_private_data = info;
+	vma->vm_ops = &mem_vm_ops;
+	return 0;
+
+err_release:
+	/* Undo everything: drop every pinned page and free the bookkeeping */
+	list_for_each_entry_safe(item, tmp, &info->page_list_head, list) {
+		list_del(&item->list);
+		put_page(item->page);
+		kfree(item);
+	}
+	kfree(info);
+	return ret;
+}
+
static const struct file_operations proc_mem_operations = {
.llseek = mem_lseek,
.read = mem_read,
.write = mem_write,
.open = mem_open,
.release = mem_release,
+ /* Map the target process's memory into the caller's address space */
+ .mmap = mem_mmap,
};
static int environ_open(struct inode *inode, struct file *file)
--
2.35.6
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] [RFC] proc: Add mmap callback for /proc/<pid>/mem
2024-09-13 17:40 [PATCH] [RFC] proc: Add mmap callback for /proc/<pid>/mem Haider Miraj
@ 2024-09-15 20:45 ` kernel test robot
2024-09-15 20:45 ` kernel test robot
1 sibling, 0 replies; 3+ messages in thread
From: kernel test robot @ 2024-09-15 20:45 UTC (permalink / raw)
To: Haider Miraj; +Cc: oe-kbuild-all
Hi Haider,
[This is a private test report for your RFC patch.]
kernel test robot noticed the following build errors:
[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v6.11]
[cannot apply to next-20240913]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Haider-Miraj/proc-Add-mmap-callback-for-proc-pid-mem/20240914-014144
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20240913174003.1786581-1-hmiraj%40cisco.com
patch subject: [PATCH] [RFC] proc: Add mmap callback for /proc/<pid>/mem
config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20240916/202409160444.PG4iJNtn-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240916/202409160444.PG4iJNtn-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409160444.PG4iJNtn-lkp@intel.com/
All errors (new ones prefixed by >>):
fs/proc/base.c: In function 'mem_mmap':
>> fs/proc/base.c:1021:26: error: too many arguments to function 'get_user_pages_remote'
1021 | pinned = get_user_pages_remote(mm, addr, 1, FOLL_GET | FOLL_NOFAULT,
| ^~~~~~~~~~~~~~~~~~~~~
In file included from fs/proc/base.c:67:
include/linux/mm.h:2462:6: note: declared here
2462 | long get_user_pages_remote(struct mm_struct *mm,
| ^~~~~~~~~~~~~~~~~~~~~
vim +/get_user_pages_remote +1021 fs/proc/base.c
970
971 /**
972 * mem_mmap - Memory mapping function
973 *
974 * This function implements mmap call for /proc/<pid>/mem.
975 *
976 * Assumptions and Limitations:
977 * - This function does not handle reverse mapping, which is required for swapping.
978 * - The VMA is not expected to be split with an unmap call.
979 */
980 static int mem_mmap(struct file *file, struct vm_area_struct *vma)
981 {
982 uintptr_t addr, target_start_addr, target_end_addr;
983 struct page_list_item *item;
984 struct page *page, *zero_page;
985 unsigned long zero_page_pfn;
986 struct vma_info *info;
987 long pinned;
988 int ret;
989
990 /* Retrieve mm of the target process*/
991 struct mm_struct *mm = (struct mm_struct *)file->private_data;
992 size_t size = vma->vm_end - vma->vm_start;
993 uintptr_t start_addr = vma->vm_start;
994
995 target_start_addr = vma->vm_pgoff << PAGE_SHIFT; /* Multiply by PAGE_SIZE */
996 target_end_addr = target_start_addr + size;
997
998 if (!mm)
999 return -EINVAL;
1000
1001 info = kmalloc(sizeof(struct vma_info), GFP_KERNEL);
1002 if (!info)
1003 return -ENOMEM;
1004 INIT_LIST_HEAD(&info->page_list_head);
1005 info->vma_start_addr = vma->vm_start;
1006 info->vma_end_addr = vma->vm_end;
1007
1008 vma->vm_private_data = info;
1009 vma->vm_ops = &mem_vm_ops;
1010
1011 zero_page = ZERO_PAGE(0);
1012 zero_page_pfn = page_to_pfn(zero_page);
1013
1014 /* Acquire the mmap_lock before pinning the page (get_user_pages_remote) */
1015 down_read(&mm->mmap_lock);
1016
1017 for (addr = target_start_addr; addr < target_end_addr; addr += PAGE_SIZE) {
1018 unsigned long pfn;
1019
1020 /* Pin the user page */
> 1021 pinned = get_user_pages_remote(mm, addr, 1, FOLL_GET | FOLL_NOFAULT,
1022 &page, NULL, NULL);
1023 /* Page is not resident (FOLL_NOFAULT), we will skip to the next address */
1024 if (pinned <= 0) {
1025 ret = remap_pfn_range(vma, start_addr, zero_page_pfn, PAGE_SIZE,
1026 vma->vm_page_prot);
1027 if (ret)
1028 goto err_unlock;
1029 start_addr += PAGE_SIZE;
1030 continue;
1031 }
1032
1033 /* We need to keep track of pages which are pinned */
1034 item = kmalloc(sizeof(struct page_list_item), GFP_KERNEL);
1035 if (!item) {
1036 kfree(info);
1037 return -ENOMEM;
1038 }
1039
1040 item->page = page;
1041 list_add(&item->list, &info->page_list_head);
1042 pfn = page_to_pfn(page);
1043
1044 /* Remap the page frame under current vma */
1045 ret = remap_pfn_range(vma, start_addr, pfn, PAGE_SIZE,
1046 vma->vm_page_prot);
1047 if (ret)
1048 kfree(item);
1049
1050 start_addr += PAGE_SIZE;
1051 }
1052 err_unlock:
1053 up_read(&mm->mmap_lock);
1054 return 0;
1055 }
1056
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] [RFC] proc: Add mmap callback for /proc/<pid>/mem
2024-09-13 17:40 [PATCH] [RFC] proc: Add mmap callback for /proc/<pid>/mem Haider Miraj
2024-09-15 20:45 ` kernel test robot
@ 2024-09-15 20:45 ` kernel test robot
1 sibling, 0 replies; 3+ messages in thread
From: kernel test robot @ 2024-09-15 20:45 UTC (permalink / raw)
To: Haider Miraj; +Cc: llvm, oe-kbuild-all
Hi Haider,
[This is a private test report for your RFC patch.]
kernel test robot noticed the following build errors:
[auto build test ERROR on akpm-mm/mm-everything]
[also build test ERROR on linus/master v6.11]
[cannot apply to next-20240913]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Haider-Miraj/proc-Add-mmap-callback-for-proc-pid-mem/20240914-014144
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link: https://lore.kernel.org/r/20240913174003.1786581-1-hmiraj%40cisco.com
patch subject: [PATCH] [RFC] proc: Add mmap callback for /proc/<pid>/mem
config: x86_64-allnoconfig (https://download.01.org/0day-ci/archive/20240916/202409160450.N6EXHAts-lkp@intel.com/config)
compiler: clang version 18.1.8 (https://github.com/llvm/llvm-project 3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240916/202409160450.N6EXHAts-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409160450.N6EXHAts-lkp@intel.com/
All errors (new ones prefixed by >>):
>> fs/proc/base.c:1022:20: error: too many arguments to function call, expected 6, have 7
1021 | pinned = get_user_pages_remote(mm, addr, 1, FOLL_GET | FOLL_NOFAULT,
| ~~~~~~~~~~~~~~~~~~~~~
1022 | &page, NULL, NULL);
| ^~~~
include/linux/stddef.h:8:14: note: expanded from macro 'NULL'
8 | #define NULL ((void *)0)
| ^~~~~~~~~~~
include/linux/mm.h:2462:6: note: 'get_user_pages_remote' declared here
2462 | long get_user_pages_remote(struct mm_struct *mm,
| ^ ~~~~~~~~~~~~~~~~~~~~~
2463 | unsigned long start, unsigned long nr_pages,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2464 | unsigned int gup_flags, struct page **pages,
| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2465 | int *locked);
| ~~~~~~~~~~~
1 error generated.
vim +1022 fs/proc/base.c
970
971 /**
972 * mem_mmap - Memory mapping function
973 *
974 * This function implements mmap call for /proc/<pid>/mem.
975 *
976 * Assumptions and Limitations:
977 * - This function does not handle reverse mapping, which is required for swapping.
978 * - The VMA is not expected to be split with an unmap call.
979 */
980 static int mem_mmap(struct file *file, struct vm_area_struct *vma)
981 {
982 uintptr_t addr, target_start_addr, target_end_addr;
983 struct page_list_item *item;
984 struct page *page, *zero_page;
985 unsigned long zero_page_pfn;
986 struct vma_info *info;
987 long pinned;
988 int ret;
989
990 /* Retrieve mm of the target process*/
991 struct mm_struct *mm = (struct mm_struct *)file->private_data;
992 size_t size = vma->vm_end - vma->vm_start;
993 uintptr_t start_addr = vma->vm_start;
994
995 target_start_addr = vma->vm_pgoff << PAGE_SHIFT; /* Multiply by PAGE_SIZE */
996 target_end_addr = target_start_addr + size;
997
998 if (!mm)
999 return -EINVAL;
1000
1001 info = kmalloc(sizeof(struct vma_info), GFP_KERNEL);
1002 if (!info)
1003 return -ENOMEM;
1004 INIT_LIST_HEAD(&info->page_list_head);
1005 info->vma_start_addr = vma->vm_start;
1006 info->vma_end_addr = vma->vm_end;
1007
1008 vma->vm_private_data = info;
1009 vma->vm_ops = &mem_vm_ops;
1010
1011 zero_page = ZERO_PAGE(0);
1012 zero_page_pfn = page_to_pfn(zero_page);
1013
1014 /* Acquire the mmap_lock before pinning the page (get_user_pages_remote) */
1015 down_read(&mm->mmap_lock);
1016
1017 for (addr = target_start_addr; addr < target_end_addr; addr += PAGE_SIZE) {
1018 unsigned long pfn;
1019
1020 /* Pin the user page */
1021 pinned = get_user_pages_remote(mm, addr, 1, FOLL_GET | FOLL_NOFAULT,
> 1022 &page, NULL, NULL);
1023 /* Page is not resident (FOLL_NOFAULT), we will skip to the next address */
1024 if (pinned <= 0) {
1025 ret = remap_pfn_range(vma, start_addr, zero_page_pfn, PAGE_SIZE,
1026 vma->vm_page_prot);
1027 if (ret)
1028 goto err_unlock;
1029 start_addr += PAGE_SIZE;
1030 continue;
1031 }
1032
1033 /* We need to keep track of pages which are pinned */
1034 item = kmalloc(sizeof(struct page_list_item), GFP_KERNEL);
1035 if (!item) {
1036 kfree(info);
1037 return -ENOMEM;
1038 }
1039
1040 item->page = page;
1041 list_add(&item->list, &info->page_list_head);
1042 pfn = page_to_pfn(page);
1043
1044 /* Remap the page frame under current vma */
1045 ret = remap_pfn_range(vma, start_addr, pfn, PAGE_SIZE,
1046 vma->vm_page_prot);
1047 if (ret)
1048 kfree(item);
1049
1050 start_addr += PAGE_SIZE;
1051 }
1052 err_unlock:
1053 up_read(&mm->mmap_lock);
1054 return 0;
1055 }
1056
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-09-15 20:45 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-09-13 17:40 [PATCH] [RFC] proc: Add mmap callback for /proc/<pid>/mem Haider Miraj
2024-09-15 20:45 ` kernel test robot
2024-09-15 20:45 ` kernel test robot
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.