All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andi Kleen <ak@muc.de>
To: torvalds@osdl.org
Cc: akpm@osdl.org, linux-kernel@vger.kernel.org, bos@serpentine.com
Subject: [PATCH] Fix mlockall for PROT_NONE mappings
Date: Thu, 9 Oct 2003 12:42:18 +0200	[thread overview]
Message-ID: <20031009104218.GA1935@averell> (raw)


The x86-64 ld.so always puts a PROT_NONE mapping into every 64-bit process's
address space.

2a95791000-2a9586d000 ---p 00124000 03:01 788090                         /lib64/libc.so.6

This broke mlockall on x86-64, which ran into this mapping, always
returned an error, and stopped early before mlocking everything.

This patch fixes mlockall to ignore errors for such mappings.

I added a new value, force == 2, for the force argument of get_user_pages;
it means to ignore SIGBUS or inaccessible-page errors. MAY_* is still checked,
as with the old force == 1; it just doesn't error out now for SIGBUS
errors from handle_mm_fault.

make_pages_present also has a new "tolerant" argument now that sets
force == 2. The patch sets it for mlockall and mmap (which didn't
check its return value), but not for mlock(). Arguably it could
be left unset for mmap on a VM_LOCKED vma.

-Andi

diff -u linux-test7-work/mm/memory.c-o linux-test7-work/mm/memory.c
--- linux-test7-work/mm/memory.c-o	2003-10-09 00:29:01.000000000 +0200
+++ linux-test7-work/mm/memory.c	2003-12-04 11:27:43.000000000 +0100
@@ -674,12 +674,15 @@
 
 static inline struct page *get_page_map(struct page *page)
 {
+	if (!page)
+		return 0;
 	if (!pfn_valid(page_to_pfn(page)))
 		return 0;
 	return page;
 }
 
 
+/* force == 2 means tolerate holes in the mapping */
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, int len, int write, int force,
 		struct page **pages, struct vm_area_struct **vmas)
@@ -763,6 +766,10 @@
 					tsk->maj_flt++;
 					break;
 				case VM_FAULT_SIGBUS:
+					if (force == 2) { 
+						map = NULL;
+						break;
+					}
 					return i ? i : -EFAULT;
 				case VM_FAULT_OOM:
 					return i ? i : -ENOMEM;
@@ -770,19 +777,22 @@
 					BUG();
 				}
 				spin_lock(&mm->page_table_lock);
+				if (!map) 
+					break; 
 			}
 			if (pages) {
 				pages[i] = get_page_map(map);
-				if (!pages[i]) {
+				if (!pages[i] && force != 2) {
 					spin_unlock(&mm->page_table_lock);
 					while (i--)
 						page_cache_release(pages[i]);
 					i = -EFAULT;
 					goto out;
+				} else { 
+					flush_dcache_page(pages[i]);
+					if (!PageReserved(pages[i]))
+						page_cache_get(pages[i]);
 				}
-				flush_dcache_page(pages[i]);
-				if (!PageReserved(pages[i]))
-					page_cache_get(pages[i]);
 			}
 			if (vmas)
 				vmas[i] = vma;
@@ -1655,7 +1665,7 @@
 	return pmd_offset(pgd, address);
 }
 
-int make_pages_present(unsigned long addr, unsigned long end)
+int make_pages_present(unsigned long addr, unsigned long end, int tolerant)
 {
 	int ret, len, write;
 	struct vm_area_struct * vma;
@@ -1668,7 +1678,7 @@
 		BUG();
 	len = (end+PAGE_SIZE-1)/PAGE_SIZE-addr/PAGE_SIZE;
 	ret = get_user_pages(current, current->mm, addr,
-			len, write, 0, NULL, NULL);
+			len, write, tolerant ? 2 : 0, NULL, NULL);
 	if (ret < 0)
 		return ret;
 	return ret == len ? 0 : -1;
diff -u linux-test7-work/mm/mlock.c-o linux-test7-work/mm/mlock.c
--- linux-test7-work/mm/mlock.c-o	2003-09-28 10:53:25.000000000 +0200
+++ linux-test7-work/mm/mlock.c	2003-12-04 10:58:22.000000000 +0100
@@ -10,7 +10,8 @@
 
 
 static int mlock_fixup(struct vm_area_struct * vma, 
-	unsigned long start, unsigned long end, unsigned int newflags)
+	unsigned long start, unsigned long end, unsigned int newflags,
+	int tolerant)
 {
 	struct mm_struct * mm = vma->vm_mm;
 	int pages;
@@ -43,7 +44,7 @@
 	pages = (end - start) >> PAGE_SHIFT;
 	if (newflags & VM_LOCKED) {
 		pages = -pages;
-		ret = make_pages_present(start, end);
+		ret = make_pages_present(start, end, tolerant);
 	}
 
 	vma->vm_mm->locked_vm -= pages;
@@ -79,13 +80,13 @@
 			newflags &= ~VM_LOCKED;
 
 		if (vma->vm_end >= end) {
-			error = mlock_fixup(vma, nstart, end, newflags);
+			error = mlock_fixup(vma, nstart, end, newflags, 0);
 			break;
 		}
 
 		tmp = vma->vm_end;
 		next = vma->vm_next;
-		error = mlock_fixup(vma, nstart, tmp, newflags);
+		error = mlock_fixup(vma, nstart, tmp, newflags, 0);
 		if (error)
 			break;
 		nstart = tmp;
@@ -154,7 +155,7 @@
 		newflags = vma->vm_flags | VM_LOCKED;
 		if (!(flags & MCL_CURRENT))
 			newflags &= ~VM_LOCKED;
-		error = mlock_fixup(vma, vma->vm_start, vma->vm_end, newflags);
+		error = mlock_fixup(vma, vma->vm_start, vma->vm_end, newflags, 1);
 		if (error)
 			break;
 	}
diff -u linux-test7-work/mm/mremap.c-o linux-test7-work/mm/mremap.c
--- linux-test7-work/mm/mremap.c-o	2003-09-11 04:12:42.000000000 +0200
+++ linux-test7-work/mm/mremap.c	2003-12-04 11:00:25.000000000 +0100
@@ -281,7 +281,7 @@
 			current->mm->locked_vm += new_len >> PAGE_SHIFT;
 			if (new_len > old_len)
 				make_pages_present(new_addr + old_len,
-						   new_addr + new_len);
+						   new_addr + new_len, 2);
 		}
 		return new_addr;
 	}
@@ -405,7 +405,7 @@
 			if (vma->vm_flags & VM_LOCKED) {
 				current->mm->locked_vm += pages;
 				make_pages_present(addr + old_len,
-						   addr + new_len);
+						   addr + new_len, 2);
 			}
 			ret = addr;
 			goto out;
diff -u linux-test7-work/mm/mmap.c-o linux-test7-work/mm/mmap.c
--- linux-test7-work/mm/mmap.c-o	2003-10-09 00:29:01.000000000 +0200
+++ linux-test7-work/mm/mmap.c	2003-12-04 11:02:17.000000000 +0100
@@ -655,7 +655,7 @@
 	mm->total_vm += len >> PAGE_SHIFT;
 	if (vm_flags & VM_LOCKED) {
 		mm->locked_vm += len >> PAGE_SHIFT;
-		make_pages_present(addr, addr + len);
+		make_pages_present(addr, addr + len, 2);
 	}
 	if (flags & MAP_POPULATE) {
 		up_write(&mm->mmap_sem);
@@ -910,7 +910,7 @@
 	if (!prev || expand_stack(prev, addr))
 		return NULL;
 	if (prev->vm_flags & VM_LOCKED) {
-		make_pages_present(addr, prev->vm_end);
+		make_pages_present(addr, prev->vm_end, 2);
 	}
 	return prev;
 }
@@ -971,7 +971,7 @@
 	if (expand_stack(vma, addr))
 		return NULL;
 	if (vma->vm_flags & VM_LOCKED) {
-		make_pages_present(addr, start);
+		make_pages_present(addr, start, 2);
 	}
 	return vma;
 }
@@ -1345,7 +1345,7 @@
 	mm->total_vm += len >> PAGE_SHIFT;
 	if (flags & VM_LOCKED) {
 		mm->locked_vm += len >> PAGE_SHIFT;
-		make_pages_present(addr, addr + len);
+		make_pages_present(addr, addr + len, 2);
 	}
 	return addr;
 }
diff -u linux-test7-work/include/linux/mm.h-o linux-test7-work/include/linux/mm.h
--- linux-test7-work/include/linux/mm.h-o	2003-10-09 00:29:00.000000000 +0200
+++ linux-test7-work/include/linux/mm.h	2003-12-04 11:00:25.000000000 +0100
@@ -433,7 +433,7 @@
 extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
 extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
 extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
-extern int make_pages_present(unsigned long addr, unsigned long end);
+extern int make_pages_present(unsigned long addr, unsigned long end, int tolerant);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
 extern long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long nonblock);
 extern long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice);


             reply	other threads:[~2003-10-09 10:42 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-10-09 10:42 Andi Kleen [this message]
2003-10-09 10:49 ` [PATCH] Fix mlockall for PROT_NONE mappings Muli Ben-Yehuda
2003-10-09 11:22   ` Andi Kleen
2003-10-09 11:24     ` Muli Ben-Yehuda
2003-10-09 11:31       ` Andi Kleen
2003-10-09 14:44 ` Linus Torvalds
2003-10-09 14:52   ` Andi Kleen
2003-10-09 14:56     ` Linus Torvalds
2003-10-09 15:12       ` Andi Kleen
2003-10-09 15:17         ` Linus Torvalds
2003-10-09 15:33           ` Andi Kleen
2003-10-09 15:40             ` Linus Torvalds
2003-10-09 16:34               ` Andi Kleen
2003-10-09 17:28                 ` Linus Torvalds

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20031009104218.GA1935@averell \
    --to=ak@muc.de \
    --cc=akpm@osdl.org \
    --cc=bos@serpentine.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.