linux-mm.kvack.org archive mirror
* [PATCH] mm: vmalloc: simplify vread/vwrite to use existing mappings
@ 2017-06-07 18:20 Ard Biesheuvel
  2017-06-07 18:22 ` Ard Biesheuvel
  2017-06-08 16:06 ` Russell King - ARM Linux
  0 siblings, 2 replies; 4+ messages in thread
From: Ard Biesheuvel @ 2017-06-07 18:20 UTC (permalink / raw)
  To: linux-mm
  Cc: akpm, mhocko, zhongjiang, labbott, mark.rutland, linux-arm-kernel,
	Ard Biesheuvel

The vread() and vwrite() routines contain elaborate plumbing to access
the contents of vmalloc/vmap regions safely. According to the comments,
this removes the need for locking, but given that both these routines
execute with the vmap_area_lock spinlock held anyway, this is not much
of an advantage, and so the only safety these routines provide is the
assurance that only valid mappings are dereferenced.

The current safe path iterates over each mapping page by page, and
kmap()'s each one individually, which is expensive and unnecessary.
Instead, let's use kern_addr_valid() to establish on a per-VMA basis
whether we may safely dereference them, and do so via its mapping in
the VMALLOC region. This can be done safely due to the fact that we
are holding the vmap_area_lock spinlock.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 mm/vmalloc.c | 103 ++------------------
 1 file changed, 10 insertions(+), 93 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 34a1c3e46ed7..982d29511f92 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1983,87 +1983,6 @@ void *vmalloc_32_user(unsigned long size)
 }
 EXPORT_SYMBOL(vmalloc_32_user);
 
-/*
- * small helper routine , copy contents to buf from addr.
- * If the page is not present, fill zero.
- */
-
-static int aligned_vread(char *buf, char *addr, unsigned long count)
-{
-	struct page *p;
-	int copied = 0;
-
-	while (count) {
-		unsigned long offset, length;
-
-		offset = offset_in_page(addr);
-		length = PAGE_SIZE - offset;
-		if (length > count)
-			length = count;
-		p = vmalloc_to_page(addr);
-		/*
-		 * To do safe access to this _mapped_ area, we need
-		 * lock. But adding lock here means that we need to add
-		 * overhead of vmalloc()/vfree() calles for this _debug_
-		 * interface, rarely used. Instead of that, we'll use
-		 * kmap() and get small overhead in this access function.
-		 */
-		if (p) {
-			/*
-			 * we can expect USER0 is not used (see vread/vwrite's
-			 * function description)
-			 */
-			void *map = kmap_atomic(p);
-			memcpy(buf, map + offset, length);
-			kunmap_atomic(map);
-		} else
-			memset(buf, 0, length);
-
-		addr += length;
-		buf += length;
-		copied += length;
-		count -= length;
-	}
-	return copied;
-}
-
-static int aligned_vwrite(char *buf, char *addr, unsigned long count)
-{
-	struct page *p;
-	int copied = 0;
-
-	while (count) {
-		unsigned long offset, length;
-
-		offset = offset_in_page(addr);
-		length = PAGE_SIZE - offset;
-		if (length > count)
-			length = count;
-		p = vmalloc_to_page(addr);
-		/*
-		 * To do safe access to this _mapped_ area, we need
-		 * lock. But adding lock here means that we need to add
-		 * overhead of vmalloc()/vfree() calles for this _debug_
-		 * interface, rarely used. Instead of that, we'll use
-		 * kmap() and get small overhead in this access function.
-		 */
-		if (p) {
-			/*
-			 * we can expect USER0 is not used (see vread/vwrite's
-			 * function description)
-			 */
-			void *map = kmap_atomic(p);
-			memcpy(map + offset, buf, length);
-			kunmap_atomic(map);
-		}
-		addr += length;
-		buf += length;
-		copied += length;
-		count -= length;
-	}
-	return copied;
-}
-
 /**
  *	vread() -  read vmalloc area in a safe way.
  *	@buf:		buffer for reading data
@@ -2083,10 +2002,8 @@ static int aligned_vwrite(char *buf, char *addr, unsigned long count)
  *	If [addr...addr+count) doesn't includes any intersects with alive
  *	vm_struct area, returns 0. @buf should be kernel's buffer.
  *
- *	Note: In usual ops, vread() is never necessary because the caller
- *	should know vmalloc() area is valid and can use memcpy().
- *	This is for routines which have to access vmalloc area without
- *	any informaion, as /dev/kmem.
+ *	Note: This routine executes with the vmap_area_lock spinlock held,
+ *	which means it can safely access mappings at their virtual address.
  *
  */
 
@@ -2125,8 +2042,9 @@ long vread(char *buf, char *addr, unsigned long count)
 		n = vaddr + get_vm_area_size(vm) - addr;
 		if (n > count)
 			n = count;
-		if (!(vm->flags & VM_IOREMAP))
-			aligned_vread(buf, addr, n);
+		if (!(vm->flags & VM_IOREMAP) &&
+		    kern_addr_valid((unsigned long)addr))
+			memcpy(buf, addr, n);
 		else /* IOREMAP area is treated as memory hole */
 			memset(buf, 0, n);
 		buf += n;
@@ -2165,10 +2083,8 @@ long vread(char *buf, char *addr, unsigned long count)
  *	If [addr...addr+count) doesn't includes any intersects with alive
  *	vm_struct area, returns 0. @buf should be kernel's buffer.
  *
- *	Note: In usual ops, vwrite() is never necessary because the caller
- *	should know vmalloc() area is valid and can use memcpy().
- *	This is for routines which have to access vmalloc area without
- *	any informaion, as /dev/kmem.
+ *	Note: This routine executes with the vmap_area_lock spinlock held,
+ *	which means it can safely access mappings at their virtual address.
  */
 
 long vwrite(char *buf, char *addr, unsigned long count)
@@ -2206,8 +2122,9 @@ long vwrite(char *buf, char *addr, unsigned long count)
 		n = vaddr + get_vm_area_size(vm) - addr;
 		if (n > count)
 			n = count;
-		if (!(vm->flags & VM_IOREMAP)) {
-			aligned_vwrite(buf, addr, n);
+		if (!(vm->flags & VM_IOREMAP) &&
+		    kern_addr_valid((unsigned long)addr)) {
+			memcpy(addr, buf, n);
 			copied++;
 		}
 		buf += n;
-- 
2.9.3


* Re: [PATCH] mm: vmalloc: simplify vread/vwrite to use existing mappings
  2017-06-07 18:20 [PATCH] mm: vmalloc: simplify vread/vwrite to use existing mappings Ard Biesheuvel
@ 2017-06-07 18:22 ` Ard Biesheuvel
  2017-06-08 16:06 ` Russell King - ARM Linux
  1 sibling, 0 replies; 4+ messages in thread
From: Ard Biesheuvel @ 2017-06-07 18:22 UTC (permalink / raw)
  To: linux-mm@kvack.org
  Cc: Andrew Morton, Michal Hocko, Zhong Jiang, Laura Abbott,
	Mark Rutland, linux-arm-kernel@lists.infradead.org,
	Ard Biesheuvel

On 7 June 2017 at 18:20, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> The vread() and vwrite() routines contain elaborate plumbing to access
> the contents of vmalloc/vmap regions safely. According to the comments,
> this removes the need for locking, but given that both these routines
> execute with the vmap_area_lock spinlock held anyway, this is not much
> of an advantage, and so the only safety these routines provide is the
> assurance that only valid mappings are dereferenced.
>
> The current safe path iterates over each mapping page by page, and
> kmap()'s each one individually, which is expensive and unnecessary.
> Instead, let's use kern_addr_valid() to establish on a per-VMA basis
> whether we may safely dereference them, and do so via its mapping in
> the VMALLOC region. This can be done safely due to the fact that we
> are holding the vmap_area_lock spinlock.
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---

Failed to add:
This patch should be an improvement by itself, but it also works around
an issue on arm64, where this code gets confused by the presence of huge
mappings in the VMALLOC region, e.g., when accessing /proc/kcore.
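
To illustrate (a simplified sketch, not the verbatim mm/vmalloc.c walk;
the accessor names vary between kernel versions), the existing per-page
path assumes that every level of the kernel page tables it visits is a
next-level table:

  /* roughly what the vmalloc_to_page() walk boils down to */
  pgd = pgd_offset_k((unsigned long)addr);
  pud = pud_offset(pgd, (unsigned long)addr);
  pmd = pmd_offset(pud, (unsigned long)addr);        /* treats a huge PUD as a PMD table */
  pte = pte_offset_kernel(pmd, (unsigned long)addr); /* treats a huge PMD as a PTE table */
  page = pte_page(*pte);                             /* bogus struct page for block mappings */

so a block mapping at the PUD or PMD level sends the walk into a bogus
pte, which is how the per-page kmap() path falls over on /proc/kcore.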


>  mm/vmalloc.c | 103 ++------------------
>  1 file changed, 10 insertions(+), 93 deletions(-)
>
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 34a1c3e46ed7..982d29511f92 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -1983,87 +1983,6 @@ void *vmalloc_32_user(unsigned long size)
>  }
>  EXPORT_SYMBOL(vmalloc_32_user);
>
> -/*
> - * small helper routine , copy contents to buf from addr.
> - * If the page is not present, fill zero.
> - */
> -
> -static int aligned_vread(char *buf, char *addr, unsigned long count)
> -{
> -       struct page *p;
> -       int copied = 0;
> -
> -       while (count) {
> -               unsigned long offset, length;
> -
> -               offset = offset_in_page(addr);
> -               length = PAGE_SIZE - offset;
> -               if (length > count)
> -                       length = count;
> -               p = vmalloc_to_page(addr);
> -               /*
> -                * To do safe access to this _mapped_ area, we need
> -                * lock. But adding lock here means that we need to add
> -                * overhead of vmalloc()/vfree() calles for this _debug_
> -                * interface, rarely used. Instead of that, we'll use
> -                * kmap() and get small overhead in this access function.
> -                */
> -               if (p) {
> -                       /*
> -                        * we can expect USER0 is not used (see vread/vwrite's
> -                        * function description)
> -                        */
> -                       void *map = kmap_atomic(p);
> -                       memcpy(buf, map + offset, length);
> -                       kunmap_atomic(map);
> -               } else
> -                       memset(buf, 0, length);
> -
> -               addr += length;
> -               buf += length;
> -               copied += length;
> -               count -= length;
> -       }
> -       return copied;
> -}
> -
> -static int aligned_vwrite(char *buf, char *addr, unsigned long count)
> -{
> -       struct page *p;
> -       int copied = 0;
> -
> -       while (count) {
> -               unsigned long offset, length;
> -
> -               offset = offset_in_page(addr);
> -               length = PAGE_SIZE - offset;
> -               if (length > count)
> -                       length = count;
> -               p = vmalloc_to_page(addr);
> -               /*
> -                * To do safe access to this _mapped_ area, we need
> -                * lock. But adding lock here means that we need to add
> -                * overhead of vmalloc()/vfree() calles for this _debug_
> -                * interface, rarely used. Instead of that, we'll use
> -                * kmap() and get small overhead in this access function.
> -                */
> -               if (p) {
> -                       /*
> -                        * we can expect USER0 is not used (see vread/vwrite's
> -                        * function description)
> -                        */
> -                       void *map = kmap_atomic(p);
> -                       memcpy(map + offset, buf, length);
> -                       kunmap_atomic(map);
> -               }
> -               addr += length;
> -               buf += length;
> -               copied += length;
> -               count -= length;
> -       }
> -       return copied;
> -}
> -
>  /**
>   *     vread() -  read vmalloc area in a safe way.
>   *     @buf:           buffer for reading data
> @@ -2083,10 +2002,8 @@ static int aligned_vwrite(char *buf, char *addr, unsigned long count)
>   *     If [addr...addr+count) doesn't includes any intersects with alive
>   *     vm_struct area, returns 0. @buf should be kernel's buffer.
>   *
> - *     Note: In usual ops, vread() is never necessary because the caller
> - *     should know vmalloc() area is valid and can use memcpy().
> - *     This is for routines which have to access vmalloc area without
> - *     any informaion, as /dev/kmem.
> + *     Note: This routine executes with the vmap_area_lock spinlock held,
> + *     which means it can safely access mappings at their virtual address.
>   *
>   */
>
> @@ -2125,8 +2042,9 @@ long vread(char *buf, char *addr, unsigned long count)
>                 n = vaddr + get_vm_area_size(vm) - addr;
>                 if (n > count)
>                         n = count;
> -               if (!(vm->flags & VM_IOREMAP))
> -                       aligned_vread(buf, addr, n);
> +               if (!(vm->flags & VM_IOREMAP) &&
> +                   kern_addr_valid((unsigned long)addr))
> +                       memcpy(buf, addr, n);
>                 else /* IOREMAP area is treated as memory hole */
>                         memset(buf, 0, n);
>                 buf += n;
> @@ -2165,10 +2083,8 @@ long vread(char *buf, char *addr, unsigned long count)
>   *     If [addr...addr+count) doesn't includes any intersects with alive
>   *     vm_struct area, returns 0. @buf should be kernel's buffer.
>   *
> - *     Note: In usual ops, vwrite() is never necessary because the caller
> - *     should know vmalloc() area is valid and can use memcpy().
> - *     This is for routines which have to access vmalloc area without
> - *     any informaion, as /dev/kmem.
> + *     Note: This routine executes with the vmap_area_lock spinlock held,
> + *     which means it can safely access mappings at their virtual address.
>   */
>
>  long vwrite(char *buf, char *addr, unsigned long count)
> @@ -2206,8 +2122,9 @@ long vwrite(char *buf, char *addr, unsigned long count)
>                 n = vaddr + get_vm_area_size(vm) - addr;
>                 if (n > count)
>                         n = count;
> -               if (!(vm->flags & VM_IOREMAP)) {
> -                       aligned_vwrite(buf, addr, n);
> +               if (!(vm->flags & VM_IOREMAP) &&
> +                   kern_addr_valid((unsigned long)addr)) {
> +                       memcpy(addr, buf, n);
>                         copied++;
>                 }
>                 buf += n;
> --
> 2.9.3
>


* Re: [PATCH] mm: vmalloc: simplify vread/vwrite to use existing mappings
  2017-06-07 18:20 [PATCH] mm: vmalloc: simplify vread/vwrite to use existing mappings Ard Biesheuvel
  2017-06-07 18:22 ` Ard Biesheuvel
@ 2017-06-08 16:06 ` Russell King - ARM Linux
  2017-06-08 16:15   ` Ard Biesheuvel
  1 sibling, 1 reply; 4+ messages in thread
From: Russell King - ARM Linux @ 2017-06-08 16:06 UTC (permalink / raw)
  To: Ard Biesheuvel
  Cc: linux-mm, mark.rutland, mhocko, akpm, zhongjiang,
	linux-arm-kernel, labbott

On Wed, Jun 07, 2017 at 06:20:52PM +0000, Ard Biesheuvel wrote:
> The current safe path iterates over each mapping page by page, and
> kmap()'s each one individually, which is expensive and unnecessary.
> Instead, let's use kern_addr_valid() to establish on a per-VMA basis
> whether we may safely dereference them, and do so via its mapping in
> the VMALLOC region. This can be done safely due to the fact that we
> are holding the vmap_area_lock spinlock.

This doesn't sound correct if you look at the definition of
kern_addr_valid().  For example, x86-32 has:

/*
 * kern_addr_valid() is (1) for FLATMEM and (0) for
 * SPARSEMEM and DISCONTIGMEM
 */
#ifdef CONFIG_FLATMEM
#define kern_addr_valid(addr)   (1)
#else
#define kern_addr_valid(kaddr)  (0)
#endif

The majority of architectures simply do:

#define kern_addr_valid(addr)   (1)

So, the result is that on the majority of architectures, we're now
going to simply dereference 'addr' with very little in the way of
checks.

I think this makes these functions racy - the point at which the
entry is placed onto the vmalloc list is quite different from the
point where the page table entries for it are populated (which
happens with the lock dropped.)  So, I think this is asking for
an oops.
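
To make the ordering concrete, a rough sketch of the allocation path
(paraphrased, not the verbatim mm/vmalloc.c code):

void *__vmalloc_node_range(unsigned long size, ...)
{
        struct vm_struct *area;
        void *addr;

        /* the area becomes visible on the vmalloc list here, with
         * vmap_area_lock taken and dropped internally ... */
        area = __get_vm_area_node(size, ...);

        /* ... but the backing pages are only allocated and the page
         * tables populated here, without vmap_area_lock held */
        addr = __vmalloc_area_node(area, ...);

        return addr;
}

A concurrent vread()/vwrite() can find the area on the list in that
window, pass a kern_addr_valid() that is hardwired to 1, and memcpy()
from a mapping whose page tables are not populated yet.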

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.


* Re: [PATCH] mm: vmalloc: simplify vread/vwrite to use existing mappings
  2017-06-08 16:06 ` Russell King - ARM Linux
@ 2017-06-08 16:15   ` Ard Biesheuvel
  0 siblings, 0 replies; 4+ messages in thread
From: Ard Biesheuvel @ 2017-06-08 16:15 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: linux-mm@kvack.org, Mark Rutland, Michal Hocko, Andrew Morton,
	Zhong Jiang, linux-arm-kernel@lists.infradead.org, Laura Abbott

On 8 June 2017 at 16:06, Russell King - ARM Linux <linux@armlinux.org.uk> wrote:
> On Wed, Jun 07, 2017 at 06:20:52PM +0000, Ard Biesheuvel wrote:
>> The current safe path iterates over each mapping page by page, and
>> kmap()'s each one individually, which is expensive and unnecessary.
>> Instead, let's use kern_addr_valid() to establish on a per-VMA basis
>> whether we may safely dereference them, and do so via its mapping in
>> the VMALLOC region. This can be done safely due to the fact that we
>> are holding the vmap_area_lock spinlock.
>
> This doesn't sound correct if you look at the definition of
> kern_addr_valid().  For example, x86-32 has:
>
> /*
>  * kern_addr_valid() is (1) for FLATMEM and (0) for
>  * SPARSEMEM and DISCONTIGMEM
>  */
> #ifdef CONFIG_FLATMEM
> #define kern_addr_valid(addr)   (1)
> #else
> #define kern_addr_valid(kaddr)  (0)
> #endif
>
> The majority of architectures simply do:
>
> #define kern_addr_valid(addr)   (1)
>

That is interesting, thanks for pointing it out.

The function read_kcore() [which is where the issue I am trying to fix
originates] currently has this logic:

  if (kern_addr_valid(start)) {
          unsigned long n;

          /*
           * Using bounce buffer to bypass the
           * hardened user copy kernel text checks.
           */
          memcpy(buf, (char *) start, tsz);
          n = copy_to_user(buffer, buf, tsz);
          /*
           * We cannot distinguish between fault on source
           * and fault on destination. When this happens
           * we clear too and hope it will trigger the
           * EFAULT again.
           */
          if (n) {
                  if (clear_user(buffer + tsz - n,
                                          n))
                          return -EFAULT;
          }
  } else {
          if (clear_user(buffer, tsz))
                  return -EFAULT;
  }

and the implementation I looked at [on arm64] happens to be the only
one that does something non-trivial.
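
For reference, the arm64 flavour is (roughly, from memory; a simplified
sketch rather than the verbatim arch code) a walk of the kernel page
tables that also understands block mappings:

  /* sketch of a non-trivial kern_addr_valid() */
  pgd = pgd_offset_k(addr);
  if (pgd_none(*pgd))
          return 0;
  pud = pud_offset(pgd, addr);
  if (pud_none(*pud))
          return 0;
  if (pud_sect(*pud))                    /* block mapping at PUD level */
          return pfn_valid(pud_pfn(*pud));
  pmd = pmd_offset(pud, addr);
  if (pmd_none(*pmd))
          return 0;
  if (pmd_sect(*pmd))                    /* block mapping at PMD level */
          return pfn_valid(pmd_pfn(*pmd));
  pte = pte_offset_kernel(pmd, addr);
  if (pte_none(*pte))
          return 0;
  return pfn_valid(pte_pfn(*pte));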

> So, the result is that on the majority of architectures, we're now
> going to simply dereference 'addr' with very little in the way of
> checks.
>

Indeed.

> I think this makes these functions racy - the point at which the
> entry is placed onto the vmalloc list is quite different from the
> point where the page table entries for it are populated (which
> happens with the lock dropped.)  So, I think this is asking for
> an oops.
>

Fair enough. I will try to find a different approach then.
