All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <20160118140955.GB20244@bbox>

diff --git a/a/1.txt b/N1/1.txt
index 2b5fd2c..74f71c4 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -51,3 +51,95 @@ On Mon, Jan 18, 2016 at 01:18:31PM +0100, Vlastimil Babka wrote:
 True.
 Let's go with this. I hope it's the last.
 Thanks, guys.
+
+>From 389bbcbad9aba7d86a575b8c6ea3b8985cc801ea Mon Sep 17 00:00:00 2001
+From: Junil Lee <junil0814.lee@lge.com>
+Date: Mon, 18 Jan 2016 23:01:29 +0900
+Subject: [PATCH v5] zsmalloc: fix migrate_zspage-zs_free race condition
+
+record_obj() in migrate_zspage() does not preserve handle's
+HANDLE_PIN_BIT, set by find_aloced_obj()->trypin_tag(), and implicitly
+(accidentally) un-pins the handle, while migrate_zspage() still performs
+an explicit unpin_tag() on the that handle.
+This additional explicit unpin_tag() introduces a race condition with
+zs_free(), which can pin that handle by this time, so the handle becomes
+un-pinned.
+
+Schematically, it goes like this:
+
+CPU0					CPU1
+migrate_zspage
+  find_alloced_obj
+    trypin_tag
+      set HANDLE_PIN_BIT			zs_free()
+						  pin_tag()
+  obj_malloc() -- new object, no tag
+  record_obj() -- remove HANDLE_PIN_BIT	    set HANDLE_PIN_BIT
+  unpin_tag()  -- remove zs_free's HANDLE_PIN_BIT
+
+The race condition may result in a NULL pointer dereference:
+	Unable to handle kernel NULL pointer dereference at virtual address 00000000
+	CPU: 0 PID: 19001 Comm: CookieMonsterCl Tainted:
+	PC is at get_zspage_mapping+0x0/0x24
+	LR is at obj_free.isra.22+0x64/0x128
+	Call trace:
+		[<ffffffc0001a3aa8>] get_zspage_mapping+0x0/0x24
+		[<ffffffc0001a4918>] zs_free+0x88/0x114
+		[<ffffffc00053ae54>] zram_free_page+0x64/0xcc
+		[<ffffffc00053af4c>] zram_slot_free_notify+0x90/0x108
+		[<ffffffc000196638>] swap_entry_free+0x278/0x294
+		[<ffffffc000199008>] free_swap_and_cache+0x38/0x11c
+		[<ffffffc0001837ac>] unmap_single_vma+0x480/0x5c8
+		[<ffffffc000184350>] unmap_vmas+0x44/0x60
+		[<ffffffc00018a53c>] exit_mmap+0x50/0x110
+		[<ffffffc00009e408>] mmput+0x58/0xe0
+		[<ffffffc0000a2854>] do_exit+0x320/0x8dc
+		[<ffffffc0000a3cb4>] do_group_exit+0x44/0xa8
+		[<ffffffc0000ae1bc>] get_signal+0x538/0x580
+		[<ffffffc000087e44>] do_signal+0x98/0x4b8
+		[<ffffffc00008843c>] do_notify_resume+0x14/0x5c
+
+This patch keeps the lock bit in migration path and update
+value atomically.
+
+Signed-off-by: Junil Lee <junil0814.lee@lge.com>
+Signed-off-by: Minchan Kim <minchan@kernel.org>
+Cc: <stable@vger.kernel.org> [4.1+]
+---
+ mm/zsmalloc.c | 14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
+index e7414cec220b..2d7c4c11fc63 100644
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -309,7 +309,12 @@ static void free_handle(struct zs_pool *pool, unsigned long handle)
+ 
+ static void record_obj(unsigned long handle, unsigned long obj)
+ {
+-	*(unsigned long *)handle = obj;
++	/*
++	 * lsb of @obj represents handle lock while other bits
++	 * represent object value the handle is pointing so
++	 * updating shouldn't do store tearing.
++	 */
++	WRITE_ONCE(*(unsigned long *)handle, obj);
+ }
+ 
+ /* zpool driver */
+@@ -1635,6 +1640,13 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
+ 		free_obj = obj_malloc(d_page, class, handle);
+ 		zs_object_copy(free_obj, used_obj, class);
+ 		index++;
++		/*
++		 * record_obj updates handle's value to free_obj and it will
++		 * invalidate lock bit(ie, HANDLE_PIN_BIT) of handle, which
++		 * breaks synchronization using pin_tag(e,g, zs_free) so
++		 * let's keep the lock bit.
++		 */
++		free_obj |= BIT(HANDLE_PIN_BIT);
+ 		record_obj(handle, free_obj);
+ 		unpin_tag(handle);
+ 		obj_free(pool, class, used_obj);
+-- 
+1.9.1
diff --git a/a/content_digest b/N1/content_digest
index f68ccc1..2380748 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -70,6 +70,98 @@
  "\n"
  "True.\n"
  "Let's go with this. I hope it's the last.\n"
- Thanks, guys.
+ "Thanks, guys.\n"
+ "\n"
+ ">From 389bbcbad9aba7d86a575b8c6ea3b8985cc801ea Mon Sep 17 00:00:00 2001\n"
+ "From: Junil Lee <junil0814.lee@lge.com>\n"
+ "Date: Mon, 18 Jan 2016 23:01:29 +0900\n"
+ "Subject: [PATCH v5] zsmalloc: fix migrate_zspage-zs_free race condition\n"
+ "\n"
+ "record_obj() in migrate_zspage() does not preserve handle's\n"
+ "HANDLE_PIN_BIT, set by find_aloced_obj()->trypin_tag(), and implicitly\n"
+ "(accidentally) un-pins the handle, while migrate_zspage() still performs\n"
+ "an explicit unpin_tag() on the that handle.\n"
+ "This additional explicit unpin_tag() introduces a race condition with\n"
+ "zs_free(), which can pin that handle by this time, so the handle becomes\n"
+ "un-pinned.\n"
+ "\n"
+ "Schematically, it goes like this:\n"
+ "\n"
+ "CPU0\t\t\t\t\tCPU1\n"
+ "migrate_zspage\n"
+ "  find_alloced_obj\n"
+ "    trypin_tag\n"
+ "      set HANDLE_PIN_BIT\t\t\tzs_free()\n"
+ "\t\t\t\t\t\t  pin_tag()\n"
+ "  obj_malloc() -- new object, no tag\n"
+ "  record_obj() -- remove HANDLE_PIN_BIT\t    set HANDLE_PIN_BIT\n"
+ "  unpin_tag()  -- remove zs_free's HANDLE_PIN_BIT\n"
+ "\n"
+ "The race condition may result in a NULL pointer dereference:\n"
+ "\tUnable to handle kernel NULL pointer dereference at virtual address 00000000\n"
+ "\tCPU: 0 PID: 19001 Comm: CookieMonsterCl Tainted:\n"
+ "\tPC is at get_zspage_mapping+0x0/0x24\n"
+ "\tLR is at obj_free.isra.22+0x64/0x128\n"
+ "\tCall trace:\n"
+ "\t\t[<ffffffc0001a3aa8>] get_zspage_mapping+0x0/0x24\n"
+ "\t\t[<ffffffc0001a4918>] zs_free+0x88/0x114\n"
+ "\t\t[<ffffffc00053ae54>] zram_free_page+0x64/0xcc\n"
+ "\t\t[<ffffffc00053af4c>] zram_slot_free_notify+0x90/0x108\n"
+ "\t\t[<ffffffc000196638>] swap_entry_free+0x278/0x294\n"
+ "\t\t[<ffffffc000199008>] free_swap_and_cache+0x38/0x11c\n"
+ "\t\t[<ffffffc0001837ac>] unmap_single_vma+0x480/0x5c8\n"
+ "\t\t[<ffffffc000184350>] unmap_vmas+0x44/0x60\n"
+ "\t\t[<ffffffc00018a53c>] exit_mmap+0x50/0x110\n"
+ "\t\t[<ffffffc00009e408>] mmput+0x58/0xe0\n"
+ "\t\t[<ffffffc0000a2854>] do_exit+0x320/0x8dc\n"
+ "\t\t[<ffffffc0000a3cb4>] do_group_exit+0x44/0xa8\n"
+ "\t\t[<ffffffc0000ae1bc>] get_signal+0x538/0x580\n"
+ "\t\t[<ffffffc000087e44>] do_signal+0x98/0x4b8\n"
+ "\t\t[<ffffffc00008843c>] do_notify_resume+0x14/0x5c\n"
+ "\n"
+ "This patch keeps the lock bit in migration path and update\n"
+ "value atomically.\n"
+ "\n"
+ "Signed-off-by: Junil Lee <junil0814.lee@lge.com>\n"
+ "Signed-off-by: Minchan Kim <minchan@kernel.org>\n"
+ "Cc: <stable@vger.kernel.org> [4.1+]\n"
+ "---\n"
+ " mm/zsmalloc.c | 14 +++++++++++++-\n"
+ " 1 file changed, 13 insertions(+), 1 deletion(-)\n"
+ "\n"
+ "diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c\n"
+ "index e7414cec220b..2d7c4c11fc63 100644\n"
+ "--- a/mm/zsmalloc.c\n"
+ "+++ b/mm/zsmalloc.c\n"
+ "@@ -309,7 +309,12 @@ static void free_handle(struct zs_pool *pool, unsigned long handle)\n"
+ " \n"
+ " static void record_obj(unsigned long handle, unsigned long obj)\n"
+ " {\n"
+ "-\t*(unsigned long *)handle = obj;\n"
+ "+\t/*\n"
+ "+\t * lsb of @obj represents handle lock while other bits\n"
+ "+\t * represent object value the handle is pointing so\n"
+ "+\t * updating shouldn't do store tearing.\n"
+ "+\t */\n"
+ "+\tWRITE_ONCE(*(unsigned long *)handle, obj);\n"
+ " }\n"
+ " \n"
+ " /* zpool driver */\n"
+ "@@ -1635,6 +1640,13 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,\n"
+ " \t\tfree_obj = obj_malloc(d_page, class, handle);\n"
+ " \t\tzs_object_copy(free_obj, used_obj, class);\n"
+ " \t\tindex++;\n"
+ "+\t\t/*\n"
+ "+\t\t * record_obj updates handle's value to free_obj and it will\n"
+ "+\t\t * invalidate lock bit(ie, HANDLE_PIN_BIT) of handle, which\n"
+ "+\t\t * breaks synchronization using pin_tag(e,g, zs_free) so\n"
+ "+\t\t * let's keep the lock bit.\n"
+ "+\t\t */\n"
+ "+\t\tfree_obj |= BIT(HANDLE_PIN_BIT);\n"
+ " \t\trecord_obj(handle, free_obj);\n"
+ " \t\tunpin_tag(handle);\n"
+ " \t\tobj_free(pool, class, used_obj);\n"
+ "-- \n"
+ 1.9.1
 
-f729dc202dc009fa1e91b7c5c72073e645ac89be36ab30bdfd2d68d50a2d98b5
+9ee19fc76f4d2513809fff4b6560de0aaf4a4ac48e16ce4ce2ff356499b1f246

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.