diff for duplicates of <20160118140955.GB20244@bbox> diff --git a/a/1.txt b/N1/1.txt index 2b5fd2c..74f71c4 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -51,3 +51,95 @@ On Mon, Jan 18, 2016 at 01:18:31PM +0100, Vlastimil Babka wrote: True. Let's go with this. I hope it's the last. Thanks, guys. + +>From 389bbcbad9aba7d86a575b8c6ea3b8985cc801ea Mon Sep 17 00:00:00 2001 +From: Junil Lee <junil0814.lee@lge.com> +Date: Mon, 18 Jan 2016 23:01:29 +0900 +Subject: [PATCH v5] zsmalloc: fix migrate_zspage-zs_free race condition + +record_obj() in migrate_zspage() does not preserve handle's +HANDLE_PIN_BIT, set by find_aloced_obj()->trypin_tag(), and implicitly +(accidentally) un-pins the handle, while migrate_zspage() still performs +an explicit unpin_tag() on the that handle. +This additional explicit unpin_tag() introduces a race condition with +zs_free(), which can pin that handle by this time, so the handle becomes +un-pinned. + +Schematically, it goes like this: + +CPU0 CPU1 +migrate_zspage + find_alloced_obj + trypin_tag + set HANDLE_PIN_BIT zs_free() + pin_tag() + obj_malloc() -- new object, no tag + record_obj() -- remove HANDLE_PIN_BIT set HANDLE_PIN_BIT + unpin_tag() -- remove zs_free's HANDLE_PIN_BIT + +The race condition may result in a NULL pointer dereference: + Unable to handle kernel NULL pointer dereference at virtual address 00000000 + CPU: 0 PID: 19001 Comm: CookieMonsterCl Tainted: + PC is at get_zspage_mapping+0x0/0x24 + LR is at obj_free.isra.22+0x64/0x128 + Call trace: + [<ffffffc0001a3aa8>] get_zspage_mapping+0x0/0x24 + [<ffffffc0001a4918>] zs_free+0x88/0x114 + [<ffffffc00053ae54>] zram_free_page+0x64/0xcc + [<ffffffc00053af4c>] zram_slot_free_notify+0x90/0x108 + [<ffffffc000196638>] swap_entry_free+0x278/0x294 + [<ffffffc000199008>] free_swap_and_cache+0x38/0x11c + [<ffffffc0001837ac>] unmap_single_vma+0x480/0x5c8 + [<ffffffc000184350>] unmap_vmas+0x44/0x60 + [<ffffffc00018a53c>] exit_mmap+0x50/0x110 + [<ffffffc00009e408>] mmput+0x58/0xe0 + [<ffffffc0000a2854>] do_exit+0x320/0x8dc + [<ffffffc0000a3cb4>] do_group_exit+0x44/0xa8 + [<ffffffc0000ae1bc>] get_signal+0x538/0x580 + [<ffffffc000087e44>] do_signal+0x98/0x4b8 + [<ffffffc00008843c>] do_notify_resume+0x14/0x5c + +This patch keeps the lock bit in migration path and update +value atomically. + +Signed-off-by: Junil Lee <junil0814.lee@lge.com> +Signed-off-by: Minchan Kim <minchan@kernel.org> +Cc: <stable@vger.kernel.org> [4.1+] +--- + mm/zsmalloc.c | 14 +++++++++++++- + 1 file changed, 13 insertions(+), 1 deletion(-) + +diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c +index e7414cec220b..2d7c4c11fc63 100644 +--- a/mm/zsmalloc.c ++++ b/mm/zsmalloc.c +@@ -309,7 +309,12 @@ static void free_handle(struct zs_pool *pool, unsigned long handle) + + static void record_obj(unsigned long handle, unsigned long obj) + { +- *(unsigned long *)handle = obj; ++ /* ++ * lsb of @obj represents handle lock while other bits ++ * represent object value the handle is pointing so ++ * updating shouldn't do store tearing. ++ */ ++ WRITE_ONCE(*(unsigned long *)handle, obj); + } + + /* zpool driver */ +@@ -1635,6 +1640,13 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class, + free_obj = obj_malloc(d_page, class, handle); + zs_object_copy(free_obj, used_obj, class); + index++; ++ /* ++ * record_obj updates handle's value to free_obj and it will ++ * invalidate lock bit(ie, HANDLE_PIN_BIT) of handle, which ++ * breaks synchronization using pin_tag(e,g, zs_free) so ++ * let's keep the lock bit. ++ */ ++ free_obj |= BIT(HANDLE_PIN_BIT); + record_obj(handle, free_obj); + unpin_tag(handle); + obj_free(pool, class, used_obj); +-- +1.9.1 diff --git a/a/content_digest b/N1/content_digest index f68ccc1..2380748 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -70,6 +70,98 @@ "\n" "True.\n" "Let's go with this. I hope it's the last.\n" - Thanks, guys. + "Thanks, guys.\n" + "\n" + ">From 389bbcbad9aba7d86a575b8c6ea3b8985cc801ea Mon Sep 17 00:00:00 2001\n" + "From: Junil Lee <junil0814.lee@lge.com>\n" + "Date: Mon, 18 Jan 2016 23:01:29 +0900\n" + "Subject: [PATCH v5] zsmalloc: fix migrate_zspage-zs_free race condition\n" + "\n" + "record_obj() in migrate_zspage() does not preserve handle's\n" + "HANDLE_PIN_BIT, set by find_aloced_obj()->trypin_tag(), and implicitly\n" + "(accidentally) un-pins the handle, while migrate_zspage() still performs\n" + "an explicit unpin_tag() on the that handle.\n" + "This additional explicit unpin_tag() introduces a race condition with\n" + "zs_free(), which can pin that handle by this time, so the handle becomes\n" + "un-pinned.\n" + "\n" + "Schematically, it goes like this:\n" + "\n" + "CPU0\t\t\t\t\tCPU1\n" + "migrate_zspage\n" + " find_alloced_obj\n" + " trypin_tag\n" + " set HANDLE_PIN_BIT\t\t\tzs_free()\n" + "\t\t\t\t\t\t pin_tag()\n" + " obj_malloc() -- new object, no tag\n" + " record_obj() -- remove HANDLE_PIN_BIT\t set HANDLE_PIN_BIT\n" + " unpin_tag() -- remove zs_free's HANDLE_PIN_BIT\n" + "\n" + "The race condition may result in a NULL pointer dereference:\n" + "\tUnable to handle kernel NULL pointer dereference at virtual address 00000000\n" + "\tCPU: 0 PID: 19001 Comm: CookieMonsterCl Tainted:\n" + "\tPC is at get_zspage_mapping+0x0/0x24\n" + "\tLR is at obj_free.isra.22+0x64/0x128\n" + "\tCall trace:\n" + "\t\t[<ffffffc0001a3aa8>] get_zspage_mapping+0x0/0x24\n" + "\t\t[<ffffffc0001a4918>] zs_free+0x88/0x114\n" + "\t\t[<ffffffc00053ae54>] zram_free_page+0x64/0xcc\n" + "\t\t[<ffffffc00053af4c>] zram_slot_free_notify+0x90/0x108\n" + "\t\t[<ffffffc000196638>] swap_entry_free+0x278/0x294\n" + "\t\t[<ffffffc000199008>] free_swap_and_cache+0x38/0x11c\n" + "\t\t[<ffffffc0001837ac>] unmap_single_vma+0x480/0x5c8\n" + "\t\t[<ffffffc000184350>] unmap_vmas+0x44/0x60\n" + "\t\t[<ffffffc00018a53c>] exit_mmap+0x50/0x110\n" + "\t\t[<ffffffc00009e408>] mmput+0x58/0xe0\n" + "\t\t[<ffffffc0000a2854>] do_exit+0x320/0x8dc\n" + "\t\t[<ffffffc0000a3cb4>] do_group_exit+0x44/0xa8\n" + "\t\t[<ffffffc0000ae1bc>] get_signal+0x538/0x580\n" + "\t\t[<ffffffc000087e44>] do_signal+0x98/0x4b8\n" + "\t\t[<ffffffc00008843c>] do_notify_resume+0x14/0x5c\n" + "\n" + "This patch keeps the lock bit in migration path and update\n" + "value atomically.\n" + "\n" + "Signed-off-by: Junil Lee <junil0814.lee@lge.com>\n" + "Signed-off-by: Minchan Kim <minchan@kernel.org>\n" + "Cc: <stable@vger.kernel.org> [4.1+]\n" + "---\n" + " mm/zsmalloc.c | 14 +++++++++++++-\n" + " 1 file changed, 13 insertions(+), 1 deletion(-)\n" + "\n" + "diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c\n" + "index e7414cec220b..2d7c4c11fc63 100644\n" + "--- a/mm/zsmalloc.c\n" + "+++ b/mm/zsmalloc.c\n" + "@@ -309,7 +309,12 @@ static void free_handle(struct zs_pool *pool, unsigned long handle)\n" + " \n" + " static void record_obj(unsigned long handle, unsigned long obj)\n" + " {\n" + "-\t*(unsigned long *)handle = obj;\n" + "+\t/*\n" + "+\t * lsb of @obj represents handle lock while other bits\n" + "+\t * represent object value the handle is pointing so\n" + "+\t * updating shouldn't do store tearing.\n" + "+\t */\n" + "+\tWRITE_ONCE(*(unsigned long *)handle, obj);\n" + " }\n" + " \n" + " /* zpool driver */\n" + "@@ -1635,6 +1640,13 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,\n" + " \t\tfree_obj = obj_malloc(d_page, class, handle);\n" + " \t\tzs_object_copy(free_obj, used_obj, class);\n" + " \t\tindex++;\n" + "+\t\t/*\n" + "+\t\t * record_obj updates handle's value to free_obj and it will\n" + "+\t\t * invalidate lock bit(ie, HANDLE_PIN_BIT) of handle, which\n" + "+\t\t * breaks synchronization using pin_tag(e,g, zs_free) so\n" + "+\t\t * let's keep the lock bit.\n" + "+\t\t */\n" + "+\t\tfree_obj |= BIT(HANDLE_PIN_BIT);\n" + " \t\trecord_obj(handle, free_obj);\n" + " \t\tunpin_tag(handle);\n" + " \t\tobj_free(pool, class, used_obj);\n" + "-- \n" + 1.9.1 -f729dc202dc009fa1e91b7c5c72073e645ac89be36ab30bdfd2d68d50a2d98b5 +9ee19fc76f4d2513809fff4b6560de0aaf4a4ac48e16ce4ce2ff356499b1f246
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.