diff for duplicates of <20170912063102.GA2068@bbox> diff --git a/a/1.txt b/N1/1.txt index 813b1c8..288298b 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -8,3 +8,219 @@ On Tue, Sep 12, 2017 at 03:04:56PM +0900, Sergey Senozhatsky wrote: Several modifications during development finally makes me remove the NULL check. Thanks for catching it as well as style fix-up of other patch, Sergey. + +>From ac07fb0012bcd39c7a5bf5262c70c45d2e7df314 Mon Sep 17 00:00:00 2001 +From: Minchan Kim <minchan@kernel.org> +Date: Tue, 12 Sep 2017 08:44:29 +0900 +Subject: [PATCH 5/5] mm:swap: skip swapcache for swapin of synchronous device + +With fast swap storage, platform want to use swap more aggressively +and swap-in is crucial to application latency. + +The rw_page based synchronous devices like zram, pmem and btt are such +fast storage. When I profile swapin performance with zram lz4 decompress +test, S/W overhead is more than 70%. Maybe, it would be bigger in nvdimm. + +This patch aims for reducing swap-in latency via skipping swapcache +if swap device is synchronous device like rw_page based device. +It enhances 45% my swapin test(5G sequential swapin, no readahead, +from 2.41sec to 1.64sec). 
+ +Cc: Dan Williams <dan.j.williams@intel.com> +Cc: Ross Zwisler <ross.zwisler@linux.intel.com> +Cc: Hugh Dickins <hughd@google.com> +Signed-off-by: Minchan Kim <minchan@kernel.org> +--- + include/linux/swap.h | 1 + + mm/memory.c | 50 +++++++++++++++++++++++++++++++++++--------------- + mm/page_io.c | 6 +++--- + mm/swapfile.c | 11 +++++++---- + 4 files changed, 46 insertions(+), 22 deletions(-) + +diff --git a/include/linux/swap.h b/include/linux/swap.h +index 739d94397c47..fb46a3b55d65 100644 +--- a/include/linux/swap.h ++++ b/include/linux/swap.h +@@ -462,6 +462,7 @@ extern int page_swapcount(struct page *); + extern int __swp_swapcount(swp_entry_t entry); + extern int swp_swapcount(swp_entry_t entry); + extern struct swap_info_struct *page_swap_info(struct page *); ++extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); + extern bool reuse_swap_page(struct page *, int *); + extern int try_to_free_swap(struct page *); + struct backing_dev_info; +diff --git a/mm/memory.c b/mm/memory.c +index ec4e15494901..af1d7f2e9664 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -2842,7 +2842,7 @@ EXPORT_SYMBOL(unmap_mapping_range); + int do_swap_page(struct vm_fault *vmf) + { + struct vm_area_struct *vma = vmf->vma; +- struct page *page = NULL, *swapcache; ++ struct page *page = NULL, *swapcache = NULL; + struct mem_cgroup *memcg; + struct vma_swap_readahead swap_ra; + swp_entry_t entry; +@@ -2881,17 +2881,35 @@ int do_swap_page(struct vm_fault *vmf) + } + goto out; + } ++ ++ + delayacct_set_flag(DELAYACCT_PF_SWAPIN); + if (!page) + page = lookup_swap_cache(entry, vma_readahead ? 
vma : NULL, + vmf->address); + if (!page) { +- if (vma_readahead) +- page = do_swap_page_readahead(entry, +- GFP_HIGHUSER_MOVABLE, vmf, &swap_ra); +- else +- page = swapin_readahead(entry, +- GFP_HIGHUSER_MOVABLE, vma, vmf->address); ++ struct swap_info_struct *si = swp_swap_info(entry); ++ ++ if (!(si->flags & SWP_SYNCHRONOUS_IO)) { ++ if (vma_readahead) ++ page = do_swap_page_readahead(entry, ++ GFP_HIGHUSER_MOVABLE, vmf, &swap_ra); ++ else ++ page = swapin_readahead(entry, ++ GFP_HIGHUSER_MOVABLE, vma, vmf->address); ++ swapcache = page; ++ } else { ++ /* skip swapcache */ ++ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address); ++ if (page) { ++ __SetPageLocked(page); ++ __SetPageSwapBacked(page); ++ set_page_private(page, entry.val); ++ lru_cache_add_anon(page); ++ swap_readpage(page, true); ++ } ++ } ++ + if (!page) { + /* + * Back out if somebody else faulted in this pte +@@ -2920,7 +2938,6 @@ int do_swap_page(struct vm_fault *vmf) + goto out_release; + } + +- swapcache = page; + locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags); + + delayacct_clear_flag(DELAYACCT_PF_SWAPIN); +@@ -2935,7 +2952,8 @@ int do_swap_page(struct vm_fault *vmf) + * test below, are not enough to exclude that. Even if it is still + * swapcache, we need to check that the page's swap has not changed. 
+ */ +- if (unlikely(!PageSwapCache(page) || page_private(page) != entry.val)) ++ if (unlikely((!PageSwapCache(page) || ++ page_private(page) != entry.val)) && swapcache) + goto out_page; + + page = ksm_might_need_to_copy(page, vma, vmf->address); +@@ -2988,14 +3006,16 @@ int do_swap_page(struct vm_fault *vmf) + pte = pte_mksoft_dirty(pte); + set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); + vmf->orig_pte = pte; +- if (page == swapcache) { +- do_page_add_anon_rmap(page, vma, vmf->address, exclusive); +- mem_cgroup_commit_charge(page, memcg, true, false); +- activate_page(page); +- } else { /* ksm created a completely new copy */ ++ ++ /* ksm created a completely new copy */ ++ if (unlikely(page != swapcache && swapcache)) { + page_add_new_anon_rmap(page, vma, vmf->address, false); + mem_cgroup_commit_charge(page, memcg, false, false); + lru_cache_add_active_or_unevictable(page, vma); ++ } else { ++ do_page_add_anon_rmap(page, vma, vmf->address, exclusive); ++ mem_cgroup_commit_charge(page, memcg, true, false); ++ activate_page(page); + } + + swap_free(entry); +@@ -3003,7 +3023,7 @@ int do_swap_page(struct vm_fault *vmf) + (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) + try_to_free_swap(page); + unlock_page(page); +- if (page != swapcache) { ++ if (page != swapcache && swapcache) { + /* + * Hold the lock to avoid the swap entry to be reused + * until we take the PT lock for the pte_same() check +diff --git a/mm/page_io.c b/mm/page_io.c +index 21502d341a67..d4a98e1f6608 100644 +--- a/mm/page_io.c ++++ b/mm/page_io.c +@@ -346,7 +346,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, + return ret; + } + +-int swap_readpage(struct page *page, bool do_poll) ++int swap_readpage(struct page *page, bool synchronous) + { + struct bio *bio; + int ret = 0; +@@ -354,7 +354,7 @@ int swap_readpage(struct page *page, bool do_poll) + blk_qc_t qc; + struct gendisk *disk; + +- VM_BUG_ON_PAGE(!PageSwapCache(page), page); ++ 
VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageUptodate(page), page); + if (frontswap_load(page) == 0) { +@@ -402,7 +402,7 @@ int swap_readpage(struct page *page, bool do_poll) + count_vm_event(PSWPIN); + bio_get(bio); + qc = submit_bio(bio); +- while (do_poll) { ++ while (synchronous) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (!READ_ONCE(bio->bi_private)) + break; +diff --git a/mm/swapfile.c b/mm/swapfile.c +index 1305591cde4d..64a3d85226ba 100644 +--- a/mm/swapfile.c ++++ b/mm/swapfile.c +@@ -3454,10 +3454,15 @@ int swapcache_prepare(swp_entry_t entry) + return __swap_duplicate(entry, SWAP_HAS_CACHE); + } + ++struct swap_info_struct *swp_swap_info(swp_entry_t entry) ++{ ++ return swap_info[swp_type(entry)]; ++} ++ + struct swap_info_struct *page_swap_info(struct page *page) + { +- swp_entry_t swap = { .val = page_private(page) }; +- return swap_info[swp_type(swap)]; ++ swp_entry_t entry = { .val = page_private(page) }; ++ return swp_swap_info(entry); + } + + /* +@@ -3465,7 +3470,6 @@ struct swap_info_struct *page_swap_info(struct page *page) + */ + struct address_space *__page_file_mapping(struct page *page) + { +- VM_BUG_ON_PAGE(!PageSwapCache(page), page); + return page_swap_info(page)->swap_file->f_mapping; + } + EXPORT_SYMBOL_GPL(__page_file_mapping); +@@ -3473,7 +3477,6 @@ EXPORT_SYMBOL_GPL(__page_file_mapping); + pgoff_t __page_file_index(struct page *page) + { + swp_entry_t swap = { .val = page_private(page) }; +- VM_BUG_ON_PAGE(!PageSwapCache(page), page); + return swp_offset(swap); + } + EXPORT_SYMBOL_GPL(__page_file_index); +-- +2.7.4 diff --git a/a/content_digest b/N1/content_digest index 1e9a83c..5451f14 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -25,6 +25,222 @@ "> what if alloc_page_vma() fails?\n" "\n" "Several modifications during development finally makes me remove the NULL check.\n" - Thanks for catching it as well as style fix-up of other patch, 
Sergey. + "Thanks for catching it as well as style fix-up of other patch, Sergey.\n" + "\n" + ">From ac07fb0012bcd39c7a5bf5262c70c45d2e7df314 Mon Sep 17 00:00:00 2001\n" + "From: Minchan Kim <minchan@kernel.org>\n" + "Date: Tue, 12 Sep 2017 08:44:29 +0900\n" + "Subject: [PATCH 5/5] mm:swap: skip swapcache for swapin of synchronous device\n" + "\n" + "With fast swap storage, platform want to use swap more aggressively\n" + "and swap-in is crucial to application latency.\n" + "\n" + "The rw_page based synchronous devices like zram, pmem and btt are such\n" + "fast storage. When I profile swapin performance with zram lz4 decompress\n" + "test, S/W overhead is more than 70%. Maybe, it would be bigger in nvdimm.\n" + "\n" + "This patch aims for reducing swap-in latency via skipping swapcache\n" + "if swap device is synchronous device like rw_page based device.\n" + "It enhances 45% my swapin test(5G sequential swapin, no readahead,\n" + "from 2.41sec to 1.64sec).\n" + "\n" + "Cc: Dan Williams <dan.j.williams@intel.com>\n" + "Cc: Ross Zwisler <ross.zwisler@linux.intel.com>\n" + "Cc: Hugh Dickins <hughd@google.com>\n" + "Signed-off-by: Minchan Kim <minchan@kernel.org>\n" + "---\n" + " include/linux/swap.h | 1 +\n" + " mm/memory.c | 50 +++++++++++++++++++++++++++++++++++---------------\n" + " mm/page_io.c | 6 +++---\n" + " mm/swapfile.c | 11 +++++++----\n" + " 4 files changed, 46 insertions(+), 22 deletions(-)\n" + "\n" + "diff --git a/include/linux/swap.h b/include/linux/swap.h\n" + "index 739d94397c47..fb46a3b55d65 100644\n" + "--- a/include/linux/swap.h\n" + "+++ b/include/linux/swap.h\n" + "@@ -462,6 +462,7 @@ extern int page_swapcount(struct page *);\n" + " extern int __swp_swapcount(swp_entry_t entry);\n" + " extern int swp_swapcount(swp_entry_t entry);\n" + " extern struct swap_info_struct *page_swap_info(struct page *);\n" + "+extern struct swap_info_struct *swp_swap_info(swp_entry_t entry);\n" + " extern bool reuse_swap_page(struct page *, int *);\n" + " extern 
int try_to_free_swap(struct page *);\n" + " struct backing_dev_info;\n" + "diff --git a/mm/memory.c b/mm/memory.c\n" + "index ec4e15494901..af1d7f2e9664 100644\n" + "--- a/mm/memory.c\n" + "+++ b/mm/memory.c\n" + "@@ -2842,7 +2842,7 @@ EXPORT_SYMBOL(unmap_mapping_range);\n" + " int do_swap_page(struct vm_fault *vmf)\n" + " {\n" + " \tstruct vm_area_struct *vma = vmf->vma;\n" + "-\tstruct page *page = NULL, *swapcache;\n" + "+\tstruct page *page = NULL, *swapcache = NULL;\n" + " \tstruct mem_cgroup *memcg;\n" + " \tstruct vma_swap_readahead swap_ra;\n" + " \tswp_entry_t entry;\n" + "@@ -2881,17 +2881,35 @@ int do_swap_page(struct vm_fault *vmf)\n" + " \t\t}\n" + " \t\tgoto out;\n" + " \t}\n" + "+\n" + "+\n" + " \tdelayacct_set_flag(DELAYACCT_PF_SWAPIN);\n" + " \tif (!page)\n" + " \t\tpage = lookup_swap_cache(entry, vma_readahead ? vma : NULL,\n" + " \t\t\t\t\t vmf->address);\n" + " \tif (!page) {\n" + "-\t\tif (vma_readahead)\n" + "-\t\t\tpage = do_swap_page_readahead(entry,\n" + "-\t\t\t\tGFP_HIGHUSER_MOVABLE, vmf, &swap_ra);\n" + "-\t\telse\n" + "-\t\t\tpage = swapin_readahead(entry,\n" + "-\t\t\t\tGFP_HIGHUSER_MOVABLE, vma, vmf->address);\n" + "+\t\tstruct swap_info_struct *si = swp_swap_info(entry);\n" + "+\n" + "+\t\tif (!(si->flags & SWP_SYNCHRONOUS_IO)) {\n" + "+\t\t\tif (vma_readahead)\n" + "+\t\t\t\tpage = do_swap_page_readahead(entry,\n" + "+\t\t\t\t\tGFP_HIGHUSER_MOVABLE, vmf, &swap_ra);\n" + "+\t\t\telse\n" + "+\t\t\t\tpage = swapin_readahead(entry,\n" + "+\t\t\t\t\tGFP_HIGHUSER_MOVABLE, vma, vmf->address);\n" + "+\t\t\tswapcache = page;\n" + "+\t\t} else {\n" + "+\t\t\t/* skip swapcache */\n" + "+\t\t\tpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);\n" + "+\t\t\tif (page) {\n" + "+\t\t\t\t__SetPageLocked(page);\n" + "+\t\t\t\t__SetPageSwapBacked(page);\n" + "+\t\t\t\tset_page_private(page, entry.val);\n" + "+\t\t\t\tlru_cache_add_anon(page);\n" + "+\t\t\t\tswap_readpage(page, true);\n" + "+\t\t\t}\n" + "+\t\t}\n" + "+\n" + " \t\tif 
(!page) {\n" + " \t\t\t/*\n" + " \t\t\t * Back out if somebody else faulted in this pte\n" + "@@ -2920,7 +2938,6 @@ int do_swap_page(struct vm_fault *vmf)\n" + " \t\tgoto out_release;\n" + " \t}\n" + " \n" + "-\tswapcache = page;\n" + " \tlocked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);\n" + " \n" + " \tdelayacct_clear_flag(DELAYACCT_PF_SWAPIN);\n" + "@@ -2935,7 +2952,8 @@ int do_swap_page(struct vm_fault *vmf)\n" + " \t * test below, are not enough to exclude that. Even if it is still\n" + " \t * swapcache, we need to check that the page's swap has not changed.\n" + " \t */\n" + "-\tif (unlikely(!PageSwapCache(page) || page_private(page) != entry.val))\n" + "+\tif (unlikely((!PageSwapCache(page) ||\n" + "+\t\t\tpage_private(page) != entry.val)) && swapcache)\n" + " \t\tgoto out_page;\n" + " \n" + " \tpage = ksm_might_need_to_copy(page, vma, vmf->address);\n" + "@@ -2988,14 +3006,16 @@ int do_swap_page(struct vm_fault *vmf)\n" + " \t\tpte = pte_mksoft_dirty(pte);\n" + " \tset_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);\n" + " \tvmf->orig_pte = pte;\n" + "-\tif (page == swapcache) {\n" + "-\t\tdo_page_add_anon_rmap(page, vma, vmf->address, exclusive);\n" + "-\t\tmem_cgroup_commit_charge(page, memcg, true, false);\n" + "-\t\tactivate_page(page);\n" + "-\t} else { /* ksm created a completely new copy */\n" + "+\n" + "+\t/* ksm created a completely new copy */\n" + "+\tif (unlikely(page != swapcache && swapcache)) {\n" + " \t\tpage_add_new_anon_rmap(page, vma, vmf->address, false);\n" + " \t\tmem_cgroup_commit_charge(page, memcg, false, false);\n" + " \t\tlru_cache_add_active_or_unevictable(page, vma);\n" + "+\t} else {\n" + "+\t\tdo_page_add_anon_rmap(page, vma, vmf->address, exclusive);\n" + "+\t\tmem_cgroup_commit_charge(page, memcg, true, false);\n" + "+\t\tactivate_page(page);\n" + " \t}\n" + " \n" + " \tswap_free(entry);\n" + "@@ -3003,7 +3023,7 @@ int do_swap_page(struct vm_fault *vmf)\n" + " \t (vma->vm_flags & VM_LOCKED) || PageMlocked(page))\n" 
+ " \t\ttry_to_free_swap(page);\n" + " \tunlock_page(page);\n" + "-\tif (page != swapcache) {\n" + "+\tif (page != swapcache && swapcache) {\n" + " \t\t/*\n" + " \t\t * Hold the lock to avoid the swap entry to be reused\n" + " \t\t * until we take the PT lock for the pte_same() check\n" + "diff --git a/mm/page_io.c b/mm/page_io.c\n" + "index 21502d341a67..d4a98e1f6608 100644\n" + "--- a/mm/page_io.c\n" + "+++ b/mm/page_io.c\n" + "@@ -346,7 +346,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,\n" + " \treturn ret;\n" + " }\n" + " \n" + "-int swap_readpage(struct page *page, bool do_poll)\n" + "+int swap_readpage(struct page *page, bool synchronous)\n" + " {\n" + " \tstruct bio *bio;\n" + " \tint ret = 0;\n" + "@@ -354,7 +354,7 @@ int swap_readpage(struct page *page, bool do_poll)\n" + " \tblk_qc_t qc;\n" + " \tstruct gendisk *disk;\n" + " \n" + "-\tVM_BUG_ON_PAGE(!PageSwapCache(page), page);\n" + "+\tVM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);\n" + " \tVM_BUG_ON_PAGE(!PageLocked(page), page);\n" + " \tVM_BUG_ON_PAGE(PageUptodate(page), page);\n" + " \tif (frontswap_load(page) == 0) {\n" + "@@ -402,7 +402,7 @@ int swap_readpage(struct page *page, bool do_poll)\n" + " \tcount_vm_event(PSWPIN);\n" + " \tbio_get(bio);\n" + " \tqc = submit_bio(bio);\n" + "-\twhile (do_poll) {\n" + "+\twhile (synchronous) {\n" + " \t\tset_current_state(TASK_UNINTERRUPTIBLE);\n" + " \t\tif (!READ_ONCE(bio->bi_private))\n" + " \t\t\tbreak;\n" + "diff --git a/mm/swapfile.c b/mm/swapfile.c\n" + "index 1305591cde4d..64a3d85226ba 100644\n" + "--- a/mm/swapfile.c\n" + "+++ b/mm/swapfile.c\n" + "@@ -3454,10 +3454,15 @@ int swapcache_prepare(swp_entry_t entry)\n" + " \treturn __swap_duplicate(entry, SWAP_HAS_CACHE);\n" + " }\n" + " \n" + "+struct swap_info_struct *swp_swap_info(swp_entry_t entry)\n" + "+{\n" + "+\treturn swap_info[swp_type(entry)];\n" + "+}\n" + "+\n" + " struct swap_info_struct *page_swap_info(struct page *page)\n" + " {\n" + 
"-\tswp_entry_t swap = { .val = page_private(page) };\n" + "-\treturn swap_info[swp_type(swap)];\n" + "+\tswp_entry_t entry = { .val = page_private(page) };\n" + "+\treturn swp_swap_info(entry);\n" + " }\n" + " \n" + " /*\n" + "@@ -3465,7 +3470,6 @@ struct swap_info_struct *page_swap_info(struct page *page)\n" + " */\n" + " struct address_space *__page_file_mapping(struct page *page)\n" + " {\n" + "-\tVM_BUG_ON_PAGE(!PageSwapCache(page), page);\n" + " \treturn page_swap_info(page)->swap_file->f_mapping;\n" + " }\n" + " EXPORT_SYMBOL_GPL(__page_file_mapping);\n" + "@@ -3473,7 +3477,6 @@ EXPORT_SYMBOL_GPL(__page_file_mapping);\n" + " pgoff_t __page_file_index(struct page *page)\n" + " {\n" + " \tswp_entry_t swap = { .val = page_private(page) };\n" + "-\tVM_BUG_ON_PAGE(!PageSwapCache(page), page);\n" + " \treturn swp_offset(swap);\n" + " }\n" + " EXPORT_SYMBOL_GPL(__page_file_index);\n" + "-- \n" + 2.7.4 -02bb59c7df8a3ba3137e11787f0fbccd54af108e43a0a6bf4436e94ca98b6692 +809b01ee05e7653da7348f4c7b7d31d99f9a7029bc1c712fec6ff7008ffea69e
This is an external index of several public inboxes; see the mirroring instructions for how to clone and mirror all data and code used by this external index.