diff for duplicates of <20181022084659.GA84523@tiger-server> diff --git a/a/1.txt b/N1/1.txt index b634393..10482f8 100644 --- a/a/1.txt +++ b/N1/1.txt @@ -1,7 +1,7 @@ On 2018-10-19 at 12:33:48 -0400, Barret Rhoden wrote: -> On 2018-09-21 at 21:29 David Hildenbrand <david@redhat.com> wrote: +> On 2018-09-21 at 21:29 David Hildenbrand <david-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote: > > On 21/09/2018 20:17, Dan Williams wrote: -> > > On Fri, Sep 21, 2018 at 7:24 AM David Hildenbrand <david@redhat.com> wrote: +> > > On Fri, Sep 21, 2018 at 7:24 AM David Hildenbrand <david-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote: > > > [..] > > >>> Remove the PageReserved flag sounds more reasonable. > > >>> And Could we still have a flag to identify it is a device private memory, or @@ -30,8 +30,4 @@ Added Alex, attached the patch-set. > Barret > > -> -_______________________________________________ -Linux-nvdimm mailing list -Linux-nvdimm@lists.01.org -https://lists.01.org/mailman/listinfo/linux-nvdimm +> diff --git a/a/content_digest b/N1/content_digest index cf7e835..6618701 100644 --- a/a/content_digest +++ b/N1/content_digest @@ -8,29 +8,32 @@ "ref\0CAPcyv4j9K-wkq8oK-8_twWViKhyGSHD7cOE5UoRN-09xKXPq7A@mail.gmail.com\0" "ref\0159bb198-a4a1-0fee-bf57-24c3c28788bd@redhat.com\0" "ref\020181019123348.04ee7dd8@gnomeregan.cam.corp.google.com\0" - "From\0Yi Zhang <yi.z.zhang@linux.intel.com>\0" + "ref\020181019123348.04ee7dd8-5rj//V2oKXqT+5UN0cW69VLMcqb5oVE02SarAXORi/o@public.gmane.org\0" + "From\0Yi Zhang <yi.z.zhang-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>\0" "Subject\0Re: [PATCH V5 4/4] kvm: add a check if pfn is from NVDIMM pmem.\0" "Date\0Mon, 22 Oct 2018 16:47:00 +0800\0" - "To\0Barret Rhoden <brho@google.com>\0" - "Cc\0J\303\251r\303\264me Glisse <jglisse@redhat.com>" - Alexander Duyck <alexander.h.duyck@linux.intel.com> + "To\0Barret Rhoden <brho-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>\0" + "Cc\0J\303\251r\303\264me Glisse <jglisse-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>" + Alexander Duyck <alexander.h.duyck-VuQAYsv1563Yd54FQh9/CA@public.gmane.org> Zhang - Yu C <yu.c.zhang@intel.com> - KVM list <kvm@vger.kernel.org> - linux-nvdimm <linux-nvdimm@lists.01.org> - Jan Kara <jack@suse.cz> - David Hildenbrand <david@redhat.com> - Linux Kernel Mailing List <linux-kernel@vger.kernel.org> - Linux MM <linux-mm@kvack.org> - rkrcmar@redhat.com - Paolo Bonzini <pbonzini@redhat.com> - " Christoph Hellwig <hch@lst.de>\0" + Yu C <yu.c.zhang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> + KVM list <kvm-u79uwXL29TY76Z2rM5mHXA@public.gmane.org> + linux-nvdimm <linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org> + Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org> + David Hildenbrand <david-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> + Linux Kernel Mailing List <linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org> + Linux MM <linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org> + rkrcmar-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org + Paolo Bonzini <pbonzini-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> + Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org> + Zhang + " Yi Z <yi.z.zhang-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>\0" "\00:1\0" "b\0" "On 2018-10-19 at 12:33:48 -0400, Barret Rhoden wrote:\n" - "> On 2018-09-21 at 21:29 David Hildenbrand <david@redhat.com> wrote:\n" + "> On 2018-09-21 at 21:29 David Hildenbrand <david-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote:\n" "> > On 21/09/2018 20:17, Dan Williams wrote:\n" - "> > > On Fri, Sep 21, 2018 at 7:24 AM David Hildenbrand <david@redhat.com> wrote:\n" + "> > > On Fri, Sep 21, 2018 at 7:24 AM David Hildenbrand <david-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org> wrote:\n" "> > > [..] \n" "> > >>> Remove the PageReserved flag sounds more reasonable.\n" "> > >>> And Could we still have a flag to identify it is a device private memory, or\n" @@ -59,10 +62,6 @@ "> Barret\n" "> \n" "> \n" - "> \n" - "_______________________________________________\n" - "Linux-nvdimm mailing list\n" - "Linux-nvdimm@lists.01.org\n" - https://lists.01.org/mailman/listinfo/linux-nvdimm + > -fc2f327d6010839dcbbface8f3fb308a7ad10ae6717140856578a9b54d423885 +688e6df0351ebeedfa9de89a5d0ffaa3f285ff449d1a9d2697e4d8496690f8f6
diff --git a/a/1.txt b/N2/1.txt index b634393..416f372 100644 --- a/a/1.txt +++ b/N2/1.txt @@ -30,8 +30,4 @@ Added Alex, attached the patch-set. > Barret > > -> -_______________________________________________ -Linux-nvdimm mailing list -Linux-nvdimm@lists.01.org -https://lists.01.org/mailman/listinfo/linux-nvdimm +> diff --git a/N2/2.1.hdr b/N2/2.1.hdr new file mode 100644 index 0000000..0f1aef1 --- /dev/null +++ b/N2/2.1.hdr @@ -0,0 +1,31 @@ +Return-Path: <alexander.h.duyck@linux.intel.com> +X-Original-To: yi.z.zhang@linux.intel.com +Delivered-To: yi.z.zhang@linux.intel.com +Received: from orsmga001.jf.intel.com (orsmga001.jf.intel.com [10.7.209.18]) + (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) + (No client certificate requested) + by linux.intel.com (Postfix) with ESMTPS id DEBC8580430; + Wed, 17 Oct 2018 16:54:31 -0700 (PDT) +X-ExtLoop1: 1 +X-IronPort-AV: E=Sophos;i="5.54,393,1534834800"; + d="scan'208";a="100366643" +Received: from ahduyck-mobl.amr.corp.intel.com (HELO localhost.localdomain) ([10.7.198.154]) + by orsmga001.jf.intel.com with ESMTP; 17 Oct 2018 16:54:31 -0700 +Subject: [mm PATCH v4 5/6] mm: Add reserved flag setting to set_page_links +From: Alexander Duyck <alexander.h.duyck@linux.intel.com> +To: linux-mm@kvack.org, akpm@linux-foundation.org +Cc: pavel.tatashin@microsoft.com, mhocko@suse.com, dave.jiang@intel.com, + alexander.h.duyck@linux.intel.com, linux-kernel@vger.kernel.org, + willy@infradead.org, davem@davemloft.net, yi.z.zhang@linux.intel.com, + khalid.aziz@oracle.com, rppt@linux.vnet.ibm.com, vbabka@suse.cz, + sparclinux@vger.kernel.org, dan.j.williams@intel.com, + ldufour@linux.vnet.ibm.com, mgorman@techsingularity.net, mingo@kernel.org, + kirill.shutemov@linux.intel.com +Date: Wed, 17 Oct 2018 16:54:31 -0700 +Message-ID: <20181017235431.17213.11512.stgit@localhost.localdomain> +In-Reply-To: <20181017235043.17213.92459.stgit@localhost.localdomain> +References: <20181017235043.17213.92459.stgit@localhost.localdomain> +User-Agent: StGit/0.17.1-dirty +MIME-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 7bit diff --git a/N2/2.1.txt b/N2/2.1.txt new file mode 100644 index 0000000..ee09c4a --- /dev/null +++ b/N2/2.1.txt @@ -0,0 +1,114 @@ +This patch modifies the set_page_links function to include the setting of +the reserved flag via a simple AND and OR operation. The motivation for +this is the fact that the existing __set_bit call still seems to have +effects on performance as replacing the call with the AND and OR can reduce +initialization time. + +Looking over the assembly code before and after the change the main +difference between the two is that the reserved bit is stored in a value +that is generated outside of the main initialization loop and is then +written with the other flags field values in one write to the page->flags +value. Previously the generated value was written and then then a btsq +instruction was issued. + +On my x86_64 test system with 3TB of persistent memory per node I saw the +persistent memory initialization time on average drop from 23.49s to +19.12s per node. + +Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com> +--- + include/linux/mm.h | 9 ++++++++- + mm/page_alloc.c | 29 +++++++++++++++++++---------- + 2 files changed, 27 insertions(+), 11 deletions(-) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 6e2c9631af05..14d06d7d2986 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1171,11 +1171,18 @@ static inline void set_page_node(struct page *page, unsigned long node) + page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; + } + ++static inline void set_page_reserved(struct page *page, bool reserved) ++{ ++ page->flags &= ~(1ul << PG_reserved); ++ page->flags |= (unsigned long)(!!reserved) << PG_reserved; ++} ++ + static inline void set_page_links(struct page *page, enum zone_type zone, +- unsigned long node, unsigned long pfn) ++ unsigned long node, unsigned long pfn, bool reserved) + { + set_page_zone(page, zone); + set_page_node(page, node); ++ set_page_reserved(page, reserved); + #ifdef SECTION_IN_PAGE_FLAGS + set_page_section(page, pfn_to_section_nr(pfn)); + #endif +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index a0b81e0bef03..e7fee7a5f8a3 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1179,7 +1179,7 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn, + unsigned long zone, int nid) + { + mm_zero_struct_page(page); +- set_page_links(page, zone, nid, pfn); ++ set_page_links(page, zone, nid, pfn, false); + init_page_count(page); + page_mapcount_reset(page); + page_cpupid_reset_last(page); +@@ -1195,7 +1195,8 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn, + static void __meminit __init_pageblock(unsigned long start_pfn, + unsigned long nr_pages, + unsigned long zone, int nid, +- struct dev_pagemap *pgmap) ++ struct dev_pagemap *pgmap, ++ bool is_reserved) + { + unsigned long nr_pgmask = pageblock_nr_pages - 1; + struct page *start_page = pfn_to_page(start_pfn); +@@ -1231,19 +1232,16 @@ static void __meminit __init_pageblock(unsigned long start_pfn, + * call because of the fact that the pfn number is used to + * get the section_nr and this function should not be + * spanning more than a single section. ++ * ++ * We can use a non-atomic operation for setting the ++ * PG_reserved flag as we are still initializing the pages. + */ +- set_page_links(page, zone, nid, start_pfn); ++ set_page_links(page, zone, nid, start_pfn, is_reserved); + init_page_count(page); + page_mapcount_reset(page); + page_cpupid_reset_last(page); + + /* +- * We can use the non-atomic __set_bit operation for setting +- * the flag as we are still initializing the pages. +- */ +- __SetPageReserved(page); +- +- /* + * ZONE_DEVICE pages union ->lru with a ->pgmap back + * pointer and hmm_data. It is a bug if a ZONE_DEVICE + * page is ever freed or placed on a driver-private list. +@@ -5612,7 +5610,18 @@ static void __meminit __memmap_init_hotplug(unsigned long size, int nid, + pfn = max(ALIGN_DOWN(pfn - 1, pageblock_nr_pages), start_pfn); + stride -= pfn; + +- __init_pageblock(pfn, stride, zone, nid, pgmap); ++ /* ++ * The last argument of __init_pageblock is a boolean ++ * value indicating if the page will be marked as reserved. ++ * ++ * Mark page reserved as it will need to wait for onlining ++ * phase for it to be fully associated with a zone. ++ * ++ * Under certain circumstances ZONE_DEVICE pages may not ++ * need to be marked as reserved, however there is still ++ * code that is depending on this being set for now. ++ */ ++ __init_pageblock(pfn, stride, zone, nid, pgmap, true); + + cond_resched(); + } diff --git a/N2/2.bin b/N2/2.bin new file mode 100644 index 0000000..ae43c6b --- /dev/null +++ b/N2/2.bin @@ -0,0 +1,146 @@ +Return-Path: <alexander.h.duyck@linux.intel.com> +X-Original-To: yi.z.zhang@linux.intel.com +Delivered-To: yi.z.zhang@linux.intel.com +Received: from orsmga001.jf.intel.com (orsmga001.jf.intel.com [10.7.209.18]) + (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) + (No client certificate requested) + by linux.intel.com (Postfix) with ESMTPS id DEBC8580430; + Wed, 17 Oct 2018 16:54:31 -0700 (PDT) +X-ExtLoop1: 1 +X-IronPort-AV: E=Sophos;i="5.54,393,1534834800"; + d="scan'208";a="100366643" +Received: from ahduyck-mobl.amr.corp.intel.com (HELO localhost.localdomain) ([10.7.198.154]) + by orsmga001.jf.intel.com with ESMTP; 17 Oct 2018 16:54:31 -0700 +Subject: [mm PATCH v4 5/6] mm: Add reserved flag setting to set_page_links +From: Alexander Duyck <alexander.h.duyck@linux.intel.com> +To: linux-mm@kvack.org, akpm@linux-foundation.org +Cc: pavel.tatashin@microsoft.com, mhocko@suse.com, dave.jiang@intel.com, + alexander.h.duyck@linux.intel.com, linux-kernel@vger.kernel.org, + willy@infradead.org, davem@davemloft.net, yi.z.zhang@linux.intel.com, + khalid.aziz@oracle.com, rppt@linux.vnet.ibm.com, vbabka@suse.cz, + sparclinux@vger.kernel.org, dan.j.williams@intel.com, + ldufour@linux.vnet.ibm.com, mgorman@techsingularity.net, mingo@kernel.org, + kirill.shutemov@linux.intel.com +Date: Wed, 17 Oct 2018 16:54:31 -0700 +Message-ID: <20181017235431.17213.11512.stgit@localhost.localdomain> +In-Reply-To: <20181017235043.17213.92459.stgit@localhost.localdomain> +References: <20181017235043.17213.92459.stgit@localhost.localdomain> +User-Agent: StGit/0.17.1-dirty +MIME-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 7bit + +This patch modifies the set_page_links function to include the setting of +the reserved flag via a simple AND and OR operation. The motivation for +this is the fact that the existing __set_bit call still seems to have +effects on performance as replacing the call with the AND and OR can reduce +initialization time. + +Looking over the assembly code before and after the change the main +difference between the two is that the reserved bit is stored in a value +that is generated outside of the main initialization loop and is then +written with the other flags field values in one write to the page->flags +value. Previously the generated value was written and then then a btsq +instruction was issued. + +On my x86_64 test system with 3TB of persistent memory per node I saw the +persistent memory initialization time on average drop from 23.49s to +19.12s per node. + +Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com> +--- + include/linux/mm.h | 9 ++++++++- + mm/page_alloc.c | 29 +++++++++++++++++++---------- + 2 files changed, 27 insertions(+), 11 deletions(-) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 6e2c9631af05..14d06d7d2986 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1171,11 +1171,18 @@ static inline void set_page_node(struct page *page, unsigned long node) + page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; + } + ++static inline void set_page_reserved(struct page *page, bool reserved) ++{ ++ page->flags &= ~(1ul << PG_reserved); ++ page->flags |= (unsigned long)(!!reserved) << PG_reserved; ++} ++ + static inline void set_page_links(struct page *page, enum zone_type zone, +- unsigned long node, unsigned long pfn) ++ unsigned long node, unsigned long pfn, bool reserved) + { + set_page_zone(page, zone); + set_page_node(page, node); ++ set_page_reserved(page, reserved); + #ifdef SECTION_IN_PAGE_FLAGS + set_page_section(page, pfn_to_section_nr(pfn)); + #endif +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index a0b81e0bef03..e7fee7a5f8a3 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1179,7 +1179,7 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn, + unsigned long zone, int nid) + { + mm_zero_struct_page(page); +- set_page_links(page, zone, nid, pfn); ++ set_page_links(page, zone, nid, pfn, false); + init_page_count(page); + page_mapcount_reset(page); + page_cpupid_reset_last(page); +@@ -1195,7 +1195,8 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn, + static void __meminit __init_pageblock(unsigned long start_pfn, + unsigned long nr_pages, + unsigned long zone, int nid, +- struct dev_pagemap *pgmap) ++ struct dev_pagemap *pgmap, ++ bool is_reserved) + { + unsigned long nr_pgmask = pageblock_nr_pages - 1; + struct page *start_page = pfn_to_page(start_pfn); +@@ -1231,19 +1232,16 @@ static void __meminit __init_pageblock(unsigned long start_pfn, + * call because of the fact that the pfn number is used to + * get the section_nr and this function should not be + * spanning more than a single section. ++ * ++ * We can use a non-atomic operation for setting the ++ * PG_reserved flag as we are still initializing the pages. + */ +- set_page_links(page, zone, nid, start_pfn); ++ set_page_links(page, zone, nid, start_pfn, is_reserved); + init_page_count(page); + page_mapcount_reset(page); + page_cpupid_reset_last(page); + + /* +- * We can use the non-atomic __set_bit operation for setting +- * the flag as we are still initializing the pages. +- */ +- __SetPageReserved(page); +- +- /* + * ZONE_DEVICE pages union ->lru with a ->pgmap back + * pointer and hmm_data. It is a bug if a ZONE_DEVICE + * page is ever freed or placed on a driver-private list. +@@ -5612,7 +5610,18 @@ static void __meminit __memmap_init_hotplug(unsigned long size, int nid, + pfn = max(ALIGN_DOWN(pfn - 1, pageblock_nr_pages), start_pfn); + stride -= pfn; + +- __init_pageblock(pfn, stride, zone, nid, pgmap); ++ /* ++ * The last argument of __init_pageblock is a boolean ++ * value indicating if the page will be marked as reserved. ++ * ++ * Mark page reserved as it will need to wait for onlining ++ * phase for it to be fully associated with a zone. ++ * ++ * Under certain circumstances ZONE_DEVICE pages may not ++ * need to be marked as reserved, however there is still ++ * code that is depending on this being set for now. ++ */ ++ __init_pageblock(pfn, stride, zone, nid, pgmap, true); + + cond_resched(); + } diff --git a/N2/2.hdr b/N2/2.hdr new file mode 100644 index 0000000..976a421 --- /dev/null +++ b/N2/2.hdr @@ -0,0 +1,2 @@ +Content-Type: message/rfc822 +Content-Disposition: inline diff --git a/a/content_digest b/N2/content_digest index cf7e835..a7ab36b 100644 --- a/a/content_digest +++ b/N2/content_digest @@ -12,20 +12,25 @@ "Subject\0Re: [PATCH V5 4/4] kvm: add a check if pfn is from NVDIMM pmem.\0" "Date\0Mon, 22 Oct 2018 16:47:00 +0800\0" "To\0Barret Rhoden <brho@google.com>\0" - "Cc\0J\303\251r\303\264me Glisse <jglisse@redhat.com>" - Alexander Duyck <alexander.h.duyck@linux.intel.com> - Zhang - Yu C <yu.c.zhang@intel.com> + "Cc\0David Hildenbrand <david@redhat.com>" + Dan Williams <dan.j.williams@intel.com> KVM list <kvm@vger.kernel.org> + Linux Kernel Mailing List <linux-kernel@vger.kernel.org> linux-nvdimm <linux-nvdimm@lists.01.org> + Paolo Bonzini <pbonzini@redhat.com> + Dave Jiang <dave.jiang@intel.com> + Zhang + Yu C <yu.c.zhang@intel.com> + Pankaj Gupta <pagupta@redhat.com> Jan Kara <jack@suse.cz> - David Hildenbrand <david@redhat.com> - Linux Kernel Mailing List <linux-kernel@vger.kernel.org> + Christoph Hellwig <hch@lst.de> Linux MM <linux-mm@kvack.org> rkrcmar@redhat.com - Paolo Bonzini <pbonzini@redhat.com> - " Christoph Hellwig <hch@lst.de>\0" - "\00:1\0" + " J\303\251r\303\264me Glisse <jglisse@redhat.com>" + Zhang + Yi Z <yi.z.zhang@intel.com> + " Alexander Duyck <alexander.h.duyck@linux.intel.com>\0" + "\01:1\0" "b\0" "On 2018-10-19 at 12:33:48 -0400, Barret Rhoden wrote:\n" "> On 2018-09-21 at 21:29 David Hildenbrand <david@redhat.com> wrote:\n" @@ -59,10 +64,271 @@ "> Barret\n" "> \n" "> \n" - "> \n" - "_______________________________________________\n" - "Linux-nvdimm mailing list\n" - "Linux-nvdimm@lists.01.org\n" - https://lists.01.org/mailman/listinfo/linux-nvdimm + > + "\01:2\0" + "b\0" + "Return-Path: <alexander.h.duyck@linux.intel.com>\n" + "X-Original-To: yi.z.zhang@linux.intel.com\n" + "Delivered-To: yi.z.zhang@linux.intel.com\n" + "Received: from orsmga001.jf.intel.com (orsmga001.jf.intel.com [10.7.209.18])\n" + "\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n" + "\t(No client certificate requested)\n" + "\tby linux.intel.com (Postfix) with ESMTPS id DEBC8580430;\n" + "\tWed, 17 Oct 2018 16:54:31 -0700 (PDT)\n" + "X-ExtLoop1: 1\n" + "X-IronPort-AV: E=Sophos;i=\"5.54,393,1534834800\";\n" + " d=\"scan'208\";a=\"100366643\"\n" + "Received: from ahduyck-mobl.amr.corp.intel.com (HELO localhost.localdomain) ([10.7.198.154])\n" + " by orsmga001.jf.intel.com with ESMTP; 17 Oct 2018 16:54:31 -0700\n" + "Subject: [mm PATCH v4 5/6] mm: Add reserved flag setting to set_page_links\n" + "From: Alexander Duyck <alexander.h.duyck@linux.intel.com>\n" + "To: linux-mm@kvack.org, akpm@linux-foundation.org\n" + "Cc: pavel.tatashin@microsoft.com, mhocko@suse.com, dave.jiang@intel.com,\n" + " alexander.h.duyck@linux.intel.com, linux-kernel@vger.kernel.org,\n" + " willy@infradead.org, davem@davemloft.net, yi.z.zhang@linux.intel.com,\n" + " khalid.aziz@oracle.com, rppt@linux.vnet.ibm.com, vbabka@suse.cz,\n" + " sparclinux@vger.kernel.org, dan.j.williams@intel.com,\n" + " ldufour@linux.vnet.ibm.com, mgorman@techsingularity.net, mingo@kernel.org,\n" + " kirill.shutemov@linux.intel.com\n" + "Date: Wed, 17 Oct 2018 16:54:31 -0700\n" + "Message-ID: <20181017235431.17213.11512.stgit@localhost.localdomain>\n" + "In-Reply-To: <20181017235043.17213.92459.stgit@localhost.localdomain>\n" + "References: <20181017235043.17213.92459.stgit@localhost.localdomain>\n" + "User-Agent: StGit/0.17.1-dirty\n" + "MIME-Version: 1.0\n" + "Content-Type: text/plain; charset=\"utf-8\"\n" + "Content-Transfer-Encoding: 7bit\n" + "\n" + "This patch modifies the set_page_links function to include the setting of\n" + "the reserved flag via a simple AND and OR operation. The motivation for\n" + "this is the fact that the existing __set_bit call still seems to have\n" + "effects on performance as replacing the call with the AND and OR can reduce\n" + "initialization time.\n" + "\n" + "Looking over the assembly code before and after the change the main\n" + "difference between the two is that the reserved bit is stored in a value\n" + "that is generated outside of the main initialization loop and is then\n" + "written with the other flags field values in one write to the page->flags\n" + "value. Previously the generated value was written and then then a btsq\n" + "instruction was issued.\n" + "\n" + "On my x86_64 test system with 3TB of persistent memory per node I saw the\n" + "persistent memory initialization time on average drop from 23.49s to\n" + "19.12s per node.\n" + "\n" + "Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>\n" + "---\n" + " include/linux/mm.h | 9 ++++++++-\n" + " mm/page_alloc.c | 29 +++++++++++++++++++----------\n" + " 2 files changed, 27 insertions(+), 11 deletions(-)\n" + "\n" + "diff --git a/include/linux/mm.h b/include/linux/mm.h\n" + "index 6e2c9631af05..14d06d7d2986 100644\n" + "--- a/include/linux/mm.h\n" + "+++ b/include/linux/mm.h\n" + "@@ -1171,11 +1171,18 @@ static inline void set_page_node(struct page *page, unsigned long node)\n" + " \tpage->flags |= (node & NODES_MASK) << NODES_PGSHIFT;\n" + " }\n" + " \n" + "+static inline void set_page_reserved(struct page *page, bool reserved)\n" + "+{\n" + "+\tpage->flags &= ~(1ul << PG_reserved);\n" + "+\tpage->flags |= (unsigned long)(!!reserved) << PG_reserved;\n" + "+}\n" + "+\n" + " static inline void set_page_links(struct page *page, enum zone_type zone,\n" + "-\tunsigned long node, unsigned long pfn)\n" + "+\tunsigned long node, unsigned long pfn, bool reserved)\n" + " {\n" + " \tset_page_zone(page, zone);\n" + " \tset_page_node(page, node);\n" + "+\tset_page_reserved(page, reserved);\n" + " #ifdef SECTION_IN_PAGE_FLAGS\n" + " \tset_page_section(page, pfn_to_section_nr(pfn));\n" + " #endif\n" + "diff --git a/mm/page_alloc.c b/mm/page_alloc.c\n" + "index a0b81e0bef03..e7fee7a5f8a3 100644\n" + "--- a/mm/page_alloc.c\n" + "+++ b/mm/page_alloc.c\n" + "@@ -1179,7 +1179,7 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,\n" + " \t\t\t\tunsigned long zone, int nid)\n" + " {\n" + " \tmm_zero_struct_page(page);\n" + "-\tset_page_links(page, zone, nid, pfn);\n" + "+\tset_page_links(page, zone, nid, pfn, false);\n" + " \tinit_page_count(page);\n" + " \tpage_mapcount_reset(page);\n" + " \tpage_cpupid_reset_last(page);\n" + "@@ -1195,7 +1195,8 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,\n" + " static void __meminit __init_pageblock(unsigned long start_pfn,\n" + " \t\t\t\t unsigned long nr_pages,\n" + " \t\t\t\t unsigned long zone, int nid,\n" + "-\t\t\t\t struct dev_pagemap *pgmap)\n" + "+\t\t\t\t struct dev_pagemap *pgmap,\n" + "+\t\t\t\t bool is_reserved)\n" + " {\n" + " \tunsigned long nr_pgmask = pageblock_nr_pages - 1;\n" + " \tstruct page *start_page = pfn_to_page(start_pfn);\n" + "@@ -1231,19 +1232,16 @@ static void __meminit __init_pageblock(unsigned long start_pfn,\n" + " \t\t * call because of the fact that the pfn number is used to\n" + " \t\t * get the section_nr and this function should not be\n" + " \t\t * spanning more than a single section.\n" + "+\t\t *\n" + "+\t\t * We can use a non-atomic operation for setting the\n" + "+\t\t * PG_reserved flag as we are still initializing the pages.\n" + " \t\t */\n" + "-\t\tset_page_links(page, zone, nid, start_pfn);\n" + "+\t\tset_page_links(page, zone, nid, start_pfn, is_reserved);\n" + " \t\tinit_page_count(page);\n" + " \t\tpage_mapcount_reset(page);\n" + " \t\tpage_cpupid_reset_last(page);\n" + " \n" + " \t\t/*\n" + "-\t\t * We can use the non-atomic __set_bit operation for setting\n" + "-\t\t * the flag as we are still initializing the pages.\n" + "-\t\t */\n" + "-\t\t__SetPageReserved(page);\n" + "-\n" + "-\t\t/*\n" + " \t\t * ZONE_DEVICE pages union ->lru with a ->pgmap back\n" + " \t\t * pointer and hmm_data. It is a bug if a ZONE_DEVICE\n" + " \t\t * page is ever freed or placed on a driver-private list.\n" + "@@ -5612,7 +5610,18 @@ static void __meminit __memmap_init_hotplug(unsigned long size, int nid,\n" + " \t\tpfn = max(ALIGN_DOWN(pfn - 1, pageblock_nr_pages), start_pfn);\n" + " \t\tstride -= pfn;\n" + " \n" + "-\t\t__init_pageblock(pfn, stride, zone, nid, pgmap);\n" + "+\t\t/*\n" + "+\t\t * The last argument of __init_pageblock is a boolean\n" + "+\t\t * value indicating if the page will be marked as reserved.\n" + "+\t\t *\n" + "+\t\t * Mark page reserved as it will need to wait for onlining\n" + "+\t\t * phase for it to be fully associated with a zone.\n" + "+\t\t *\n" + "+\t\t * Under certain circumstances ZONE_DEVICE pages may not\n" + "+\t\t * need to be marked as reserved, however there is still\n" + "+\t\t * code that is depending on this being set for now.\n" + "+\t\t */\n" + "+\t\t__init_pageblock(pfn, stride, zone, nid, pgmap, true);\n" + " \n" + " \t\tcond_resched();\n" + " \t}\n" + "\n" + "\02:2.1\0" + "b\0" + "This patch modifies the set_page_links function to include the setting of\n" + "the reserved flag via a simple AND and OR operation. The motivation for\n" + "this is the fact that the existing __set_bit call still seems to have\n" + "effects on performance as replacing the call with the AND and OR can reduce\n" + "initialization time.\n" + "\n" + "Looking over the assembly code before and after the change the main\n" + "difference between the two is that the reserved bit is stored in a value\n" + "that is generated outside of the main initialization loop and is then\n" + "written with the other flags field values in one write to the page->flags\n" + "value. Previously the generated value was written and then then a btsq\n" + "instruction was issued.\n" + "\n" + "On my x86_64 test system with 3TB of persistent memory per node I saw the\n" + "persistent memory initialization time on average drop from 23.49s to\n" + "19.12s per node.\n" + "\n" + "Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>\n" + "---\n" + " include/linux/mm.h | 9 ++++++++-\n" + " mm/page_alloc.c | 29 +++++++++++++++++++----------\n" + " 2 files changed, 27 insertions(+), 11 deletions(-)\n" + "\n" + "diff --git a/include/linux/mm.h b/include/linux/mm.h\n" + "index 6e2c9631af05..14d06d7d2986 100644\n" + "--- a/include/linux/mm.h\n" + "+++ b/include/linux/mm.h\n" + "@@ -1171,11 +1171,18 @@ static inline void set_page_node(struct page *page, unsigned long node)\n" + " \tpage->flags |= (node & NODES_MASK) << NODES_PGSHIFT;\n" + " }\n" + " \n" + "+static inline void set_page_reserved(struct page *page, bool reserved)\n" + "+{\n" + "+\tpage->flags &= ~(1ul << PG_reserved);\n" + "+\tpage->flags |= (unsigned long)(!!reserved) << PG_reserved;\n" + "+}\n" + "+\n" + " static inline void set_page_links(struct page *page, enum zone_type zone,\n" + "-\tunsigned long node, unsigned long pfn)\n" + "+\tunsigned long node, unsigned long pfn, bool reserved)\n" + " {\n" + " \tset_page_zone(page, zone);\n" + " \tset_page_node(page, node);\n" + "+\tset_page_reserved(page, reserved);\n" + " #ifdef SECTION_IN_PAGE_FLAGS\n" + " \tset_page_section(page, pfn_to_section_nr(pfn));\n" + " #endif\n" + "diff --git a/mm/page_alloc.c b/mm/page_alloc.c\n" + "index a0b81e0bef03..e7fee7a5f8a3 100644\n" + "--- a/mm/page_alloc.c\n" + "+++ b/mm/page_alloc.c\n" + "@@ -1179,7 +1179,7 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,\n" + " \t\t\t\tunsigned long zone, int nid)\n" + " {\n" + " \tmm_zero_struct_page(page);\n" + "-\tset_page_links(page, zone, nid, pfn);\n" + "+\tset_page_links(page, zone, nid, pfn, false);\n" + " \tinit_page_count(page);\n" + " \tpage_mapcount_reset(page);\n" + " \tpage_cpupid_reset_last(page);\n" + "@@ -1195,7 +1195,8 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,\n" + " static void __meminit __init_pageblock(unsigned long start_pfn,\n" + " \t\t\t\t unsigned long nr_pages,\n" + " \t\t\t\t unsigned long zone, int nid,\n" + "-\t\t\t\t struct dev_pagemap *pgmap)\n" + "+\t\t\t\t struct dev_pagemap *pgmap,\n" + "+\t\t\t\t bool is_reserved)\n" + " {\n" + " \tunsigned long nr_pgmask = pageblock_nr_pages - 1;\n" + " \tstruct page *start_page = pfn_to_page(start_pfn);\n" + "@@ -1231,19 +1232,16 @@ static void __meminit __init_pageblock(unsigned long start_pfn,\n" + " \t\t * call because of the fact that the pfn number is used to\n" + " \t\t * get the section_nr and this function should not be\n" + " \t\t * spanning more than a single section.\n" + "+\t\t *\n" + "+\t\t * We can use a non-atomic operation for setting the\n" + "+\t\t * PG_reserved flag as we are still initializing the pages.\n" + " \t\t */\n" + "-\t\tset_page_links(page, zone, nid, start_pfn);\n" + "+\t\tset_page_links(page, zone, nid, start_pfn, is_reserved);\n" + " \t\tinit_page_count(page);\n" + " \t\tpage_mapcount_reset(page);\n" + " \t\tpage_cpupid_reset_last(page);\n" + " \n" + " \t\t/*\n" + "-\t\t * We can use the non-atomic __set_bit operation for setting\n" + "-\t\t * the flag as we are still initializing the pages.\n" + "-\t\t */\n" + "-\t\t__SetPageReserved(page);\n" + "-\n" + "-\t\t/*\n" + " \t\t * ZONE_DEVICE pages union ->lru with a ->pgmap back\n" + " \t\t * pointer and hmm_data. It is a bug if a ZONE_DEVICE\n" + " \t\t * page is ever freed or placed on a driver-private list.\n" + "@@ -5612,7 +5610,18 @@ static void __meminit __memmap_init_hotplug(unsigned long size, int nid,\n" + " \t\tpfn = max(ALIGN_DOWN(pfn - 1, pageblock_nr_pages), start_pfn);\n" + " \t\tstride -= pfn;\n" + " \n" + "-\t\t__init_pageblock(pfn, stride, zone, nid, pgmap);\n" + "+\t\t/*\n" + "+\t\t * The last argument of __init_pageblock is a boolean\n" + "+\t\t * value indicating if the page will be marked as reserved.\n" + "+\t\t *\n" + "+\t\t * Mark page reserved as it will need to wait for onlining\n" + "+\t\t * phase for it to be fully associated with a zone.\n" + "+\t\t *\n" + "+\t\t * Under certain circumstances ZONE_DEVICE pages may not\n" + "+\t\t * need to be marked as reserved, however there is still\n" + "+\t\t * code that is depending on this being set for now.\n" + "+\t\t */\n" + "+\t\t__init_pageblock(pfn, stride, zone, nid, pgmap, true);\n" + " \n" + " \t\tcond_resched();\n" + " \t}" -fc2f327d6010839dcbbface8f3fb308a7ad10ae6717140856578a9b54d423885 +fbb28b75dfb436cebbd9d5cb12912067c784733be66952998110cf8896a9d668
diff --git a/a/1.txt b/N3/1.txt index b634393..416f372 100644 --- a/a/1.txt +++ b/N3/1.txt @@ -30,8 +30,4 @@ Added Alex, attached the patch-set. > Barret > > -> -_______________________________________________ -Linux-nvdimm mailing list -Linux-nvdimm@lists.01.org -https://lists.01.org/mailman/listinfo/linux-nvdimm +> diff --git a/N3/2.1.hdr b/N3/2.1.hdr new file mode 100644 index 0000000..e057038 --- /dev/null +++ b/N3/2.1.hdr @@ -0,0 +1,31 @@ +Return-Path: <alexander.h.duyck@linux.intel.com> +X-Original-To: yi.z.zhang@linux.intel.com +Delivered-To: yi.z.zhang@linux.intel.com +Received: from orsmga001.jf.intel.com (orsmga001.jf.intel.com [10.7.209.18]) + (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) + (No client certificate requested) + by linux.intel.com (Postfix) with ESMTPS id DEBC8580430; + Wed, 17 Oct 2018 16:54:31 -0700 (PDT) +X-ExtLoop1: 1 +X-IronPort-AV: E=Sophos;i="5.54,393,1534834800"; + d="scan'208";a="100366643" +Received: from ahduyck-mobl.amr.corp.intel.com (HELO localhost.localdomain) ([10.7.198.154]) + by orsmga001.jf.intel.com with ESMTP; 17 Oct 2018 16:54:31 -0700 +Subject: [mm PATCH v4 5/6] mm: Add reserved flag setting to set_page_links +From: Alexander Duyck <alexander.h.duyck@linux.intel.com> +To: linux-mm@kvack.org, akpm@linux-foundation.org +Cc: pavel.tatashin@microsoft.com, mhocko@suse.com, dave.jiang@intel.com, + alexander.h.duyck@linux.intel.com, linux-kernel@vger.kernel.org, + willy@infradead.org, davem@davemloft.net, yi.z.zhang@linux.intel.com, + khalid.aziz@oracle.com, rppt@linux.vnet.ibm.com, vbabka@suse.cz, + sparclinux@vger.kernel.org, dan.j.williams@intel.com, + ldufour@linux.vnet.ibm.com, mgorman@techsingularity.net, mingo@kernel.org, + kirill.shutemov@linux.intel.com +Date: Wed, 17 Oct 2018 16:54:31 -0700 +Message-ID: <20181017235431.17213.11512.stgit@localhost.localdomain> +In-Reply-To: <20181017235043.17213.92459.stgit@localhost.localdomain> +References: <20181017235043.17213.92459.stgit@localhost.localdomain> +User-Agent: StGit/0.17.1-dirty +MIME-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 7bit diff --git a/N3/2.1.txt b/N3/2.1.txt new file mode 100644 index 0000000..ee09c4a --- /dev/null +++ b/N3/2.1.txt @@ -0,0 +1,114 @@ +This patch modifies the set_page_links function to include the setting of +the reserved flag via a simple AND and OR operation. The motivation for +this is the fact that the existing __set_bit call still seems to have +effects on performance as replacing the call with the AND and OR can reduce +initialization time. + +Looking over the assembly code before and after the change the main +difference between the two is that the reserved bit is stored in a value +that is generated outside of the main initialization loop and is then +written with the other flags field values in one write to the page->flags +value. Previously the generated value was written and then then a btsq +instruction was issued. + +On my x86_64 test system with 3TB of persistent memory per node I saw the +persistent memory initialization time on average drop from 23.49s to +19.12s per node. + +Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com> +--- + include/linux/mm.h | 9 ++++++++- + mm/page_alloc.c | 29 +++++++++++++++++++---------- + 2 files changed, 27 insertions(+), 11 deletions(-) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 6e2c9631af05..14d06d7d2986 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1171,11 +1171,18 @@ static inline void set_page_node(struct page *page, unsigned long node) + page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; + } + ++static inline void set_page_reserved(struct page *page, bool reserved) ++{ ++ page->flags &= ~(1ul << PG_reserved); ++ page->flags |= (unsigned long)(!!reserved) << PG_reserved; ++} ++ + static inline void set_page_links(struct page *page, enum zone_type zone, +- unsigned long node, unsigned long pfn) ++ unsigned long node, unsigned long pfn, bool reserved) + { + set_page_zone(page, zone); + set_page_node(page, node); ++ set_page_reserved(page, reserved); + #ifdef SECTION_IN_PAGE_FLAGS + set_page_section(page, pfn_to_section_nr(pfn)); + #endif +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index a0b81e0bef03..e7fee7a5f8a3 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1179,7 +1179,7 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn, + unsigned long zone, int nid) + { + mm_zero_struct_page(page); +- set_page_links(page, zone, nid, pfn); ++ set_page_links(page, zone, nid, pfn, false); + init_page_count(page); + page_mapcount_reset(page); + page_cpupid_reset_last(page); +@@ -1195,7 +1195,8 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn, + static void __meminit __init_pageblock(unsigned long start_pfn, + unsigned long nr_pages, + unsigned long zone, int nid, +- struct dev_pagemap *pgmap) ++ struct dev_pagemap *pgmap, ++ bool is_reserved) + { + unsigned long nr_pgmask = pageblock_nr_pages - 1; + struct page *start_page = pfn_to_page(start_pfn); +@@ -1231,19 +1232,16 @@ static void __meminit __init_pageblock(unsigned long start_pfn, + * call because of the fact that the pfn number is used to + * get the section_nr and this function should not be + * spanning more than a single section. ++ * ++ * We can use a non-atomic operation for setting the ++ * PG_reserved flag as we are still initializing the pages. + */ +- set_page_links(page, zone, nid, start_pfn); ++ set_page_links(page, zone, nid, start_pfn, is_reserved); + init_page_count(page); + page_mapcount_reset(page); + page_cpupid_reset_last(page); + + /* +- * We can use the non-atomic __set_bit operation for setting +- * the flag as we are still initializing the pages. +- */ +- __SetPageReserved(page); +- +- /* + * ZONE_DEVICE pages union ->lru with a ->pgmap back + * pointer and hmm_data. It is a bug if a ZONE_DEVICE + * page is ever freed or placed on a driver-private list. +@@ -5612,7 +5610,18 @@ static void __meminit __memmap_init_hotplug(unsigned long size, int nid, + pfn = max(ALIGN_DOWN(pfn - 1, pageblock_nr_pages), start_pfn); + stride -= pfn; + +- __init_pageblock(pfn, stride, zone, nid, pgmap); ++ /* ++ * The last argument of __init_pageblock is a boolean ++ * value indicating if the page will be marked as reserved. ++ * ++ * Mark page reserved as it will need to wait for onlining ++ * phase for it to be fully associated with a zone. ++ * ++ * Under certain circumstances ZONE_DEVICE pages may not ++ * need to be marked as reserved, however there is still ++ * code that is depending on this being set for now. ++ */ ++ __init_pageblock(pfn, stride, zone, nid, pgmap, true); + + cond_resched(); + } diff --git a/N3/2.bin b/N3/2.bin new file mode 100644 index 0000000..d65fe6c --- /dev/null +++ b/N3/2.bin @@ -0,0 +1,146 @@ +Return-Path: <alexander.h.duyck@linux.intel.com> +X-Original-To: yi.z.zhang@linux.intel.com +Delivered-To: yi.z.zhang@linux.intel.com +Received: from orsmga001.jf.intel.com (orsmga001.jf.intel.com [10.7.209.18]) + (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) + (No client certificate requested) + by linux.intel.com (Postfix) with ESMTPS id DEBC8580430; + Wed, 17 Oct 2018 16:54:31 -0700 (PDT) +X-ExtLoop1: 1 +X-IronPort-AV: E=Sophos;i="5.54,393,1534834800"; + d="scan'208";a="100366643" +Received: from ahduyck-mobl.amr.corp.intel.com (HELO localhost.localdomain) ([10.7.198.154]) + by orsmga001.jf.intel.com with ESMTP; 17 Oct 2018 16:54:31 -0700 +Subject: [mm PATCH v4 5/6] mm: Add reserved flag setting to set_page_links +From: Alexander Duyck <alexander.h.duyck@linux.intel.com> +To: linux-mm@kvack.org, akpm@linux-foundation.org +Cc: pavel.tatashin@microsoft.com, mhocko@suse.com, dave.jiang@intel.com, + alexander.h.duyck@linux.intel.com, linux-kernel@vger.kernel.org, + willy@infradead.org, davem@davemloft.net, yi.z.zhang@linux.intel.com, + khalid.aziz@oracle.com, rppt@linux.vnet.ibm.com, vbabka@suse.cz, + sparclinux@vger.kernel.org, dan.j.williams@intel.com, + ldufour@linux.vnet.ibm.com, mgorman@techsingularity.net, mingo@kernel.org, + kirill.shutemov@linux.intel.com +Date: Wed, 17 Oct 2018 16:54:31 -0700 +Message-ID: <20181017235431.17213.11512.stgit@localhost.localdomain> +In-Reply-To: <20181017235043.17213.92459.stgit@localhost.localdomain> +References: <20181017235043.17213.92459.stgit@localhost.localdomain> +User-Agent: StGit/0.17.1-dirty +MIME-Version: 1.0 +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: 7bit + +This patch modifies the set_page_links function to include the setting of +the reserved flag via a simple AND and OR operation. The motivation for +this is the fact that the existing __set_bit call still seems to have +effects on performance as replacing the call with the AND and OR can reduce +initialization time. + +Looking over the assembly code before and after the change the main +difference between the two is that the reserved bit is stored in a value +that is generated outside of the main initialization loop and is then +written with the other flags field values in one write to the page->flags +value. Previously the generated value was written and then then a btsq +instruction was issued. + +On my x86_64 test system with 3TB of persistent memory per node I saw the +persistent memory initialization time on average drop from 23.49s to +19.12s per node. + +Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com> +--- + include/linux/mm.h | 9 ++++++++- + mm/page_alloc.c | 29 +++++++++++++++++++---------- + 2 files changed, 27 insertions(+), 11 deletions(-) + +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 6e2c9631af05..14d06d7d2986 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1171,11 +1171,18 @@ static inline void set_page_node(struct page *page, unsigned long node) + page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; + } + ++static inline void set_page_reserved(struct page *page, bool reserved) ++{ ++ page->flags &= ~(1ul << PG_reserved); ++ page->flags |= (unsigned long)(!!reserved) << PG_reserved; ++} ++ + static inline void set_page_links(struct page *page, enum zone_type zone, +- unsigned long node, unsigned long pfn) ++ unsigned long node, unsigned long pfn, bool reserved) + { + set_page_zone(page, zone); + set_page_node(page, node); ++ set_page_reserved(page, reserved); + #ifdef SECTION_IN_PAGE_FLAGS + set_page_section(page, pfn_to_section_nr(pfn)); + #endif +diff --git a/mm/page_alloc.c b/mm/page_alloc.c +index a0b81e0bef03..e7fee7a5f8a3 100644 +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1179,7 +1179,7 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn, + unsigned long zone, int nid) + { + mm_zero_struct_page(page); +- set_page_links(page, zone, nid, pfn); ++ set_page_links(page, zone, nid, pfn, false); + init_page_count(page); + page_mapcount_reset(page); + page_cpupid_reset_last(page); +@@ -1195,7 +1195,8 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn, + static void __meminit __init_pageblock(unsigned long start_pfn, + unsigned long nr_pages, + unsigned long zone, int nid, +- struct dev_pagemap *pgmap) ++ struct dev_pagemap *pgmap, ++ bool is_reserved) + { + unsigned long nr_pgmask = pageblock_nr_pages - 1; + struct page *start_page = pfn_to_page(start_pfn); +@@ -1231,19 +1232,16 @@ static void __meminit __init_pageblock(unsigned long start_pfn, + * call because of the fact that the pfn number is used to + * get the section_nr and this function should not be + * spanning more than a single section. ++ * ++ * We can use a non-atomic operation for setting the ++ * PG_reserved flag as we are still initializing the pages. + */ +- set_page_links(page, zone, nid, start_pfn); ++ set_page_links(page, zone, nid, start_pfn, is_reserved); + init_page_count(page); + page_mapcount_reset(page); + page_cpupid_reset_last(page); + + /* +- * We can use the non-atomic __set_bit operation for setting +- * the flag as we are still initializing the pages. +- */ +- __SetPageReserved(page); +- +- /* + * ZONE_DEVICE pages union ->lru with a ->pgmap back + * pointer and hmm_data. It is a bug if a ZONE_DEVICE + * page is ever freed or placed on a driver-private list. +@@ -5612,7 +5610,18 @@ static void __meminit __memmap_init_hotplug(unsigned long size, int nid, + pfn = max(ALIGN_DOWN(pfn - 1, pageblock_nr_pages), start_pfn); + stride -= pfn; + +- __init_pageblock(pfn, stride, zone, nid, pgmap); ++ /* ++ * The last argument of __init_pageblock is a boolean ++ * value indicating if the page will be marked as reserved. ++ * ++ * Mark page reserved as it will need to wait for onlining ++ * phase for it to be fully associated with a zone. ++ * ++ * Under certain circumstances ZONE_DEVICE pages may not ++ * need to be marked as reserved, however there is still ++ * code that is depending on this being set for now. ++ */ ++ __init_pageblock(pfn, stride, zone, nid, pgmap, true); + + cond_resched(); + } diff --git a/N3/2.hdr b/N3/2.hdr new file mode 100644 index 0000000..976a421 --- /dev/null +++ b/N3/2.hdr @@ -0,0 +1,2 @@ +Content-Type: message/rfc822 +Content-Disposition: inline diff --git a/a/content_digest b/N3/content_digest index cf7e835..a2ec0de 100644 --- a/a/content_digest +++ b/N3/content_digest @@ -12,20 +12,25 @@ "Subject\0Re: [PATCH V5 4/4] kvm: add a check if pfn is from NVDIMM pmem.\0" "Date\0Mon, 22 Oct 2018 16:47:00 +0800\0" "To\0Barret Rhoden <brho@google.com>\0" - "Cc\0J\303\251r\303\264me Glisse <jglisse@redhat.com>" - Alexander Duyck <alexander.h.duyck@linux.intel.com> - Zhang - Yu C <yu.c.zhang@intel.com> + "Cc\0David Hildenbrand <david@redhat.com>" + Dan Williams <dan.j.williams@intel.com> KVM list <kvm@vger.kernel.org> + Linux Kernel Mailing List <linux-kernel@vger.kernel.org> linux-nvdimm <linux-nvdimm@lists.01.org> + Paolo Bonzini <pbonzini@redhat.com> + Dave Jiang <dave.jiang@intel.com> + Zhang + Yu C <yu.c.zhang@intel.com> + Pankaj Gupta <pagupta@redhat.com> Jan Kara <jack@suse.cz> - David Hildenbrand <david@redhat.com> - Linux Kernel Mailing List <linux-kernel@vger.kernel.org> + Christoph Hellwig <hch@lst.de> Linux MM <linux-mm@kvack.org> rkrcmar@redhat.com - Paolo Bonzini <pbonzini@redhat.com> - " Christoph Hellwig <hch@lst.de>\0" - "\00:1\0" + " J\303\251r\303\264me Glisse <jglisse@redhat.com>" + Zhang + Yi Z <yi.z.zhang@intel.com> + " Alexander Duyck <alexander.h.duyck@linux.intel.com>\0" + "\01:1\0" "b\0" "On 2018-10-19 at 12:33:48 -0400, Barret Rhoden wrote:\n" "> On 2018-09-21 at 21:29 David Hildenbrand <david@redhat.com> wrote:\n" @@ -59,10 +64,271 @@ "> Barret\n" "> \n" "> \n" - "> \n" - "_______________________________________________\n" - "Linux-nvdimm mailing list\n" - "Linux-nvdimm@lists.01.org\n" - https://lists.01.org/mailman/listinfo/linux-nvdimm + > + "\01:2\0" + "b\0" + "Return-Path: <alexander.h.duyck@linux.intel.com>\n" + "X-Original-To: yi.z.zhang@linux.intel.com\n" + "Delivered-To: yi.z.zhang@linux.intel.com\n" + "Received: from orsmga001.jf.intel.com (orsmga001.jf.intel.com [10.7.209.18])\n" + "\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n" + "\t(No client certificate requested)\n" + "\tby linux.intel.com (Postfix) with ESMTPS id DEBC8580430;\n" + "\tWed, 17 Oct 2018 16:54:31 -0700 (PDT)\n" + "X-ExtLoop1: 1\n" + "X-IronPort-AV: E=Sophos;i=\"5.54,393,1534834800\"; \n" + " d=\"scan'208\";a=\"100366643\"\n" + "Received: from ahduyck-mobl.amr.corp.intel.com (HELO localhost.localdomain) ([10.7.198.154])\n" + " by orsmga001.jf.intel.com with ESMTP; 17 Oct 2018 16:54:31 -0700\n" + "Subject: [mm PATCH v4 5/6] mm: Add reserved flag setting to set_page_links\n" + "From: Alexander Duyck <alexander.h.duyck@linux.intel.com>\n" + "To: linux-mm@kvack.org, akpm@linux-foundation.org\n" + "Cc: pavel.tatashin@microsoft.com, mhocko@suse.com, dave.jiang@intel.com,\n" + " alexander.h.duyck@linux.intel.com, linux-kernel@vger.kernel.org,\n" + " willy@infradead.org, davem@davemloft.net, yi.z.zhang@linux.intel.com,\n" + " khalid.aziz@oracle.com, rppt@linux.vnet.ibm.com, vbabka@suse.cz,\n" + " sparclinux@vger.kernel.org, dan.j.williams@intel.com,\n" + " ldufour@linux.vnet.ibm.com, mgorman@techsingularity.net, mingo@kernel.org,\n" + " kirill.shutemov@linux.intel.com\n" + "Date: Wed, 17 Oct 2018 16:54:31 -0700\n" + "Message-ID: <20181017235431.17213.11512.stgit@localhost.localdomain>\n" + "In-Reply-To: <20181017235043.17213.92459.stgit@localhost.localdomain>\n" + "References: <20181017235043.17213.92459.stgit@localhost.localdomain>\n" + "User-Agent: StGit/0.17.1-dirty\n" + "MIME-Version: 1.0\n" + "Content-Type: text/plain; charset=\"utf-8\"\n" + "Content-Transfer-Encoding: 7bit\n" + "\n" + "This patch modifies the set_page_links function to include the setting of\n" + "the reserved flag via a simple AND and OR operation. The motivation for\n" + "this is the fact that the existing __set_bit call still seems to have\n" + "effects on performance as replacing the call with the AND and OR can reduce\n" + "initialization time.\n" + "\n" + "Looking over the assembly code before and after the change the main\n" + "difference between the two is that the reserved bit is stored in a value\n" + "that is generated outside of the main initialization loop and is then\n" + "written with the other flags field values in one write to the page->flags\n" + "value. Previously the generated value was written and then then a btsq\n" + "instruction was issued.\n" + "\n" + "On my x86_64 test system with 3TB of persistent memory per node I saw the\n" + "persistent memory initialization time on average drop from 23.49s to\n" + "19.12s per node.\n" + "\n" + "Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>\n" + "---\n" + " include/linux/mm.h | 9 ++++++++-\n" + " mm/page_alloc.c | 29 +++++++++++++++++++----------\n" + " 2 files changed, 27 insertions(+), 11 deletions(-)\n" + "\n" + "diff --git a/include/linux/mm.h b/include/linux/mm.h\n" + "index 6e2c9631af05..14d06d7d2986 100644\n" + "--- a/include/linux/mm.h\n" + "+++ b/include/linux/mm.h\n" + "@@ -1171,11 +1171,18 @@ static inline void set_page_node(struct page *page, unsigned long node)\n" + " \tpage->flags |= (node & NODES_MASK) << NODES_PGSHIFT;\n" + " }\n" + " \n" + "+static inline void set_page_reserved(struct page *page, bool reserved)\n" + "+{\n" + "+\tpage->flags &= ~(1ul << PG_reserved);\n" + "+\tpage->flags |= (unsigned long)(!!reserved) << PG_reserved;\n" + "+}\n" + "+\n" + " static inline void set_page_links(struct page *page, enum zone_type zone,\n" + "-\tunsigned long node, unsigned long pfn)\n" + "+\tunsigned long node, unsigned long pfn, bool reserved)\n" + " {\n" + " \tset_page_zone(page, zone);\n" + " \tset_page_node(page, node);\n" + "+\tset_page_reserved(page, reserved);\n" + " #ifdef SECTION_IN_PAGE_FLAGS\n" + " \tset_page_section(page, pfn_to_section_nr(pfn));\n" + " #endif\n" + "diff --git a/mm/page_alloc.c b/mm/page_alloc.c\n" + "index a0b81e0bef03..e7fee7a5f8a3 100644\n" + "--- a/mm/page_alloc.c\n" + "+++ b/mm/page_alloc.c\n" + "@@ -1179,7 +1179,7 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,\n" + " \t\t\t\tunsigned long zone, int nid)\n" + " {\n" + " \tmm_zero_struct_page(page);\n" + "-\tset_page_links(page, zone, nid, pfn);\n" + "+\tset_page_links(page, zone, nid, pfn, false);\n" + " \tinit_page_count(page);\n" + " \tpage_mapcount_reset(page);\n" + " \tpage_cpupid_reset_last(page);\n" + "@@ -1195,7 +1195,8 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,\n" + " static void __meminit __init_pageblock(unsigned long start_pfn,\n" + " \t\t\t\t unsigned long nr_pages,\n" + " \t\t\t\t unsigned long zone, int nid,\n" + "-\t\t\t\t struct dev_pagemap *pgmap)\n" + "+\t\t\t\t struct dev_pagemap *pgmap,\n" + "+\t\t\t\t bool is_reserved)\n" + " {\n" + " \tunsigned long nr_pgmask = pageblock_nr_pages - 1;\n" + " \tstruct page *start_page = pfn_to_page(start_pfn);\n" + "@@ -1231,19 +1232,16 @@ static void __meminit __init_pageblock(unsigned long start_pfn,\n" + " \t\t * call because of the fact that the pfn number is used to\n" + " \t\t * get the section_nr and this function should not be\n" + " \t\t * spanning more than a single section.\n" + "+\t\t *\n" + "+\t\t * We can use a non-atomic operation for setting the\n" + "+\t\t * PG_reserved flag as we are still initializing the pages.\n" + " \t\t */\n" + "-\t\tset_page_links(page, zone, nid, start_pfn);\n" + "+\t\tset_page_links(page, zone, nid, start_pfn, is_reserved);\n" + " \t\tinit_page_count(page);\n" + " \t\tpage_mapcount_reset(page);\n" + " \t\tpage_cpupid_reset_last(page);\n" + " \n" + " \t\t/*\n" + "-\t\t * We can use the non-atomic __set_bit operation for setting\n" + "-\t\t * the flag as we are still initializing the pages.\n" + "-\t\t */\n" + "-\t\t__SetPageReserved(page);\n" + "-\n" + "-\t\t/*\n" + " \t\t * ZONE_DEVICE pages union ->lru with a ->pgmap back\n" + " \t\t * pointer and hmm_data. It is a bug if a ZONE_DEVICE\n" + " \t\t * page is ever freed or placed on a driver-private list.\n" + "@@ -5612,7 +5610,18 @@ static void __meminit __memmap_init_hotplug(unsigned long size, int nid,\n" + " \t\tpfn = max(ALIGN_DOWN(pfn - 1, pageblock_nr_pages), start_pfn);\n" + " \t\tstride -= pfn;\n" + " \n" + "-\t\t__init_pageblock(pfn, stride, zone, nid, pgmap);\n" + "+\t\t/*\n" + "+\t\t * The last argument of __init_pageblock is a boolean\n" + "+\t\t * value indicating if the page will be marked as reserved.\n" + "+\t\t *\n" + "+\t\t * Mark page reserved as it will need to wait for onlining\n" + "+\t\t * phase for it to be fully associated with a zone.\n" + "+\t\t *\n" + "+\t\t * Under certain circumstances ZONE_DEVICE pages may not\n" + "+\t\t * need to be marked as reserved, however there is still\n" + "+\t\t * code that is depending on this being set for now.\n" + "+\t\t */\n" + "+\t\t__init_pageblock(pfn, stride, zone, nid, pgmap, true);\n" + " \n" + " \t\tcond_resched();\n" + " \t}\n" + "\n" + "\02:2.1\0" + "b\0" + "This patch modifies the set_page_links function to include the setting of\n" + "the reserved flag via a simple AND and OR operation. The motivation for\n" + "this is the fact that the existing __set_bit call still seems to have\n" + "effects on performance as replacing the call with the AND and OR can reduce\n" + "initialization time.\n" + "\n" + "Looking over the assembly code before and after the change the main\n" + "difference between the two is that the reserved bit is stored in a value\n" + "that is generated outside of the main initialization loop and is then\n" + "written with the other flags field values in one write to the page->flags\n" + "value. Previously the generated value was written and then then a btsq\n" + "instruction was issued.\n" + "\n" + "On my x86_64 test system with 3TB of persistent memory per node I saw the\n" + "persistent memory initialization time on average drop from 23.49s to\n" + "19.12s per node.\n" + "\n" + "Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>\n" + "---\n" + " include/linux/mm.h | 9 ++++++++-\n" + " mm/page_alloc.c | 29 +++++++++++++++++++----------\n" + " 2 files changed, 27 insertions(+), 11 deletions(-)\n" + "\n" + "diff --git a/include/linux/mm.h b/include/linux/mm.h\n" + "index 6e2c9631af05..14d06d7d2986 100644\n" + "--- a/include/linux/mm.h\n" + "+++ b/include/linux/mm.h\n" + "@@ -1171,11 +1171,18 @@ static inline void set_page_node(struct page *page, unsigned long node)\n" + " \tpage->flags |= (node & NODES_MASK) << NODES_PGSHIFT;\n" + " }\n" + " \n" + "+static inline void set_page_reserved(struct page *page, bool reserved)\n" + "+{\n" + "+\tpage->flags &= ~(1ul << PG_reserved);\n" + "+\tpage->flags |= (unsigned long)(!!reserved) << PG_reserved;\n" + "+}\n" + "+\n" + " static inline void set_page_links(struct page *page, enum zone_type zone,\n" + "-\tunsigned long node, unsigned long pfn)\n" + "+\tunsigned long node, unsigned long pfn, bool reserved)\n" + " {\n" + " \tset_page_zone(page, zone);\n" + " \tset_page_node(page, node);\n" + "+\tset_page_reserved(page, reserved);\n" + " #ifdef SECTION_IN_PAGE_FLAGS\n" + " \tset_page_section(page, pfn_to_section_nr(pfn));\n" + " #endif\n" + "diff --git a/mm/page_alloc.c b/mm/page_alloc.c\n" + "index a0b81e0bef03..e7fee7a5f8a3 100644\n" + "--- a/mm/page_alloc.c\n" + "+++ b/mm/page_alloc.c\n" + "@@ -1179,7 +1179,7 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,\n" + " \t\t\t\tunsigned long zone, int nid)\n" + " {\n" + " \tmm_zero_struct_page(page);\n" + "-\tset_page_links(page, zone, nid, pfn);\n" + "+\tset_page_links(page, zone, nid, pfn, false);\n" + " \tinit_page_count(page);\n" + " \tpage_mapcount_reset(page);\n" + " \tpage_cpupid_reset_last(page);\n" + "@@ -1195,7 +1195,8 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,\n" + " static void __meminit __init_pageblock(unsigned long start_pfn,\n" + " \t\t\t\t unsigned long nr_pages,\n" + " \t\t\t\t unsigned long zone, int nid,\n" + "-\t\t\t\t struct dev_pagemap *pgmap)\n" + "+\t\t\t\t struct dev_pagemap *pgmap,\n" + "+\t\t\t\t bool is_reserved)\n" + " {\n" + " \tunsigned long nr_pgmask = pageblock_nr_pages - 1;\n" + " \tstruct page *start_page = pfn_to_page(start_pfn);\n" + "@@ -1231,19 +1232,16 @@ static void __meminit __init_pageblock(unsigned long start_pfn,\n" + " \t\t * call because of the fact that the pfn number is used to\n" + " \t\t * get the section_nr and this function should not be\n" + " \t\t * spanning more than a single section.\n" + "+\t\t *\n" + "+\t\t * We can use a non-atomic operation for setting the\n" + "+\t\t * PG_reserved flag as we are still initializing the pages.\n" + " \t\t */\n" + "-\t\tset_page_links(page, zone, nid, start_pfn);\n" + "+\t\tset_page_links(page, zone, nid, start_pfn, is_reserved);\n" + " \t\tinit_page_count(page);\n" + " \t\tpage_mapcount_reset(page);\n" + " \t\tpage_cpupid_reset_last(page);\n" + " \n" + " \t\t/*\n" + "-\t\t * We can use the non-atomic __set_bit operation for setting\n" + "-\t\t * the flag as we are still initializing the pages.\n" + "-\t\t */\n" + "-\t\t__SetPageReserved(page);\n" + "-\n" + "-\t\t/*\n" + " \t\t * ZONE_DEVICE pages union ->lru with a ->pgmap back\n" + " \t\t * pointer and hmm_data. It is a bug if a ZONE_DEVICE\n" + " \t\t * page is ever freed or placed on a driver-private list.\n" + "@@ -5612,7 +5610,18 @@ static void __meminit __memmap_init_hotplug(unsigned long size, int nid,\n" + " \t\tpfn = max(ALIGN_DOWN(pfn - 1, pageblock_nr_pages), start_pfn);\n" + " \t\tstride -= pfn;\n" + " \n" + "-\t\t__init_pageblock(pfn, stride, zone, nid, pgmap);\n" + "+\t\t/*\n" + "+\t\t * The last argument of __init_pageblock is a boolean\n" + "+\t\t * value indicating if the page will be marked as reserved.\n" + "+\t\t *\n" + "+\t\t * Mark page reserved as it will need to wait for onlining\n" + "+\t\t * phase for it to be fully associated with a zone.\n" + "+\t\t *\n" + "+\t\t * Under certain circumstances ZONE_DEVICE pages may not\n" + "+\t\t * need to be marked as reserved, however there is still\n" + "+\t\t * code that is depending on this being set for now.\n" + "+\t\t */\n" + "+\t\t__init_pageblock(pfn, stride, zone, nid, pgmap, true);\n" + " \n" + " \t\tcond_resched();\n" + " \t}" -fc2f327d6010839dcbbface8f3fb308a7ad10ae6717140856578a9b54d423885 +b6d346ae1d09ced42675714802d290c5fa2ec4667e65d87c77640f2df3a32b9f
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.