All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: Oscar Salvador <osalvador@suse.de>, akpm@linux-foundation.org
Cc: dan.j.williams@intel.com, david@redhat.com,
	pasha.tatashin@soleen.com, mhocko@suse.com, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, Oscar Salvador <osalvador@suse.de>
Subject: Re: [PATCH 2/2] mm,memory_hotplug: Fix shrink_{zone,node}_span
Date: Mon, 15 Jul 2019 21:41:18 +0530	[thread overview]
Message-ID: <87tvbne0rd.fsf@linux.ibm.com> (raw)
In-Reply-To: <20190715081549.32577-3-osalvador@suse.de>

Oscar Salvador <osalvador@suse.de> writes:

> Since [1], shrink_{zone,node}_span work on PAGES_PER_SUBSECTION granularity.
> The problem is that deactivation of the section occurs later on in
> sparse_remove_section, so pfn_valid()->pfn_section_valid() will always return
> true before we deactivate the {sub}section.

Can you explain this more? The patch doesn't update section_mem_map
update sequence. So what changed? What is the problem in finding
pfn_valid() return true there?

>
> I spotted this during hotplug hotremove tests, there I always saw that
> spanned_pages was, at least, left with PAGES_PER_SECTION, even if we
> removed all memory linked to that zone.
>
> Fix this by decoupling section_deactivate from sparse_remove_section, and
> re-order the function calls.
>
> Now, __remove_section will:
>
> 1) deactivate section
> 2) shrink {zone,node}'s pages
> 3) remove section
>
> [1] https://patchwork.kernel.org/patch/11003467/
>
> Fixes: mmotm ("mm/hotplug: prepare shrink_{zone, pgdat}_span for sub-section removal")
> Signed-off-by: Oscar Salvador <osalvador@suse.de>
> ---
>  include/linux/memory_hotplug.h |  7 ++--
>  mm/memory_hotplug.c            |  6 +++-
>  mm/sparse.c                    | 77 +++++++++++++++++++++++++++++-------------
>  3 files changed, 62 insertions(+), 28 deletions(-)
>
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index f46ea71b4ffd..d2eb917aad5f 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -348,9 +348,10 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
>  extern bool is_memblock_offlined(struct memory_block *mem);
>  extern int sparse_add_section(int nid, unsigned long pfn,
>  		unsigned long nr_pages, struct vmem_altmap *altmap);
> -extern void sparse_remove_section(struct mem_section *ms,
> -		unsigned long pfn, unsigned long nr_pages,
> -		unsigned long map_offset, struct vmem_altmap *altmap);
> +int sparse_deactivate_section(unsigned long pfn, unsigned long nr_pages);
> +void sparse_remove_section(unsigned long pfn, unsigned long nr_pages,
> +                           unsigned long map_offset, struct vmem_altmap *altmap,
> +                           int section_empty);
>  extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
>  					  unsigned long pnum);
>  extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index b9ba5b85f9f7..03d535eee60d 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -517,12 +517,16 @@ static void __remove_section(struct zone *zone, unsigned long pfn,
>  		struct vmem_altmap *altmap)
>  {
>  	struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn));
> +	int ret;
>  
>  	if (WARN_ON_ONCE(!valid_section(ms)))
>  		return;
>  
> +	ret = sparse_deactivate_section(pfn, nr_pages);
>  	__remove_zone(zone, pfn, nr_pages);
> -	sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap);
> +	if (ret >= 0)
> +		sparse_remove_section(pfn, nr_pages, map_offset, altmap,
> +				      ret);
>  }
>  
>  /**
> diff --git a/mm/sparse.c b/mm/sparse.c
> index 1e224149aab6..d4953ee1d087 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -732,16 +732,47 @@ static void free_map_bootmem(struct page *memmap)
>  }
>  #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>  
> -static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
> -		struct vmem_altmap *altmap)
> +static void section_remove(unsigned long pfn, unsigned long nr_pages,
> +			   struct vmem_altmap *altmap, int section_empty)
> +{
> +	struct mem_section *ms = __pfn_to_section(pfn);
> +	bool section_early = early_section(ms);
> +	struct page *memmap = NULL;
> +
> +	if (section_empty) {
> +		unsigned long section_nr = pfn_to_section_nr(pfn);
> +
> +		if (!section_early) {
> +			kfree(ms->usage);
> +			ms->usage = NULL;
> +		}
> +		memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
> +		ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr);
> +	}
> +
> +        if (section_early && memmap)
> +		free_map_bootmem(memmap);
> +        else
> +		depopulate_section_memmap(pfn, nr_pages, altmap);
> +}
> +
> +/**
> + * section_deactivate: Deactivate a {sub}section.
> + *
> + * Return:
> + * * -1         - {sub}section has already been deactivated.
> + * * 0          - Section is not empty
> + * * 1          - Section is empty
> + */
> +
> +static int section_deactivate(unsigned long pfn, unsigned long nr_pages)
>  {
>  	DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
>  	DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
>  	struct mem_section *ms = __pfn_to_section(pfn);
> -	bool section_is_early = early_section(ms);
> -	struct page *memmap = NULL;
>  	unsigned long *subsection_map = ms->usage
>  		? &ms->usage->subsection_map[0] : NULL;
> +	int section_empty = 0;
>  
>  	subsection_mask_set(map, pfn, nr_pages);
>  	if (subsection_map)
> @@ -750,7 +781,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
>  	if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
>  				"section already deactivated (%#lx + %ld)\n",
>  				pfn, nr_pages))
> -		return;
> +		return -1;
>  
>  	/*
>  	 * There are 3 cases to handle across two configurations
> @@ -770,21 +801,10 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
>  	 * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified
>  	 */
>  	bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
> -	if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) {
> -		unsigned long section_nr = pfn_to_section_nr(pfn);
> -
> -		if (!section_is_early) {
> -			kfree(ms->usage);
> -			ms->usage = NULL;
> -		}
> -		memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
> -		ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr);
> -	}
> +	if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION))
> +		section_empty = 1;
>  
> -	if (section_is_early && memmap)
> -		free_map_bootmem(memmap);
> -	else
> -		depopulate_section_memmap(pfn, nr_pages, altmap);
> +	return section_empty;
>  }
>  
>  static struct page * __meminit section_activate(int nid, unsigned long pfn,
> @@ -834,7 +854,11 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn,
>  
>  	memmap = populate_section_memmap(pfn, nr_pages, nid, altmap);
>  	if (!memmap) {
> -		section_deactivate(pfn, nr_pages, altmap);
> +		int ret;
> +
> +		ret = section_deactivate(pfn, nr_pages);
> +		if (ret >= 0)
> +			section_remove(pfn, nr_pages, altmap, ret);
>  		return ERR_PTR(-ENOMEM);
>  	}
>  
> @@ -919,12 +943,17 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
>  }
>  #endif
>  
> -void sparse_remove_section(struct mem_section *ms, unsigned long pfn,
> -		unsigned long nr_pages, unsigned long map_offset,
> -		struct vmem_altmap *altmap)
> +int sparse_deactivate_section(unsigned long pfn, unsigned long nr_pages)
> +{
> +	return section_deactivate(pfn, nr_pages);
> +}
> +
> +void sparse_remove_section(unsigned long pfn, unsigned long nr_pages,
> +			   unsigned long map_offset, struct vmem_altmap *altmap,
> +			   int section_empty)
>  {
>  	clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
>  			nr_pages - map_offset);
> -	section_deactivate(pfn, nr_pages, altmap);
> +	section_remove(pfn, nr_pages, altmap, section_empty);
>  }
>  #endif /* CONFIG_MEMORY_HOTPLUG */
> -- 
> 2.12.3


  reply	other threads:[~2019-07-15 16:12 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-15  8:15 [PATCH 0/2] Fixes for sub-section hotplug Oscar Salvador
2019-07-15  8:15 ` [PATCH 1/2] mm,sparse: Fix deactivate_section for early sections Oscar Salvador
2019-07-15 16:02   ` Aneesh Kumar K.V
2019-07-16  4:33   ` Dan Williams
2019-07-18 12:07   ` David Hildenbrand
2019-07-15  8:15 ` [PATCH 2/2] mm,memory_hotplug: Fix shrink_{zone,node}_span Oscar Salvador
2019-07-15 16:11   ` Aneesh Kumar K.V [this message]
2019-07-15 21:24     ` Oscar Salvador
2019-07-17  2:28       ` Dan Williams
2019-07-17  7:38         ` Oscar Salvador
2019-07-17  8:01           ` David Hildenbrand
2019-07-17  8:08             ` Oscar Salvador
2019-07-17  5:38       ` Aneesh Kumar K.V
2019-07-17  7:42         ` Oscar Salvador
2019-07-18 12:05   ` Oscar Salvador

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87tvbne0rd.fsf@linux.ibm.com \
    --to=aneesh.kumar@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=david@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=osalvador@suse.de \
    --cc=pasha.tatashin@soleen.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.