linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Vlastimil Babka <vbabka@suse.cz>
To: Mel Gorman <mgorman@suse.de>, Sasha Levin <sasha.levin@oracle.com>
Cc: Linux-MM <linux-mm@kvack.org>,
	Linux-FSDevel <linux-fsdevel@vger.kernel.org>,
	Johannes Weiner <hannes@cmpxchg.org>, Jan Kara <jack@suse.cz>,
	Michal Hocko <mhocko@suse.cz>, Hugh Dickins <hughd@google.com>,
	Linux Kernel <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH 08/17] mm: page_alloc: Use word-based accesses for get/set pageblock bitmaps
Date: Mon, 05 May 2014 14:40:38 +0200	[thread overview]
Message-ID: <536786C6.8040805@suse.cz> (raw)
In-Reply-To: <20140504131454.GS23991@suse.de>

On 05/04/2014 03:14 PM, Mel Gorman wrote:
> On Fri, May 02, 2014 at 06:34:52PM -0400, Sasha Levin wrote:
>> Hi Mel,
>>
>> Vlastimil Babka suggested I should try this patch to work around a different
>> issue I'm seeing, and noticed that it doesn't build because:
>>
>
> Rebasing SNAFU. Can you try this instead?
>
> ---8<---
> mm: page_alloc: Use word-based accesses for get/set pageblock bitmaps
>
> The test_bit operations in get/set pageblock flags are expensive. This patch
> reads the bitmap on a word basis and uses shifts and masks to isolate the bits
> of interest. Similarly masks are used to set a local copy of the bitmap and then
> use cmpxchg to update the bitmap if there have been no other changes made in
> parallel.
>
> In a test running dd onto tmpfs the overhead of the pageblock-related
> functions went from 1.27% in profiles to 0.5%.
>
> Signed-off-by: Mel Gorman <mgorman@suse.de>
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index fac5509..c84703d 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -75,9 +75,14 @@ enum {
>
>   extern int page_group_by_mobility_disabled;
>
> +#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1)
> +#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1)
> +
>   static inline int get_pageblock_migratetype(struct page *page)
>   {
> -	return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
> +	BUILD_BUG_ON(PB_migrate_end - PB_migrate != 2);
> +	return get_pageblock_flags_mask(page, PB_migrate_end,
> +					NR_MIGRATETYPE_BITS, MIGRATETYPE_MASK);
>   }
>
>   struct free_area {
> diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
> index 2ee8cd2..bc37036 100644
> --- a/include/linux/pageblock-flags.h
> +++ b/include/linux/pageblock-flags.h
> @@ -30,9 +30,12 @@ enum pageblock_bits {
>   	PB_migrate,
>   	PB_migrate_end = PB_migrate + 3 - 1,
>   			/* 3 bits required for migrate types */
> -#ifdef CONFIG_COMPACTION
>   	PB_migrate_skip,/* If set the block is skipped by compaction */
> -#endif /* CONFIG_COMPACTION */
> +
> +	/*
> +	 * Assume the bits will always align on a word. If this assumption
> +	 * changes then get/set pageblock needs updating.
> +	 */
>   	NR_PAGEBLOCK_BITS
>   };
>
> @@ -62,11 +65,35 @@ extern int pageblock_order;
>   /* Forward declaration */
>   struct page;
>
> +unsigned long get_pageblock_flags_mask(struct page *page,
> +				unsigned long end_bitidx,
> +				unsigned long nr_flag_bits,
> +				unsigned long mask);
> +void set_pageblock_flags_mask(struct page *page,
> +				unsigned long flags,
> +				unsigned long end_bitidx,
> +				unsigned long nr_flag_bits,
> +				unsigned long mask);
> +

The nr_flag_bits parameter is not used anymore and can be dropped.

>   /* Declarations for getting and setting flags. See mm/page_alloc.c */
> -unsigned long get_pageblock_flags_group(struct page *page,
> -					int start_bitidx, int end_bitidx);
> -void set_pageblock_flags_group(struct page *page, unsigned long flags,
> -					int start_bitidx, int end_bitidx);
> +static inline unsigned long get_pageblock_flags_group(struct page *page,
> +					int start_bitidx, int end_bitidx)
> +{
> +	unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1;
> +	unsigned long mask = (1 << nr_flag_bits) - 1;
> +
> +	return get_pageblock_flags_mask(page, end_bitidx, nr_flag_bits, mask);
> +}
> +
> +static inline void set_pageblock_flags_group(struct page *page,
> +					unsigned long flags,
> +					int start_bitidx, int end_bitidx)
> +{
> +	unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1;
> +	unsigned long mask = (1 << nr_flag_bits) - 1;
> +
> +	set_pageblock_flags_mask(page, flags, end_bitidx, nr_flag_bits, mask);
> +}
>
>   #ifdef CONFIG_COMPACTION
>   #define get_pageblock_skip(page) \
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index dc123ff..f393b0e 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -6032,53 +6032,64 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
>    * @end_bitidx: The last bit of interest
>    * returns pageblock_bits flags
>    */
> -unsigned long get_pageblock_flags_group(struct page *page,
> -					int start_bitidx, int end_bitidx)
> +unsigned long get_pageblock_flags_mask(struct page *page,
> +					unsigned long end_bitidx,
> +					unsigned long nr_flag_bits,
> +					unsigned long mask)
>   {
>   	struct zone *zone;
>   	unsigned long *bitmap;
> -	unsigned long pfn, bitidx;
> -	unsigned long flags = 0;
> -	unsigned long value = 1;
> +	unsigned long pfn, bitidx, word_bitidx;
> +	unsigned long word;
>
>   	zone = page_zone(page);
>   	pfn = page_to_pfn(page);
>   	bitmap = get_pageblock_bitmap(zone, pfn);
>   	bitidx = pfn_to_bitidx(zone, pfn);
> +	word_bitidx = bitidx / BITS_PER_LONG;
> +	bitidx &= (BITS_PER_LONG-1);
>
> -	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
> -		if (test_bit(bitidx + start_bitidx, bitmap))
> -			flags |= value;
> -
> -	return flags;
> +	word = bitmap[word_bitidx];

I wonder if on some architectures this may result in an inconsistent word
when racing with set(), i.e. with the cmpxchg? We need consistency at least
at byte granularity to prevent bogus migratetype values from being read.

> +	bitidx += end_bitidx;
> +	return (word >> (BITS_PER_LONG - bitidx - 1)) & mask;

Yes that looks correct to me, bits don't seem to overlap anymore.

>   }
>
>   /**
> - * set_pageblock_flags_group - Set the requested group of flags for a pageblock_nr_pages block of pages
> + * set_pageblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
>    * @page: The page within the block of interest
>    * @start_bitidx: The first bit of interest
>    * @end_bitidx: The last bit of interest
>    * @flags: The flags to set
>    */
> -void set_pageblock_flags_group(struct page *page, unsigned long flags,
> -					int start_bitidx, int end_bitidx)
> +void set_pageblock_flags_mask(struct page *page, unsigned long flags,
> +					unsigned long end_bitidx,
> +					unsigned long nr_flag_bits,
> +					unsigned long mask)
>   {
>   	struct zone *zone;
>   	unsigned long *bitmap;
> -	unsigned long pfn, bitidx;
> -	unsigned long value = 1;
> +	unsigned long pfn, bitidx, word_bitidx;
> +	unsigned long old_word, new_word;
> +
> +	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
>
>   	zone = page_zone(page);
>   	pfn = page_to_pfn(page);
>   	bitmap = get_pageblock_bitmap(zone, pfn);
>   	bitidx = pfn_to_bitidx(zone, pfn);
> +	word_bitidx = bitidx / BITS_PER_LONG;
> +	bitidx &= (BITS_PER_LONG-1);
> +
>   	VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page);
>
> -	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
> -		if (flags & value)
> -			__set_bit(bitidx + start_bitidx, bitmap);
> -		else
> -			__clear_bit(bitidx + start_bitidx, bitmap);
> +	bitidx += end_bitidx;
> +	mask <<= (BITS_PER_LONG - bitidx - 1);
> +	flags <<= (BITS_PER_LONG - bitidx - 1);
> +
> +	do {
> +		old_word = ACCESS_ONCE(bitmap[word_bitidx]);
> +		new_word = (old_word & ~mask) | flags;
> +	} while (cmpxchg(&bitmap[word_bitidx], old_word, new_word) != old_word);

The bitfield logic here seems fine as well.

>   }
>
>   /*
>

  reply	other threads:[~2014-05-05 12:40 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-01  8:44 [PATCH 00/17] Misc page alloc, shmem, mark_page_accessed and page_waitqueue optimisations Mel Gorman
2014-05-01  8:44 ` [PATCH 01/17] mm: page_alloc: Do not update zlc unless the zlc is active Mel Gorman
2014-05-01 13:25   ` Johannes Weiner
2014-05-06 15:04   ` Rik van Riel
2014-05-01  8:44 ` [PATCH 02/17] mm: page_alloc: Do not treat a zone that cannot be used for dirty pages as "full" Mel Gorman
2014-05-06 15:09   ` Rik van Riel
2014-05-01  8:44 ` [PATCH 03/17] mm: page_alloc: Use jump labels to avoid checking number_of_cpusets Mel Gorman
2014-05-06 15:10   ` Rik van Riel
2014-05-06 20:23   ` Peter Zijlstra
2014-05-06 22:21     ` Mel Gorman
2014-05-07  9:04       ` Peter Zijlstra
2014-05-07  9:43         ` Mel Gorman
2014-05-01  8:44 ` [PATCH 04/17] mm: page_alloc: Calculate classzone_idx once from the zonelist ref Mel Gorman
2014-05-06 16:01   ` Rik van Riel
2014-05-01  8:44 ` [PATCH 05/17] mm: page_alloc: Only check the zone id check if pages are buddies Mel Gorman
2014-05-06 16:48   ` Rik van Riel
2014-05-01  8:44 ` [PATCH 06/17] mm: page_alloc: Only check the alloc flags and gfp_mask for dirty once Mel Gorman
2014-05-06 17:24   ` Rik van Riel
2014-05-01  8:44 ` [PATCH 07/17] mm: page_alloc: Take the ALLOC_NO_WATERMARK check out of the fast path Mel Gorman
2014-05-06 17:25   ` Rik van Riel
2014-05-01  8:44 ` [PATCH 08/17] mm: page_alloc: Use word-based accesses for get/set pageblock bitmaps Mel Gorman
2014-05-02 22:34   ` Sasha Levin
2014-05-04 13:14     ` Mel Gorman
2014-05-05 12:40       ` Vlastimil Babka [this message]
2014-05-06  9:13         ` Mel Gorman
2014-05-06 14:42           ` Vlastimil Babka
2014-05-06 15:12             ` Mel Gorman
2014-05-06 20:34   ` Peter Zijlstra
2014-05-06 22:24     ` Mel Gorman
2014-05-01  8:44 ` [PATCH 09/17] mm: page_alloc: Reduce number of times page_to_pfn is called Mel Gorman
2014-05-06 18:47   ` Rik van Riel
2014-05-01  8:44 ` [PATCH 10/17] mm: page_alloc: Lookup pageblock migratetype with IRQs enabled during free Mel Gorman
2014-05-06 18:48   ` Rik van Riel
2014-05-01  8:44 ` [PATCH 11/17] mm: page_alloc: Use unsigned int for order in more places Mel Gorman
2014-05-01 14:35   ` Dave Hansen
2014-05-01 15:11     ` Mel Gorman
2014-05-01 15:38       ` Dave Hansen
2014-05-06 18:49   ` Rik van Riel
2014-05-01  8:44 ` [PATCH 12/17] mm: page_alloc: Convert hot/cold parameter and immediate callers to bool Mel Gorman
2014-05-06 18:49   ` Rik van Riel
2014-05-01  8:44 ` [PATCH 13/17] mm: shmem: Avoid atomic operation during shmem_getpage_gfp Mel Gorman
2014-05-06 18:53   ` Rik van Riel
2014-05-01  8:44 ` [PATCH 14/17] mm: Do not use atomic operations when releasing pages Mel Gorman
2014-05-01 13:29   ` Johannes Weiner
2014-05-01 13:39     ` Mel Gorman
2014-05-01 13:47       ` Johannes Weiner
2014-05-06 18:54   ` Rik van Riel
2014-05-01  8:44 ` [PATCH 15/17] mm: Do not use unnecessary atomic operations when adding pages to the LRU Mel Gorman
2014-05-01 13:33   ` Johannes Weiner
2014-05-01 13:40     ` Mel Gorman
2014-05-06 15:30   ` Vlastimil Babka
2014-05-06 15:55     ` Mel Gorman
2014-05-01  8:44 ` [PATCH 16/17] mm: Non-atomically mark page accessed during page cache allocation where possible Mel Gorman
2014-05-01  8:44 ` [PATCH 17/17] mm: filemap: Avoid unnecessary barries and waitqueue lookup in unlock_page fastpath Mel Gorman
2014-05-05 10:50   ` Jan Kara
2014-05-07  9:03     ` Mel Gorman
2014-05-06 20:30   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=536786C6.8040805@suse.cz \
    --to=vbabka@suse.cz \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.cz \
    --cc=sasha.levin@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).