Re: [PATCH 4/5] mm: compaction: Determine if dirty pages can be migreated without blocking within ->migratepage

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Nai Xia <nai.xia@gmail.com>
To: Mel Gorman <mgorman@suse.de>
Cc: Linux-MM <linux-mm@kvack.org>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Minchan Kim <minchan.kim@gmail.com>, Jan Kara <jack@suse.cz>,
	Andy Isaacson <adi@hexapodia.org>,
	Johannes Weiner <jweiner@redhat.com>,
	LKML <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH 4/5] mm: compaction: Determine if dirty pages can be migreated without blocking within ->migratepage
Date: Sat, 19 Nov 2011 16:59:10 +0800	[thread overview]
Message-ID: <CAPQyPG4GTccLroA2NsdQK_PH1_KB3dD1v3m1FzenCeDW-8qb+g@mail.gmail.com> (raw)
In-Reply-To: <1321635524-8586-5-git-send-email-mgorman@suse.de>

On Sat, Nov 19, 2011 at 12:58 AM, Mel Gorman <mgorman@suse.de> wrote:
> Asynchronous compaction is used when allocating transparent hugepages to
> avoid blocking for long periods of time. Due to reports of stalling,
> synchronous compaction is never used but this impacts allocation
> success rates. When deciding whether to migrate dirty pages, the
> following check is made
>
>        if (PageDirty(page) && !sync &&
>                mapping->a_ops->migratepage != migrate_page)
>                        rc = -EBUSY;
>
> This skips over all pages using buffer_migrate_page() even though
> it is possible to migrate some of these pages without blocking. This
> patch updates the ->migratepage callback with a "sync" parameter. It
> is the responsibility of the callback to gracefully fail migration of
> the page if it cannot be achieved without blocking.
>
> Signed-off-by: Mel Gorman <mgorman@suse.de>
> ---
>  fs/btrfs/disk-io.c      |    2 +-
>  fs/nfs/internal.h       |    2 +-
>  fs/nfs/write.c          |    4 +-
>  include/linux/fs.h      |    9 +++-
>  include/linux/migrate.h |    2 +-
>  mm/migrate.c            |  106 ++++++++++++++++++++++++++++++++---------------
>  6 files changed, 83 insertions(+), 42 deletions(-)
>
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 62afe5c..f841f00 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -872,7 +872,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
>
>  #ifdef CONFIG_MIGRATION
>  static int btree_migratepage(struct address_space *mapping,
> -                       struct page *newpage, struct page *page)
> +                       struct page *newpage, struct page *page, bool sync)
>  {
>        /*
>         * we can't safely write a btree page from here,
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index c1a1bd8..d0c460f 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -328,7 +328,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data);
>
>  #ifdef CONFIG_MIGRATION
>  extern int nfs_migrate_page(struct address_space *,
> -               struct page *, struct page *);
> +               struct page *, struct page *, bool);
>  #else
>  #define nfs_migrate_page NULL
>  #endif
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index 1dda78d..33475df 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -1711,7 +1711,7 @@ out_error:
>
>  #ifdef CONFIG_MIGRATION
>  int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
> -               struct page *page)
> +               struct page *page, bool sync)
>  {
>        /*
>         * If PagePrivate is set, then the page is currently associated with
> @@ -1726,7 +1726,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
>
>        nfs_fscache_release_page(page, GFP_KERNEL);
>
> -       return migrate_page(mapping, newpage, page);
> +       return migrate_page(mapping, newpage, page, sync);
>  }
>  #endif
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 0c4df26..67f8e46 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -609,9 +609,12 @@ struct address_space_operations {
>                        loff_t offset, unsigned long nr_segs);
>        int (*get_xip_mem)(struct address_space *, pgoff_t, int,
>                                                void **, unsigned long *);
> -       /* migrate the contents of a page to the specified target */
> +       /*
> +        * migrate the contents of a page to the specified target. If sync
> +        * is false, it must not block. If it needs to block, return -EBUSY
> +        */
>        int (*migratepage) (struct address_space *,
> -                       struct page *, struct page *);
> +                       struct page *, struct page *, bool);
>        int (*launder_page) (struct page *);
>        int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
>                                        unsigned long);
> @@ -2577,7 +2580,7 @@ extern int generic_check_addressable(unsigned, u64);
>
>  #ifdef CONFIG_MIGRATION
>  extern int buffer_migrate_page(struct address_space *,
> -                               struct page *, struct page *);
> +                               struct page *, struct page *, bool);
>  #else
>  #define buffer_migrate_page NULL
>  #endif
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index e39aeec..14e6d2a 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -11,7 +11,7 @@ typedef struct page *new_page_t(struct page *, unsigned long private, int **);
>
>  extern void putback_lru_pages(struct list_head *l);
>  extern int migrate_page(struct address_space *,
> -                       struct page *, struct page *);
> +                       struct page *, struct page *, bool);
>  extern int migrate_pages(struct list_head *l, new_page_t x,
>                        unsigned long private, bool offlining,
>                        bool sync);
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 578e291..8395697 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -415,7 +415,7 @@ EXPORT_SYMBOL(fail_migrate_page);
>  * Pages are locked upon entry and exit.
>  */
>  int migrate_page(struct address_space *mapping,
> -               struct page *newpage, struct page *page)
> +               struct page *newpage, struct page *page, bool sync)
>  {
>        int rc;
>
> @@ -432,19 +432,60 @@ int migrate_page(struct address_space *mapping,
>  EXPORT_SYMBOL(migrate_page);
>
>  #ifdef CONFIG_BLOCK
> +
> +/* Returns true if all buffers are successfully locked */
> +bool buffer_migrate_lock_buffers(struct buffer_head *head, bool sync)
> +{
> +       struct buffer_head *bh = head;
> +
> +       /* Simple case, sync compaction */
> +       if (sync) {
> +               do {
> +                       get_bh(bh);
> +                       lock_buffer(bh);
> +                       bh = bh->b_this_page;
> +
> +               } while (bh != head);
> +
> +               return true;
> +       }
> +
> +       /* async case, we cannot block on lock_buffer so use trylock_buffer */
> +       do {
> +               get_bh(bh);
> +               if (!trylock_buffer(bh)) {
> +                       /*
> +                        * We failed to lock the buffer and cannot stall in
> +                        * async migration. Release the taken locks
> +                        */
> +                       struct buffer_head *failed_bh = bh;
> +                       bh = head;
> +                       do {
> +                               unlock_buffer(bh);
> +                               put_bh(bh);
> +                               bh = bh->b_this_page;
> +                       } while (bh != failed_bh);
> +                       return false;
> +               }
> +
> +               bh = bh->b_this_page;
> +       } while (bh != head);
> +       return true;
> +}
> +
>  /*
>  * Migration function for pages with buffers. This function can only be used
>  * if the underlying filesystem guarantees that no other references to "page"
>  * exist.
>  */
>  int buffer_migrate_page(struct address_space *mapping,
> -               struct page *newpage, struct page *page)
> +               struct page *newpage, struct page *page, bool sync)
>  {
>        struct buffer_head *bh, *head;
>        int rc;
>
>        if (!page_has_buffers(page))
> -               return migrate_page(mapping, newpage, page);
> +               return migrate_page(mapping, newpage, page, sync);
>
>        head = page_buffers(page);
>
> @@ -453,13 +494,18 @@ int buffer_migrate_page(struct address_space *mapping,
>        if (rc)
>                return rc;
>
> -       bh = head;
> -       do {
> -               get_bh(bh);
> -               lock_buffer(bh);
> -               bh = bh->b_this_page;
> -
> -       } while (bh != head);
> +       if (!buffer_migrate_lock_buffers(head, sync)) {
> +               /*
> +                * We have to revert the radix tree update. If this returns
> +                * non-zero, it either means that the page count changed
> +                * which "can't happen" or the slot changed from underneath
> +                * us in which case someone operated on a page that did not
> +                * have buffers fully migrated which is alarming so warn
> +                * that it happened.
> +                */
> +               WARN_ON(migrate_page_move_mapping(mapping, page, newpage));
> +               return -EBUSY;

If this migrate_page_move_mapping() really fails, it seems disk IO will be
needed to bring the previously cached page back. I wonder if we should check
both conditions, "page refs are ok" and "all bh trylocked", before doing
radix_tree_replace_slot()? I think that would not involve IO on the
error path.


Nai

> +       }
>
>        ClearPagePrivate(page);
>        set_page_private(newpage, page_private(page));
> @@ -536,10 +582,13 @@ static int writeout(struct address_space *mapping, struct page *page)
>  * Default handling if a filesystem does not provide a migration function.
>  */
>  static int fallback_migrate_page(struct address_space *mapping,
> -       struct page *newpage, struct page *page)
> +       struct page *newpage, struct page *page, bool sync)
>  {
> -       if (PageDirty(page))
> +       if (PageDirty(page)) {
> +               if (!sync)
> +                       return -EBUSY;
>                return writeout(mapping, page);
> +       }
>
>        /*
>         * Buffers may be managed in a filesystem specific way.
> @@ -549,7 +598,7 @@ static int fallback_migrate_page(struct address_space *mapping,
>            !try_to_release_page(page, GFP_KERNEL))
>                return -EAGAIN;
>
> -       return migrate_page(mapping, newpage, page);
> +       return migrate_page(mapping, newpage, page, sync);
>  }
>
>  /*
> @@ -585,29 +634,18 @@ static int move_to_new_page(struct page *newpage, struct page *page,
>
>        mapping = page_mapping(page);
>        if (!mapping)
> -               rc = migrate_page(mapping, newpage, page);
> -       else {
> +               rc = migrate_page(mapping, newpage, page, sync);
> +       else if (mapping->a_ops->migratepage)
>                /*
> -                * Do not writeback pages if !sync and migratepage is
> -                * not pointing to migrate_page() which is nonblocking
> -                * (swapcache/tmpfs uses migratepage = migrate_page).
> +                * Most pages have a mapping and most filesystems provide a
> +                * migratepage callback. Anonymous pages are part of swap
> +                * space which also has its own migratepage callback. This
> +                * is the most common path for page migration.
>                 */
> -               if (PageDirty(page) && !sync &&
> -                   mapping->a_ops->migratepage != migrate_page)
> -                       rc = -EBUSY;
> -               else if (mapping->a_ops->migratepage)
> -                       /*
> -                        * Most pages have a mapping and most filesystems
> -                        * should provide a migration function. Anonymous
> -                        * pages are part of swap space which also has its
> -                        * own migration function. This is the most common
> -                        * path for page migration.
> -                        */
> -                       rc = mapping->a_ops->migratepage(mapping,
> -                                                       newpage, page);
> -               else
> -                       rc = fallback_migrate_page(mapping, newpage, page);
> -       }
> +               rc = mapping->a_ops->migratepage(mapping,
> +                                               newpage, page, sync);
> +       else
> +               rc = fallback_migrate_page(mapping, newpage, page, sync);
>
>        if (rc) {
>                newpage->mapping = NULL;
> --
> 1.7.3.4
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

next prev parent reply	other threads:[~2011-11-19  8:59 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-11-18 16:58 [RFC PATCH 0/5] Reduce compaction-related stalls and improve asynchronous migration of dirty pages v3 Mel Gorman
2011-11-18 16:58 ` [PATCH 1/5] mm: compaction: Allow compaction to isolate dirty pages Mel Gorman
2011-11-18 17:28   ` Andrea Arcangeli
2011-11-21 17:16   ` Rik van Riel
2011-11-18 16:58 ` [PATCH 2/5] mm: compaction: Use synchronous compaction for /proc/sys/vm/compact_memory Mel Gorman
2011-11-18 17:27   ` Andrea Arcangeli
2011-11-21 21:46   ` Rik van Riel
2011-11-18 16:58 ` [PATCH 3/5] mm: Do not stall in synchronous compaction for THP allocations Mel Gorman
2011-11-18 17:34   ` Andrea Arcangeli
2011-11-18 16:58 ` [PATCH 4/5] mm: compaction: Determine if dirty pages can be migreated without blocking within ->migratepage Mel Gorman
2011-11-18 21:35   ` Andrea Arcangeli
2011-11-21 11:17     ` Mel Gorman
2011-11-21 22:45       ` Andrea Arcangeli
2011-11-22  0:55         ` [PATCH] mm: compaction: make buffer cache __GFP_MOVABLE Rik van Riel
2011-11-22 12:59         ` [PATCH 4/5] mm: compaction: Determine if dirty pages can be migreated without blocking within ->migratepage Mel Gorman
2011-11-24  1:19           ` Andrea Arcangeli
2011-11-24 12:21             ` Mel Gorman
2011-11-26  6:51               ` Andy Isaacson
2011-11-27 20:50               ` Rik van Riel
2011-11-19  8:59   ` Nai Xia [this message]
2011-11-19  9:48     ` Nai Xia
2011-11-21 11:19     ` Mel Gorman
2011-11-18 16:58 ` [PATCH 5/5] mm: compaction: make isolate_lru_page() filter-aware again Mel Gorman
2011-11-19 19:54 ` [RFC PATCH 0/5] Reduce compaction-related stalls Andrea Arcangeli
2011-11-19 19:54 ` [PATCH 1/8] mm: compaction: Allow compaction to isolate dirty pages Andrea Arcangeli
2011-11-19 19:54 ` [PATCH 2/8] mm: compaction: Use synchronous compaction for /proc/sys/vm/compact_memory Andrea Arcangeli
2011-11-19 19:54 ` [PATCH 3/8] mm: check if we isolated a compound page during lumpy scan Andrea Arcangeli
2011-11-21 11:51   ` Mel Gorman
2011-11-19 19:54 ` [PATCH 4/8] mm: compaction: defer compaction only with sync_migration Andrea Arcangeli
2011-11-21 12:36   ` Mel Gorman
2011-11-19 19:54 ` [PATCH 5/8] mm: compaction: avoid overwork in migrate sync mode Andrea Arcangeli
2011-11-21 21:59   ` Rik van Riel
2011-11-22  9:51     ` Mel Gorman
2011-11-19 19:54 ` [PATCH 6/8] Revert "mm: compaction: make isolate_lru_page() filter-aware" Andrea Arcangeli
2011-11-21 12:57   ` Mel Gorman
2011-11-19 19:54 ` [PATCH 7/8] Revert "vmscan: abort reclaim/compaction if compaction can proceed" Andrea Arcangeli
2011-11-21 13:09   ` Mel Gorman
2011-11-21 15:37     ` Rik van Riel
2011-11-19 19:54 ` [PATCH 8/8] Revert "vmscan: limit direct reclaim for higher order allocations" Andrea Arcangeli
2011-11-21 21:57   ` Rik van Riel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAPQyPG4GTccLroA2NsdQK_PH1_KB3dD1v3m1FzenCeDW-8qb+g@mail.gmail.com \
    --to=nai.xia@gmail.com \
    --cc=aarcange@redhat.com \
    --cc=adi@hexapodia.org \
    --cc=jack@suse.cz \
    --cc=jweiner@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=minchan.kim@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).