From: Benjamin LaHaise <bcrl@kvack.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Kent Overstreet <kmo@daterainc.com>,
Dave Jones <davej@redhat.com>,
Linux Kernel <linux-kernel@vger.kernel.org>,
linux-mm <linux-mm@kvack.org>, Christoph Lameter <cl@gentwo.org>,
Al Viro <viro@zeniv.linux.org.uk>
Subject: Re: bad page state in 3.13-rc4
Date: Thu, 19 Dec 2013 15:31:11 -0500 [thread overview]
Message-ID: <20131219203111.GA10905@kvack.org> (raw)
In-Reply-To: <CA+55aFwu_KN+1Ep5RmgFTvBdH3xRJDmCjZ9Fo_pH28hTdiHyiQ@mail.gmail.com>
On Fri, Dec 20, 2013 at 05:11:12AM +0900, Linus Torvalds wrote:
> On Fri, Dec 20, 2013 at 5:02 AM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> > Why not just get rid of the idiotic get_user_pages() crap then?
> > Something like the attached patch?
> >
> > Totally untested, but at least it makes *some* amount of sense.
>
> Ok, that can't work, since the ring_pages[] allocation happens later.
> So that part needs to be moved up, and it needs to initialize
> 'nr_pages'.
>
> So here's the same patch, but with stuff moved around a bit, and the
> "oops, couldn't create page" part fixed.
>
> But it's still totally and entirely untested.
That looks much better. I think the following is also needed to nail down
the migratepage operation as well. I'll give these two a few tests
together.
-ben
--
"Thought is the essence of where you are now."
diff --git a/fs/aio.c b/fs/aio.c
index 6efb7f6..eec0ae4 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -244,8 +244,13 @@ static void aio_free_ring(struct kioctx *ctx)
int i;
for (i = 0; i < ctx->nr_pages; i++) {
+ struct page *page;
pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
page_count(ctx->ring_pages[i]));
+ page = ctx->ring_pages[i];
+ if (!page)
+ continue;
+ ctx->ring_pages[i] = NULL;
put_page(ctx->ring_pages[i]);
}
@@ -280,18 +285,42 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
unsigned long flags;
int rc;
+ /* Serialize access to the old page */
+ if (!trylock_page(old))
+ return -EAGAIN;
+
+ rc = 0;
+
+ spin_lock(&mapping->private_lock);
+ ctx = mapping->private_data;
+ if (ctx) {
+ pgoff_t idx;
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ idx = old->index;
+ if (idx < (pgoff_t)ctx->nr_pages) {
+ if (ctx->ring_pages[idx] != old)
+ rc = -EAGAIN;
+ } else
+ rc = -EINVAL;
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ } else
+ rc = -EINVAL;
+ spin_unlock(&mapping->private_lock);
+
+ if (rc != 0)
+ return rc;
+
/* Writeback must be complete */
BUG_ON(PageWriteback(old));
- put_page(old);
+ get_page(new);
- rc = migrate_page_move_mapping(mapping, new, old, NULL, mode);
+ rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
if (rc != MIGRATEPAGE_SUCCESS) {
- get_page(old);
+ unlock_page(old);
+ put_page(new);
return rc;
}
- get_page(new);
-
/* We can potentially race against kioctx teardown here. Use the
* address_space's private data lock to protect the mapping's
* private_data.
@@ -305,10 +334,16 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
idx = old->index;
if (idx < (pgoff_t)ctx->nr_pages)
ctx->ring_pages[idx] = new;
+ else
+ rc = -EINVAL;
spin_unlock_irqrestore(&ctx->completion_lock, flags);
} else
rc = -EBUSY;
spin_unlock(&mapping->private_lock);
+ unlock_page(old);
+
+ if (rc == MIGRATEPAGE_SUCCESS)
+ put_page(old);
return rc;
}
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index b7717d7..f015c05 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -55,7 +55,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page);
extern int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page,
- struct buffer_head *head, enum migrate_mode mode);
+ struct buffer_head *head, enum migrate_mode mode,
+ int extra_count);
#else
static inline void putback_lru_pages(struct list_head *l) {}
diff --git a/mm/migrate.c b/mm/migrate.c
index e9b7102..e73823e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -317,14 +317,15 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
*/
int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page,
- struct buffer_head *head, enum migrate_mode mode)
+ struct buffer_head *head, enum migrate_mode mode,
+ int extra_count)
{
int expected_count = 0;
void **pslot;
if (!mapping) {
/* Anonymous page without mapping */
- if (page_count(page) != 1)
+ if (page_count(page) != (expected_count + 1))
return -EAGAIN;
return MIGRATEPAGE_SUCCESS;
}
@@ -584,7 +585,7 @@ int migrate_page(struct address_space *mapping,
BUG_ON(PageWriteback(page)); /* Writeback must be complete */
- rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
+ rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
@@ -611,7 +612,7 @@ int buffer_migrate_page(struct address_space *mapping,
head = page_buffers(page);
- rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
+ rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
WARNING: multiple messages have this Message-ID (diff)
From: Benjamin LaHaise <bcrl@kvack.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Kent Overstreet <kmo@daterainc.com>,
Dave Jones <davej@redhat.com>,
Linux Kernel <linux-kernel@vger.kernel.org>,
linux-mm <linux-mm@kvack.org>, Christoph Lameter <cl@gentwo.org>,
Al Viro <viro@zeniv.linux.org.uk>
Subject: Re: bad page state in 3.13-rc4
Date: Thu, 19 Dec 2013 15:31:11 -0500 [thread overview]
Message-ID: <20131219203111.GA10905@kvack.org> (raw)
In-Reply-To: <CA+55aFwu_KN+1Ep5RmgFTvBdH3xRJDmCjZ9Fo_pH28hTdiHyiQ@mail.gmail.com>
On Fri, Dec 20, 2013 at 05:11:12AM +0900, Linus Torvalds wrote:
> On Fri, Dec 20, 2013 at 5:02 AM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> > Why not just get rid of the idiotic get_user_pages() crap then?
> > Something like the attached patch?
> >
> > Totally untested, but at least it makes *some* amount of sense.
>
> Ok, that can't work, since the ring_pages[] allocation happens later.
> So that part needs to be moved up, and it needs to initialize
> 'nr_pages'.
>
> So here's the same patch, but with stuff moved around a bit, and the
> "oops, couldn't create page" part fixed.
>
> But it's still totally and entirely untested.
That looks much better. I think the following is also needed to nail down
the migratepage operation as well. I'll give these two a few tests
together.
-ben
--
"Thought is the essence of where you are now."
diff --git a/fs/aio.c b/fs/aio.c
index 6efb7f6..eec0ae4 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -244,8 +244,13 @@ static void aio_free_ring(struct kioctx *ctx)
int i;
for (i = 0; i < ctx->nr_pages; i++) {
+ struct page *page;
pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
page_count(ctx->ring_pages[i]));
+ page = ctx->ring_pages[i];
+ if (!page)
+ continue;
+ ctx->ring_pages[i] = NULL;
put_page(ctx->ring_pages[i]);
}
@@ -280,18 +285,42 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
unsigned long flags;
int rc;
+ /* Serialize access to the old page */
+ if (!trylock_page(old))
+ return -EAGAIN;
+
+ rc = 0;
+
+ spin_lock(&mapping->private_lock);
+ ctx = mapping->private_data;
+ if (ctx) {
+ pgoff_t idx;
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ idx = old->index;
+ if (idx < (pgoff_t)ctx->nr_pages) {
+ if (ctx->ring_pages[idx] != old)
+ rc = -EAGAIN;
+ } else
+ rc = -EINVAL;
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ } else
+ rc = -EINVAL;
+ spin_unlock(&mapping->private_lock);
+
+ if (rc != 0)
+ return rc;
+
/* Writeback must be complete */
BUG_ON(PageWriteback(old));
- put_page(old);
+ get_page(new);
- rc = migrate_page_move_mapping(mapping, new, old, NULL, mode);
+ rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
if (rc != MIGRATEPAGE_SUCCESS) {
- get_page(old);
+ unlock_page(old);
+ put_page(new);
return rc;
}
- get_page(new);
-
/* We can potentially race against kioctx teardown here. Use the
* address_space's private data lock to protect the mapping's
* private_data.
@@ -305,10 +334,16 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
idx = old->index;
if (idx < (pgoff_t)ctx->nr_pages)
ctx->ring_pages[idx] = new;
+ else
+ rc = -EINVAL;
spin_unlock_irqrestore(&ctx->completion_lock, flags);
} else
rc = -EBUSY;
spin_unlock(&mapping->private_lock);
+ unlock_page(old);
+
+ if (rc == MIGRATEPAGE_SUCCESS)
+ put_page(old);
return rc;
}
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index b7717d7..f015c05 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -55,7 +55,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page);
extern int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page,
- struct buffer_head *head, enum migrate_mode mode);
+ struct buffer_head *head, enum migrate_mode mode,
+ int extra_count);
#else
static inline void putback_lru_pages(struct list_head *l) {}
diff --git a/mm/migrate.c b/mm/migrate.c
index e9b7102..e73823e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -317,14 +317,15 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
*/
int migrate_page_move_mapping(struct address_space *mapping,
struct page *newpage, struct page *page,
- struct buffer_head *head, enum migrate_mode mode)
+ struct buffer_head *head, enum migrate_mode mode,
+ int extra_count)
{
int expected_count = 0;
void **pslot;
if (!mapping) {
/* Anonymous page without mapping */
- if (page_count(page) != 1)
+ if (page_count(page) != (expected_count + 1))
return -EAGAIN;
return MIGRATEPAGE_SUCCESS;
}
@@ -584,7 +585,7 @@ int migrate_page(struct address_space *mapping,
BUG_ON(PageWriteback(page)); /* Writeback must be complete */
- rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode);
+ rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
@@ -611,7 +612,7 @@ int buffer_migrate_page(struct address_space *mapping,
head = page_buffers(page);
- rc = migrate_page_move_mapping(mapping, newpage, page, head, mode);
+ rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
next prev parent reply other threads:[~2013-12-19 20:31 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-12-19 4:07 bad page state in 3.13-rc4 Dave Jones
2013-12-19 4:07 ` Dave Jones
2013-12-19 4:40 ` Linus Torvalds
2013-12-19 4:40 ` Linus Torvalds
2013-12-19 15:41 ` Christoph Lameter
2013-12-19 15:41 ` Christoph Lameter
2013-12-19 20:11 ` Mel Gorman
2013-12-19 20:11 ` Mel Gorman
2013-12-19 20:30 ` Dave Jones
2013-12-19 20:30 ` Dave Jones
2013-12-19 15:53 ` Dave Jones
2013-12-19 15:53 ` Dave Jones
2013-12-19 17:07 ` Linus Torvalds
2013-12-19 17:07 ` Linus Torvalds
2013-12-19 17:17 ` Dave Jones
2013-12-19 17:17 ` Dave Jones
2013-12-19 18:11 ` Kent Overstreet
2013-12-19 18:11 ` Kent Overstreet
2013-12-19 18:29 ` Benjamin LaHaise
2013-12-19 18:29 ` Benjamin LaHaise
2013-12-19 18:35 ` Dave Jones
2013-12-19 18:35 ` Dave Jones
2013-12-19 19:19 ` Linus Torvalds
2013-12-19 19:19 ` Linus Torvalds
2013-12-19 19:26 ` Benjamin LaHaise
2013-12-19 19:26 ` Benjamin LaHaise
2013-12-19 19:45 ` Linus Torvalds
2013-12-19 19:45 ` Linus Torvalds
2013-12-19 19:53 ` Benjamin LaHaise
2013-12-19 19:53 ` Benjamin LaHaise
2013-12-19 20:02 ` Linus Torvalds
2013-12-19 20:11 ` Linus Torvalds
2013-12-19 20:31 ` Benjamin LaHaise [this message]
2013-12-19 20:31 ` Benjamin LaHaise
2013-12-19 20:31 ` Linus Torvalds
2013-12-19 20:31 ` Linus Torvalds
2013-12-19 20:42 ` Benjamin LaHaise
2013-12-19 20:42 ` Benjamin LaHaise
2013-12-19 20:24 ` Dave Jones
2013-12-19 20:24 ` Dave Jones
2013-12-19 23:38 ` Benjamin LaHaise
2013-12-19 23:38 ` Benjamin LaHaise
2013-12-20 1:00 ` Dave Jones
2013-12-20 1:00 ` Dave Jones
2013-12-21 23:06 ` [PATCHes - aio / migrate page, please review] " Benjamin LaHaise
2013-12-21 23:06 ` Benjamin LaHaise
2013-12-22 19:09 ` Linus Torvalds
2013-12-22 19:09 ` Linus Torvalds
2013-12-22 21:30 ` Dave Jones
2013-12-22 21:30 ` Dave Jones
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20131219203111.GA10905@kvack.org \
--to=bcrl@kvack.org \
--cc=cl@gentwo.org \
--cc=davej@redhat.com \
--cc=kmo@daterainc.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=torvalds@linux-foundation.org \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.