All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tang Chen <tangchen@cn.fujitsu.com>
To: viro@zeniv.linux.org.uk, bcrl@kvack.org, jmoyer@redhat.com,
	kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com,
	isimatu.yasuaki@jp.fujitsu.com, guz.fnst@cn.fujitsu.com
Cc: linux-fsdevel@vger.kernel.org, linux-aio@kvack.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 1/1] aio, memory-hotplug: Fix confliction when migrating and accessing ring pages.
Date: Thu, 27 Feb 2014 08:26:16 +0800	[thread overview]
Message-ID: <530E8628.3060105@cn.fujitsu.com> (raw)
In-Reply-To: <1393403919-1178-1-git-send-email-tangchen@cn.fujitsu.com>


Hi all,

On 02/26/2014 04:38 PM, Tang Chen wrote:
> AIO ring page migration has been implemented by the following patch:
>
>          https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/fs/aio.c?id=36bc08cc01709b4a9bb563b35aa530241ddc63e3

I forgot to mention that the above patch was merged for the Linux 3.12
release, so I think this problem also exists in the 3.12 stable tree.

If the following solution is acceptable, we need to merge it into the 3.12
stable tree, too.

Please reply ASAP.

Thanks.

>
> In this patch, ctx->completion_lock is used to prevent other processes
> from accessing the ring page being migrated.
>
> However, aio_setup_ring(), ioctx_add_table() and aio_read_events_ring()
> do not take ctx->completion_lock when writing to the ring page.
>
> As a result, for example, we have the following problem:
>
>              thread 1                      |              thread 2
>                                            |
> aio_migratepage()                         |
>   |->  take ctx->completion_lock            |
>   |->  migrate_page_copy(new, old)          |
>   |   *NOW*, ctx->ring_pages[idx] == old   |
>                                            |
>                                            |    *NOW*, ctx->ring_pages[idx] == old
>                                            |    aio_read_events_ring()
>                                            |     |->  ring = kmap_atomic(ctx->ring_pages[0])
>                                            |     |->  ring->head = head;          *HERE, write to the old ring page*
>                                            |     |->  kunmap_atomic(ring);
>                                            |
>   |->  ctx->ring_pages[idx] = new           |
>   |   *BUT NOW*, the content of            |
>   |    ring_pages[idx] is old.             |
>   |->  release ctx->completion_lock         |
>
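> As shown above, thread 2's write lands on the old page after its content
> has already been copied, so the new ring page never receives the update.
>
> The solution is to take ctx->completion_lock in thread 2 as well, that is,
> in aio_setup_ring(), ioctx_add_table() and aio_read_events_ring(), whenever
> they write to ring pages.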
>
>
> Reported-by: Yasuaki Ishimatsu<isimatu.yasuaki@jp.fujitsu.com>
> Signed-off-by: Tang Chen<tangchen@cn.fujitsu.com>
> ---
>   fs/aio.c | 33 +++++++++++++++++++++++++++++++++
>   1 file changed, 33 insertions(+)
>
> diff --git a/fs/aio.c b/fs/aio.c
> index 062a5f6..50c089c 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -366,6 +366,7 @@ static int aio_setup_ring(struct kioctx *ctx)
>   	int nr_pages;
>   	int i;
>   	struct file *file;
> +	unsigned long flags;
>
>   	/* Compensate for the ring buffer's head/tail overlap entry */
>   	nr_events += 2;	/* 1 is required, 2 for good luck */
> @@ -437,6 +438,14 @@ static int aio_setup_ring(struct kioctx *ctx)
>   	ctx->user_id = ctx->mmap_base;
>   	ctx->nr_events = nr_events; /* trusted copy */
>
> +	/*
> +	 * The aio ring pages are user space pages, so they can be migrated.
> +	 * When writing to an aio ring page, we should ensure the page is not
> +	 * being migrated. Aio page migration procedure is protected by
> +	 * ctx->completion_lock, so we add this lock here.
> +	 */
> +	spin_lock_irqsave(&ctx->completion_lock, flags);
> +
>   	ring = kmap_atomic(ctx->ring_pages[0]);
>   	ring->nr = nr_events;	/* user copy */
>   	ring->id = ~0U;
> @@ -448,6 +457,8 @@ static int aio_setup_ring(struct kioctx *ctx)
>   	kunmap_atomic(ring);
>   	flush_dcache_page(ctx->ring_pages[0]);
>
> +	spin_unlock_irqrestore(&ctx->completion_lock, flags);
> +
>   	return 0;
>   }
>
> @@ -542,6 +553,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
>   	unsigned i, new_nr;
>   	struct kioctx_table *table, *old;
>   	struct aio_ring *ring;
> +	unsigned long flags;
>
>   	spin_lock(&mm->ioctx_lock);
>   	rcu_read_lock();
> @@ -556,9 +568,19 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
>   					rcu_read_unlock();
>   					spin_unlock(&mm->ioctx_lock);
>
> +					/*
> +					 * Accessing ring pages must be done
> +					 * holding ctx->completion_lock to
> +					 * prevent aio ring page migration
> +					 * procedure from migrating ring pages.
> +					 */
> +					spin_lock_irqsave(&ctx->completion_lock,
> +							  flags);
>   					ring = kmap_atomic(ctx->ring_pages[0]);
>   					ring->id = ctx->id;
>   					kunmap_atomic(ring);
> +					spin_unlock_irqrestore(
> +						&ctx->completion_lock, flags);
>   					return 0;
>   				}
>
> @@ -1021,6 +1043,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
>   	unsigned head, tail, pos;
>   	long ret = 0;
>   	int copy_ret;
> +	unsigned long flags;
>
>   	mutex_lock(&ctx->ring_lock);
>
> @@ -1066,11 +1089,21 @@ static long aio_read_events_ring(struct kioctx *ctx,
>   		head %= ctx->nr_events;
>   	}
>
> +	/*
> +	 * The aio ring pages are user space pages, so they can be migrated.
> +	 * When writing to an aio ring page, we should ensure the page is not
> +	 * being migrated. Aio page migration procedure is protected by
> +	 * ctx->completion_lock, so we add this lock here.
> +	 */
> +	spin_lock_irqsave(&ctx->completion_lock, flags);
> +
>   	ring = kmap_atomic(ctx->ring_pages[0]);
>   	ring->head = head;
>   	kunmap_atomic(ring);
>   	flush_dcache_page(ctx->ring_pages[0]);
>
> +	spin_unlock_irqrestore(&ctx->completion_lock, flags);
> +
>   	pr_debug("%li  h%u t%u\n", ret, head, tail);
>
>   	put_reqs_available(ctx, ret);

--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href="mailto:aart@kvack.org">aart@kvack.org</a>

WARNING: multiple messages have this Message-ID (diff)
From: Tang Chen <tangchen@cn.fujitsu.com>
To: viro@zeniv.linux.org.uk, bcrl@kvack.org, jmoyer@redhat.com,
	kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com,
	isimatu.yasuaki@jp.fujitsu.com, guz.fnst@cn.fujitsu.com
Cc: linux-fsdevel@vger.kernel.org, linux-aio@kvack.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 1/1] aio, memory-hotplug: Fix confliction when migrating and accessing ring pages.
Date: Thu, 27 Feb 2014 08:26:16 +0800	[thread overview]
Message-ID: <530E8628.3060105@cn.fujitsu.com> (raw)
In-Reply-To: <1393403919-1178-1-git-send-email-tangchen@cn.fujitsu.com>


Hi all,

On 02/26/2014 04:38 PM, Tang Chen wrote:
> AIO ring page migration has been implemented by the following patch:
>
>          https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/fs/aio.c?id=36bc08cc01709b4a9bb563b35aa530241ddc63e3

I forgot to mention that the above patch was merged for the Linux 3.12
release, so I think this problem also exists in the 3.12 stable tree.

If the following solution is acceptable, we need to merge it into the 3.12
stable tree, too.

Please reply ASAP.

Thanks.

>
> In this patch, ctx->completion_lock is used to prevent other processes
> from accessing the ring page being migrated.
>
> However, aio_setup_ring(), ioctx_add_table() and aio_read_events_ring()
> do not take ctx->completion_lock when writing to the ring page.
>
> As a result, for example, we have the following problem:
>
>              thread 1                      |              thread 2
>                                            |
> aio_migratepage()                         |
>   |->  take ctx->completion_lock            |
>   |->  migrate_page_copy(new, old)          |
>   |   *NOW*, ctx->ring_pages[idx] == old   |
>                                            |
>                                            |    *NOW*, ctx->ring_pages[idx] == old
>                                            |    aio_read_events_ring()
>                                            |     |->  ring = kmap_atomic(ctx->ring_pages[0])
>                                            |     |->  ring->head = head;          *HERE, write to the old ring page*
>                                            |     |->  kunmap_atomic(ring);
>                                            |
>   |->  ctx->ring_pages[idx] = new           |
>   |   *BUT NOW*, the content of            |
>   |    ring_pages[idx] is old.             |
>   |->  release ctx->completion_lock         |
>
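> As shown above, thread 2's write lands on the old page after its content
> has already been copied, so the new ring page never receives the update.
>
> The solution is to take ctx->completion_lock in thread 2 as well, that is,
> in aio_setup_ring(), ioctx_add_table() and aio_read_events_ring(), whenever
> they write to ring pages.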
>
>
> Reported-by: Yasuaki Ishimatsu<isimatu.yasuaki@jp.fujitsu.com>
> Signed-off-by: Tang Chen<tangchen@cn.fujitsu.com>
> ---
>   fs/aio.c | 33 +++++++++++++++++++++++++++++++++
>   1 file changed, 33 insertions(+)
>
> diff --git a/fs/aio.c b/fs/aio.c
> index 062a5f6..50c089c 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -366,6 +366,7 @@ static int aio_setup_ring(struct kioctx *ctx)
>   	int nr_pages;
>   	int i;
>   	struct file *file;
> +	unsigned long flags;
>
>   	/* Compensate for the ring buffer's head/tail overlap entry */
>   	nr_events += 2;	/* 1 is required, 2 for good luck */
> @@ -437,6 +438,14 @@ static int aio_setup_ring(struct kioctx *ctx)
>   	ctx->user_id = ctx->mmap_base;
>   	ctx->nr_events = nr_events; /* trusted copy */
>
> +	/*
> +	 * The aio ring pages are user space pages, so they can be migrated.
> +	 * When writing to an aio ring page, we should ensure the page is not
> +	 * being migrated. Aio page migration procedure is protected by
> +	 * ctx->completion_lock, so we add this lock here.
> +	 */
> +	spin_lock_irqsave(&ctx->completion_lock, flags);
> +
>   	ring = kmap_atomic(ctx->ring_pages[0]);
>   	ring->nr = nr_events;	/* user copy */
>   	ring->id = ~0U;
> @@ -448,6 +457,8 @@ static int aio_setup_ring(struct kioctx *ctx)
>   	kunmap_atomic(ring);
>   	flush_dcache_page(ctx->ring_pages[0]);
>
> +	spin_unlock_irqrestore(&ctx->completion_lock, flags);
> +
>   	return 0;
>   }
>
> @@ -542,6 +553,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
>   	unsigned i, new_nr;
>   	struct kioctx_table *table, *old;
>   	struct aio_ring *ring;
> +	unsigned long flags;
>
>   	spin_lock(&mm->ioctx_lock);
>   	rcu_read_lock();
> @@ -556,9 +568,19 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
>   					rcu_read_unlock();
>   					spin_unlock(&mm->ioctx_lock);
>
> +					/*
> +					 * Accessing ring pages must be done
> +					 * holding ctx->completion_lock to
> +					 * prevent aio ring page migration
> +					 * procedure from migrating ring pages.
> +					 */
> +					spin_lock_irqsave(&ctx->completion_lock,
> +							  flags);
>   					ring = kmap_atomic(ctx->ring_pages[0]);
>   					ring->id = ctx->id;
>   					kunmap_atomic(ring);
> +					spin_unlock_irqrestore(
> +						&ctx->completion_lock, flags);
>   					return 0;
>   				}
>
> @@ -1021,6 +1043,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
>   	unsigned head, tail, pos;
>   	long ret = 0;
>   	int copy_ret;
> +	unsigned long flags;
>
>   	mutex_lock(&ctx->ring_lock);
>
> @@ -1066,11 +1089,21 @@ static long aio_read_events_ring(struct kioctx *ctx,
>   		head %= ctx->nr_events;
>   	}
>
> +	/*
> +	 * The aio ring pages are user space pages, so they can be migrated.
> +	 * When writing to an aio ring page, we should ensure the page is not
> +	 * being migrated. Aio page migration procedure is protected by
> +	 * ctx->completion_lock, so we add this lock here.
> +	 */
> +	spin_lock_irqsave(&ctx->completion_lock, flags);
> +
>   	ring = kmap_atomic(ctx->ring_pages[0]);
>   	ring->head = head;
>   	kunmap_atomic(ring);
>   	flush_dcache_page(ctx->ring_pages[0]);
>
> +	spin_unlock_irqrestore(&ctx->completion_lock, flags);
> +
>   	pr_debug("%li  h%u t%u\n", ret, head, tail);
>
>   	put_reqs_available(ctx, ret);

  reply	other threads:[~2014-02-27  0:26 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-02-26  8:38 [PATCH 1/1] aio, memory-hotplug: Fix confliction when migrating and accessing ring pages Tang Chen
2014-02-26  8:38 ` Tang Chen
2014-02-27  0:26 ` Tang Chen [this message]
2014-02-27  0:26   ` Tang Chen
2014-02-27 10:03   ` Benjamin LaHaise
2014-02-27 10:03     ` Benjamin LaHaise
2014-02-27 14:38     ` tom
2014-03-10  5:30 ` [V2 PATCH 0/2] Bug fix in aio ring page migration Tang Chen
2014-03-10  5:30   ` Tang Chen
2014-03-10  5:30 ` [V2 PATCH 1/2] aio, memory-hotplug: Fix confliction when migrating and, accessing ring pages Tang Chen
2014-03-10  5:30   ` Tang Chen
2014-03-10  5:31 ` [V2 PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration Tang Chen
2014-03-10  5:31   ` Tang Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=530E8628.3060105@cn.fujitsu.com \
    --to=tangchen@cn.fujitsu.com \
    --cc=bcrl@kvack.org \
    --cc=guz.fnst@cn.fujitsu.com \
    --cc=isimatu.yasuaki@jp.fujitsu.com \
    --cc=jmoyer@redhat.com \
    --cc=kosaki.motohiro@gmail.com \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-aio@kvack.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.