From mboxrd@z Thu Jan 1 00:00:00 1970 From: Tang Chen Subject: [PATCH 0/2] Bug fix in aio ring page migration. Date: Thu, 27 Feb 2014 18:40:14 +0800 Message-ID: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> Cc: linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-kernel@vger.kernel.org To: viro@zeniv.linux.org.uk, bcrl@kvack.org, jmoyer@redhat.com, kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com, isimatu.yasuaki@jp.fujitsu.com, guz.fnst@cn.fujitsu.com Return-path: Sender: owner-linux-aio@kvack.org List-Id: linux-fsdevel.vger.kernel.org This patch-set fixes the following two problems: 1. Need to use ctx->completion_lock to protect ring pages from being mis-written while migration. 2. Need memory barrier to ensure memory copy is done before ctx->ring_pages[] is updated. NOTE: AIO ring page migration was implemented since Linux 3.12. So we need to merge these two patches into 3.12 stable tree. Tang Chen (2): aio, memory-hotplug: Fix confliction when migrating and accessing ring pages. aio, mem-hotplug: Add memory barrier to aio ring page migration. fs/aio.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) -- 1.8.3.1 -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: aart@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Tang Chen Subject: [PATCH 1/2] aio, memory-hotplug: Fix confliction when migrating and accessing ring pages. Date: Thu, 27 Feb 2014 18:40:15 +0800 Message-ID: <1393497616-16428-2-git-send-email-tangchen@cn.fujitsu.com> References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> Cc: linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-kernel@vger.kernel.org To: viro@zeniv.linux.org.uk, bcrl@kvack.org, jmoyer@redhat.com, kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com, isimatu.yasuaki@jp.fujitsu.com, guz.fnst@cn.fujitsu.com Return-path: In-Reply-To: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> Sender: owner-linux-aio@kvack.org List-Id: linux-fsdevel.vger.kernel.org AIO ring page migration has been implemented by the following patch: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/fs/aio.c?id=36bc08cc01709b4a9bb563b35aa530241ddc63e3 In this patch, ctx->completion_lock is used to prevent other processes from accessing the ring page being migrated. But in aio_setup_ring(), ioctx_add_table() and aio_read_events_ring(), when writing to the ring page, they didn't take ctx->completion_lock. As a result, for example, we have the following problem: thread 1 | thread 2 | aio_migratepage() | |-> take ctx->completion_lock | |-> migrate_page_copy(new, old) | | *NOW*, ctx->ring_pages[idx] == old | | | *NOW*, ctx->ring_pages[idx] == old | aio_read_events_ring() | |-> ring = kmap_atomic(ctx->ring_pages[0]) | |-> ring->head = head; *HERE, write to the old ring page* | |-> kunmap_atomic(ring); | |-> ctx->ring_pages[idx] = new | | *BUT NOW*, the content of | | ring_pages[idx] is old. | |-> release ctx->completion_lock | As above, the new ring page will not be updated. The solution is taking ctx->completion_lock in thread 2, which means, in aio_setup_ring(), ioctx_add_table() and aio_read_events_ring() when writing to ring pages. Reported-by: Yasuaki Ishimatsu Signed-off-by: Tang Chen --- fs/aio.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 062a5f6..50c089c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -366,6 +366,7 @@ static int aio_setup_ring(struct kioctx *ctx) int nr_pages; int i; struct file *file; + unsigned long flags; /* Compensate for the ring buffer's head/tail overlap entry */ nr_events += 2; /* 1 is required, 2 for good luck */ @@ -437,6 +438,14 @@ static int aio_setup_ring(struct kioctx *ctx) ctx->user_id = ctx->mmap_base; ctx->nr_events = nr_events; /* trusted copy */ + /* + * The aio ring pages are user space pages, so they can be migrated. + * When writing to an aio ring page, we should ensure the page is not + * being migrated. Aio page migration procedure is protected by + * ctx->completion_lock, so we add this lock here. + */ + spin_lock_irqsave(&ctx->completion_lock, flags); + ring = kmap_atomic(ctx->ring_pages[0]); ring->nr = nr_events; /* user copy */ ring->id = ~0U; @@ -448,6 +457,8 @@ static int aio_setup_ring(struct kioctx *ctx) kunmap_atomic(ring); flush_dcache_page(ctx->ring_pages[0]); + spin_unlock_irqrestore(&ctx->completion_lock, flags); + return 0; } @@ -542,6 +553,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) unsigned i, new_nr; struct kioctx_table *table, *old; struct aio_ring *ring; + unsigned long flags; spin_lock(&mm->ioctx_lock); rcu_read_lock(); @@ -556,9 +568,19 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) rcu_read_unlock(); spin_unlock(&mm->ioctx_lock); + /* + * Accessing ring pages must be done + * holding ctx->completion_lock to + * prevent aio ring page migration + * procedure from migrating ring pages. + */ + spin_lock_irqsave(&ctx->completion_lock, + flags); ring = kmap_atomic(ctx->ring_pages[0]); ring->id = ctx->id; kunmap_atomic(ring); + spin_unlock_irqrestore( + &ctx->completion_lock, flags); return 0; } @@ -1021,6 +1043,7 @@ static long aio_read_events_ring(struct kioctx *ctx, unsigned head, tail, pos; long ret = 0; int copy_ret; + unsigned long flags; mutex_lock(&ctx->ring_lock); @@ -1066,11 +1089,21 @@ static long aio_read_events_ring(struct kioctx *ctx, head %= ctx->nr_events; } + /* + * The aio ring pages are user space pages, so they can be migrated. + * When writing to an aio ring page, we should ensure the page is not + * being migrated. Aio page migration procedure is protected by + * ctx->completion_lock, so we add this lock here. + */ + spin_lock_irqsave(&ctx->completion_lock, flags); + ring = kmap_atomic(ctx->ring_pages[0]); ring->head = head; kunmap_atomic(ring); flush_dcache_page(ctx->ring_pages[0]); + spin_unlock_irqrestore(&ctx->completion_lock, flags); + pr_debug("%li h%u t%u\n", ret, head, tail); put_reqs_available(ctx, ret); -- 1.8.3.1 -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: aart@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Tang Chen Subject: [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. Date: Thu, 27 Feb 2014 18:40:16 +0800 Message-ID: <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> Cc: linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-kernel@vger.kernel.org To: viro@zeniv.linux.org.uk, bcrl@kvack.org, jmoyer@redhat.com, kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com, isimatu.yasuaki@jp.fujitsu.com, guz.fnst@cn.fujitsu.com Return-path: In-Reply-To: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> Sender: owner-linux-aio@kvack.org List-Id: linux-fsdevel.vger.kernel.org When doing aio ring page migration, we migrated the page, and update ctx->ring_pages[]. Like the following: aio_migratepage() |-> migrate_page_copy(new, old) | ...... /* Need barrier here */ |-> ctx->ring_pages[idx] = new Actually, we need a memory barrier between these two operations. Otherwise, if ctx->ring_pages[] is updated before memory copy due to the compiler optimization, other processes may have an opportunity to access to the not fully initialized new ring page. So add a wmb to synchronize them. Reported-by: Yasuaki Ishimatsu Signed-off-by: Tang Chen --- fs/aio.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 50c089c..f0ed838 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, pgoff_t idx; spin_lock_irqsave(&ctx->completion_lock, flags); migrate_page_copy(new, old); + + /* + * Ensure memory copy is finished before updating + * ctx->ring_pages[]. Otherwise other processes may access to + * new ring pages which are not fully initialized. + */ + smp_wmb(); + idx = old->index; if (idx < (pgoff_t)ctx->nr_pages) { /* And only do the move if things haven't changed */ -- 1.8.3.1 -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: aart@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Yasuaki Ishimatsu Subject: Re: [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. Date: Thu, 27 Feb 2014 21:06:05 +0900 Message-ID: <530F2A2D.50307@jp.fujitsu.com> References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset="ISO-2022-JP" Content-Transfer-Encoding: 7bit Cc: , , , , , , , , To: Tang Chen Return-path: In-Reply-To: <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> Sender: owner-linux-aio@kvack.org List-Id: linux-fsdevel.vger.kernel.org Hi Tang, (2014/02/27 19:40), Tang Chen wrote: > When doing aio ring page migration, we migrated the page, and update > ctx->ring_pages[]. Like the following: > > aio_migratepage() > |-> migrate_page_copy(new, old) > | ...... /* Need barrier here */ > |-> ctx->ring_pages[idx] = new > > Actually, we need a memory barrier between these two operations. > Otherwise, if ctx->ring_pages[] is updated before memory copy due to > the compiler optimization, other processes may have an opportunity > to access to the not fully initialized new ring page. > > So add a wmb to synchronize them. > > Reported-by: Yasuaki Ishimatsu > Signed-off-by: Tang Chen > --- > fs/aio.c | 8 ++++++++ > 1 file changed, 8 insertions(+) > > diff --git a/fs/aio.c b/fs/aio.c > index 50c089c..f0ed838 100644 > --- a/fs/aio.c > +++ b/fs/aio.c > @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, > pgoff_t idx; > spin_lock_irqsave(&ctx->completion_lock, flags); > migrate_page_copy(new, old); > + > + /* > + * Ensure memory copy is finished before updating > + * ctx->ring_pages[]. Otherwise other processes may access to > + * new ring pages which are not fully initialized. > + */ > + smp_wmb(); > + If you put smp_wmb() here, you should put smp_rmb() before kmap() in aio_read_events_ring(). Thanks, Yasuaki Ishimatsu > idx = old->index; > if (idx < (pgoff_t)ctx->nr_pages) { > /* And only do the move if things haven't changed */ > -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: aart@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Yasuaki Ishimatsu Subject: [Update PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. Date: Thu, 27 Feb 2014 21:44:23 +0900 Message-ID: <530F3327.8020205@jp.fujitsu.com> References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> <530F2A2D.50307@jp.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset="ISO-2022-JP" Content-Transfer-Encoding: 7bit Cc: , , , , , , , To: Tang Chen , Return-path: In-Reply-To: <530F2A2D.50307@jp.fujitsu.com> Sender: owner-linux-aio@kvack.org List-Id: linux-fsdevel.vger.kernel.org When doing aio ring page migration, we migrated the page, and update ctx->ring_pages[]. Like the following: aio_migratepage() |-> migrate_page_copy(new, old) | ...... /* Need barrier here */ |-> ctx->ring_pages[idx] = new Actually, we need a memory barrier between these two operations. Otherwise, if ctx->ring_pages[] is updated before memory copy due to the compiler optimization, other processes may have an opportunity to access to the not fully initialized new ring page. So add a wmb and rmb to synchronize them. Signed-off-by: Tang Chen Signed-off-by: Yasuaki Ishimatsu --- fs/aio.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 50c089c..8d9b82b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, pgoff_t idx; spin_lock_irqsave(&ctx->completion_lock, flags); migrate_page_copy(new, old); + + /* + * Ensure memory copy is finished before updating + * ctx->ring_pages[]. Otherwise other processes may access to + * new ring pages which are not fully initialized. + */ + smp_wmb(); + idx = old->index; if (idx < (pgoff_t)ctx->nr_pages) { /* And only do the move if things haven't changed */ @@ -1074,6 +1082,12 @@ static long aio_read_events_ring(struct kioctx *ctx, page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]; pos %= AIO_EVENTS_PER_PAGE; + /* + * Ensure that the page's data was copied from old one by + * aio_migratepage(). + */ + smp_rmb(); + ev = kmap(page); copy_ret = copy_to_user(event + ret, ev + pos, sizeof(*ev) * avail); -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: aart@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Benjamin LaHaise Subject: Re: [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. Date: Thu, 27 Feb 2014 09:57:30 -0500 Message-ID: <20140227145730.GA639@kvack.org> References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Cc: viro@zeniv.linux.org.uk, jmoyer@redhat.com, kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com, isimatu.yasuaki@jp.fujitsu.com, guz.fnst@cn.fujitsu.com, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-kernel@vger.kernel.org To: Tang Chen Return-path: Content-Disposition: inline In-Reply-To: <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> Sender: owner-linux-aio@kvack.org List-Id: linux-fsdevel.vger.kernel.org On Thu, Feb 27, 2014 at 06:40:16PM +0800, Tang Chen wrote: > When doing aio ring page migration, we migrated the page, and update > ctx->ring_pages[]. Like the following: > > aio_migratepage() > |-> migrate_page_copy(new, old) > | ...... /* Need barrier here */ > |-> ctx->ring_pages[idx] = new > > Actually, we need a memory barrier between these two operations. > Otherwise, if ctx->ring_pages[] is updated before memory copy due to > the compiler optimization, other processes may have an opportunity > to access to the not fully initialized new ring page. > > So add a wmb to synchronize them. The smp_wmb() is not needed after you added the taking of ctx->completion_lock lock since all accesses to ring_pages is then protected by the spinlock. Why are you adding this then? Or have you missed adding the lock somewhere? Also, if you've changed the patch, it is customary to add a "v2" somewhere in the patch title so that I have some idea what version of the patch should be applied. -ben > Reported-by: Yasuaki Ishimatsu > Signed-off-by: Tang Chen > --- > fs/aio.c | 8 ++++++++ > 1 file changed, 8 insertions(+) > > diff --git a/fs/aio.c b/fs/aio.c > index 50c089c..f0ed838 100644 > --- a/fs/aio.c > +++ b/fs/aio.c > @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, > pgoff_t idx; > spin_lock_irqsave(&ctx->completion_lock, flags); > migrate_page_copy(new, old); > + > + /* > + * Ensure memory copy is finished before updating > + * ctx->ring_pages[]. Otherwise other processes may access to > + * new ring pages which are not fully initialized. > + */ > + smp_wmb(); > + > idx = old->index; > if (idx < (pgoff_t)ctx->nr_pages) { > /* And only do the move if things haven't changed */ > -- > 1.8.3.1 -- "Thought is the essence of where you are now." -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: aart@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Gu Zheng Subject: Re: [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. Date: Fri, 28 Feb 2014 09:29:19 +0800 Message-ID: <530FE66F.9040308@cn.fujitsu.com> References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> <20140227145730.GA639@kvack.org> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Cc: Tang Chen , viro@zeniv.linux.org.uk, jmoyer@redhat.com, kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com, isimatu.yasuaki@jp.fujitsu.com, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-kernel@vger.kernel.org To: Benjamin LaHaise Return-path: In-Reply-To: <20140227145730.GA639@kvack.org> Sender: linux-kernel-owner@vger.kernel.org List-Id: linux-fsdevel.vger.kernel.org Hi Ben, On 02/27/2014 10:57 PM, Benjamin LaHaise wrote: > On Thu, Feb 27, 2014 at 06:40:16PM +0800, Tang Chen wrote: >> When doing aio ring page migration, we migrated the page, and update >> ctx->ring_pages[]. Like the following: >> >> aio_migratepage() >> |-> migrate_page_copy(new, old) >> | ...... /* Need barrier here */ >> |-> ctx->ring_pages[idx] = new >> >> Actually, we need a memory barrier between these two operations. >> Otherwise, if ctx->ring_pages[] is updated before memory copy due to >> the compiler optimization, other processes may have an opportunity >> to access to the not fully initialized new ring page. >> >> So add a wmb to synchronize them. > > The smp_wmb() is not needed after you added the taking of ctx->completion_lock > lock since all accesses to ring_pages is then protected by the spinlock. > Why are you adding this then? Or have you missed adding the lock somewhere? > Also, if you've changed the patch, it is customary to add a "v2" somewhere in > the patch title so that I have some idea what version of the patch should be > applied. The completion_lock just protects updating ring->head when reading events, so wmb is still needed here. Regards, Gu > > -ben > >> Reported-by: Yasuaki Ishimatsu >> Signed-off-by: Tang Chen >> --- >> fs/aio.c | 8 ++++++++ >> 1 file changed, 8 insertions(+) >> >> diff --git a/fs/aio.c b/fs/aio.c >> index 50c089c..f0ed838 100644 >> --- a/fs/aio.c >> +++ b/fs/aio.c >> @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, >> pgoff_t idx; >> spin_lock_irqsave(&ctx->completion_lock, flags); >> migrate_page_copy(new, old); >> + >> + /* >> + * Ensure memory copy is finished before updating >> + * ctx->ring_pages[]. Otherwise other processes may access to >> + * new ring pages which are not fully initialized. >> + */ >> + smp_wmb(); >> + >> idx = old->index; >> if (idx < (pgoff_t)ctx->nr_pages) { >> /* And only do the move if things haven't changed */ >> -- >> 1.8.3.1 > From mboxrd@z Thu Jan 1 00:00:00 1970 From: Yasuaki Ishimatsu Subject: Re: [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. Date: Fri, 28 Feb 2014 16:25:39 +0900 Message-ID: <531039F3.4050707@jp.fujitsu.com> References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> <20140227145730.GA639@kvack.org> Mime-Version: 1.0 Content-Type: text/plain; charset="ISO-8859-1"; format=flowed Content-Transfer-Encoding: 7bit Cc: Tang Chen , , , , , , , , To: Benjamin LaHaise Return-path: In-Reply-To: <20140227145730.GA639@kvack.org> Sender: owner-linux-aio@kvack.org List-Id: linux-fsdevel.vger.kernel.org (2014/02/27 23:57), Benjamin LaHaise wrote: > On Thu, Feb 27, 2014 at 06:40:16PM +0800, Tang Chen wrote: >> When doing aio ring page migration, we migrated the page, and update >> ctx->ring_pages[]. Like the following: >> >> aio_migratepage() >> |-> migrate_page_copy(new, old) >> | ...... /* Need barrier here */ >> |-> ctx->ring_pages[idx] = new >> >> Actually, we need a memory barrier between these two operations. >> Otherwise, if ctx->ring_pages[] is updated before memory copy due to >> the compiler optimization, other processes may have an opportunity >> to access to the not fully initialized new ring page. >> >> So add a wmb to synchronize them. > > The smp_wmb() is not needed after you added the taking of ctx->completion_lock > lock since all accesses to ring_pages is then protected by the spinlock. > Why are you adding this then? Or have you missed adding the lock somewhere? Pleaes see following thread. It's a updated patch. https://lkml.org/lkml/2014/2/27/237 aio_read_events_ring() accesses ring_pages without locking ctx-completion_lock. If copying old page'date to new page runs after new page was set to ctx->ring_pages by changing order, aio_read_events_ring() may not work correctly. So smp_wmb() and smp_rmb() is needed. Thanks, Yasuaki Ishimatsu > Also, if you've changed the patch, it is customary to add a "v2" somewhere in > the patch title so that I have some idea what version of the patch should be > applied. > > -ben > >> Reported-by: Yasuaki Ishimatsu >> Signed-off-by: Tang Chen >> --- >> fs/aio.c | 8 ++++++++ >> 1 file changed, 8 insertions(+) >> >> diff --git a/fs/aio.c b/fs/aio.c >> index 50c089c..f0ed838 100644 >> --- a/fs/aio.c >> +++ b/fs/aio.c >> @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, >> pgoff_t idx; >> spin_lock_irqsave(&ctx->completion_lock, flags); >> migrate_page_copy(new, old); >> + >> + /* >> + * Ensure memory copy is finished before updating >> + * ctx->ring_pages[]. Otherwise other processes may access to >> + * new ring pages which are not fully initialized. >> + */ >> + smp_wmb(); >> + >> idx = old->index; >> if (idx < (pgoff_t)ctx->nr_pages) { >> /* And only do the move if things haven't changed */ >> -- >> 1.8.3.1 > -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: aart@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Miao Xie Subject: Re: [Update PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. Date: Tue, 04 Mar 2014 13:35:29 +0800 Message-ID: <53156621.60900@cn.fujitsu.com> References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> <530F2A2D.50307@jp.fujitsu.com> <530F3327.8020205@jp.fujitsu.com> Reply-To: miaox@cn.fujitsu.com Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-2022-JP Content-Transfer-Encoding: 7bit Cc: viro@zeniv.linux.org.uk, jmoyer@redhat.com, kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com, guz.fnst@cn.fujitsu.com, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-kernel@vger.kernel.org To: Yasuaki Ishimatsu , Tang Chen , bcrl@kvack.org Return-path: In-Reply-To: <530F3327.8020205@jp.fujitsu.com> Sender: owner-linux-aio@kvack.org List-Id: linux-fsdevel.vger.kernel.org On thu, 27 Feb 2014 21:44:23 +0900, Yasuaki Ishimatsu wrote: > When doing aio ring page migration, we migrated the page, and update > ctx->ring_pages[]. Like the following: > > aio_migratepage() > |-> migrate_page_copy(new, old) > | ...... /* Need barrier here */ > |-> ctx->ring_pages[idx] = new > > Actually, we need a memory barrier between these two operations. > Otherwise, if ctx->ring_pages[] is updated before memory copy due to > the compiler optimization, other processes may have an opportunity > to access to the not fully initialized new ring page. > > So add a wmb and rmb to synchronize them. > > Signed-off-by: Tang Chen > Signed-off-by: Yasuaki Ishimatsu > > --- > fs/aio.c | 14 ++++++++++++++ > 1 file changed, 14 insertions(+) > > diff --git a/fs/aio.c b/fs/aio.c > index 50c089c..8d9b82b 100644 > --- a/fs/aio.c > +++ b/fs/aio.c > @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, > pgoff_t idx; > spin_lock_irqsave(&ctx->completion_lock, flags); > migrate_page_copy(new, old); > + > + /* > + * Ensure memory copy is finished before updating > + * ctx->ring_pages[]. Otherwise other processes may access to > + * new ring pages which are not fully initialized. > + */ > + smp_wmb(); > + > idx = old->index; > if (idx < (pgoff_t)ctx->nr_pages) { > /* And only do the move if things haven't changed */ > @@ -1074,6 +1082,12 @@ static long aio_read_events_ring(struct kioctx *ctx, > page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]; > pos %= AIO_EVENTS_PER_PAGE; > > + /* > + * Ensure that the page's data was copied from old one by > + * aio_migratepage(). > + */ > + smp_rmb(); > + smp_read_barrier_depends() is better. "One could place an A smp_rmb() primitive between the pointer fetch and dereference. However, this imposes unneeded overhead on systems (such as i386, IA64, PPC, and SPARC) that respect data dependencies on the read side. A smp_read_barrier_depends() primitive has been added to the Linux 2.6 kernel to eliminate overhead on these systems." -- From Chapter 7.1 of Written by Paul E. McKenney Thanks Miao > ev = kmap(page); > copy_ret = copy_to_user(event + ret, ev + pos, > sizeof(*ev) * avail); > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ > -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: aart@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: KOSAKI Motohiro Subject: Re: [Update PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. Date: Tue, 4 Mar 2014 22:04:42 -0500 Message-ID: References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> <530F2A2D.50307@jp.fujitsu.com> <530F3327.8020205@jp.fujitsu.com> <53156621.60900@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Cc: Yasuaki Ishimatsu , Tang Chen , Benjamin LaHaise , Al Viro , Jeff Moyer , guz.fnst@cn.fujitsu.com, "linux-fsdevel@vger.kernel.org" , linux-aio@kvack.org, LKML To: Miao Xie Return-path: In-Reply-To: <53156621.60900@cn.fujitsu.com> Sender: owner-linux-aio@kvack.org List-Id: linux-fsdevel.vger.kernel.org >> + /* >> + * Ensure that the page's data was copied from old one by >> + * aio_migratepage(). >> + */ >> + smp_rmb(); >> + > > smp_read_barrier_depends() is better. > > "One could place an A smp_rmb() primitive between the pointer fetch and > dereference. However, this imposes unneeded overhead on systems (such as > i386, IA64, PPC, and SPARC) that respect data dependencies on the read side. > A smp_read_barrier_depends() primitive has been added to the Linux 2.6 kernel > to eliminate overhead on these systems." > -- From Chapter 7.1 of > Written by Paul E. McKenney > Right. The document of memory barrier described this situation. see https://www.kernel.org/doc/Documentation/memory-barriers.txt CPU 1 CPU 2 =============== =============== { M[0] == 1, M[1] == 2, M[3] = 3, P == 0, Q == 3 } M[1] = 4; ACCESS_ONCE(P) = 1 Q = ACCESS_ONCE(P); D = M[Q]; -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: aart@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Yasuaki Ishimatsu Subject: Re: [Update PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. Date: Wed, 5 Mar 2014 15:59:23 +0900 Message-ID: <5316CB4B.8060905@jp.fujitsu.com> References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> <530F2A2D.50307@jp.fujitsu.com> <530F3327.8020205@jp.fujitsu.com> <53156621.60900@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset="ISO-2022-JP" Content-Transfer-Encoding: 7bit Cc: Tang Chen , , , , , , , , , To: Return-path: In-Reply-To: <53156621.60900@cn.fujitsu.com> Sender: owner-linux-aio@kvack.org List-Id: linux-fsdevel.vger.kernel.org (2014/03/04 14:35), Miao Xie wrote: > On thu, 27 Feb 2014 21:44:23 +0900, Yasuaki Ishimatsu wrote: >> When doing aio ring page migration, we migrated the page, and update >> ctx->ring_pages[]. Like the following: >> >> aio_migratepage() >> |-> migrate_page_copy(new, old) >> | ...... /* Need barrier here */ >> |-> ctx->ring_pages[idx] = new >> >> Actually, we need a memory barrier between these two operations. >> Otherwise, if ctx->ring_pages[] is updated before memory copy due to >> the compiler optimization, other processes may have an opportunity >> to access to the not fully initialized new ring page. >> >> So add a wmb and rmb to synchronize them. >> >> Signed-off-by: Tang Chen >> Signed-off-by: Yasuaki Ishimatsu >> >> --- >> fs/aio.c | 14 ++++++++++++++ >> 1 file changed, 14 insertions(+) >> >> diff --git a/fs/aio.c b/fs/aio.c >> index 50c089c..8d9b82b 100644 >> --- a/fs/aio.c >> +++ b/fs/aio.c >> @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, >> pgoff_t idx; >> spin_lock_irqsave(&ctx->completion_lock, flags); >> migrate_page_copy(new, old); >> + >> + /* >> + * Ensure memory copy is finished before updating >> + * ctx->ring_pages[]. Otherwise other processes may access to >> + * new ring pages which are not fully initialized. >> + */ >> + smp_wmb(); >> + >> idx = old->index; >> if (idx < (pgoff_t)ctx->nr_pages) { >> /* And only do the move if things haven't changed */ >> @@ -1074,6 +1082,12 @@ static long aio_read_events_ring(struct kioctx *ctx, >> page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]; >> pos %= AIO_EVENTS_PER_PAGE; >> >> + /* >> + * Ensure that the page's data was copied from old one by >> + * aio_migratepage(). >> + */ >> + smp_rmb(); >> + > > smp_read_barrier_depends() is better. > > "One could place an A smp_rmb() primitive between the pointer fetch and > dereference. However, this imposes unneeded overhead on systems (such as > i386, IA64, PPC, and SPARC) that respect data dependencies on the read side. > A smp_read_barrier_depends() primitive has been added to the Linux 2.6 kernel > to eliminate overhead on these systems." > -- From Chapter 7.1 of > Written by Paul E. McKenney > > Thanks > Miao Thank you for your comment. I'll update soon. Thanks, Yasauaki Ishimatsu > >> ev = kmap(page); >> copy_ret = copy_to_user(event + ret, ev + pos, >> sizeof(*ev) * avail); >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html >> Please read the FAQ at http://www.tux.org/lkml/ >> > -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: aart@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Yasuaki Ishimatsu Subject: [Update v2 PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. Date: Wed, 5 Mar 2014 16:17:42 +0900 Message-ID: <5316CF96.20902@jp.fujitsu.com> References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> <530F2A2D.50307@jp.fujitsu.com> <530F3327.8020205@jp.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset="ISO-2022-JP" Content-Transfer-Encoding: 7bit Cc: , , , , , , , , To: Tang Chen , Return-path: In-Reply-To: <530F3327.8020205@jp.fujitsu.com> Sender: owner-linux-aio@kvack.org List-Id: linux-fsdevel.vger.kernel.org When doing aio ring page migration, we migrated the page, and update ctx->ring_pages[]. Like the following: aio_migratepage() |-> migrate_page_copy(new, old) | ...... /* Need barrier here */ |-> ctx->ring_pages[idx] = new Actually, we need a memory barrier between these two operations. Otherwise, if ctx->ring_pages[] is updated before memory copy due to the compiler optimization, other processes may have an opportunity to access to the not fully initialized new ring page. So add a wmb and rmb to synchronize them. Signed-off-by: Tang Chen Signed-off-by: Yasuaki Ishimatsu --- v2: change smp_rmb() to smp_read_barrier_depends(). Thanks Miao. --- fs/aio.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 50c089c..98c7f2d 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, pgoff_t idx; spin_lock_irqsave(&ctx->completion_lock, flags); migrate_page_copy(new, old); + + /* + * Ensure memory copy is finished before updating + * ctx->ring_pages[]. Otherwise other processes may access to + * new ring pages which are not fully initialized. + */ + smp_wmb(); + idx = old->index; if (idx < (pgoff_t)ctx->nr_pages) { /* And only do the move if things haven't changed */ @@ -1074,6 +1082,12 @@ static long aio_read_events_ring(struct kioctx *ctx, page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]; pos %= AIO_EVENTS_PER_PAGE; + /* + * Ensure that the page's data was copied from old one by + * aio_migratepage(). + */ + smp_read_barrier_depends(); + ev = kmap(page); copy_ret = copy_to_user(event + ret, ev + pos, sizeof(*ev) * avail); -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: aart@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 From: Jeff Moyer Subject: Re: [PATCH 1/2] aio, memory-hotplug: Fix confliction when migrating and accessing ring pages. Date: Wed, 05 Mar 2014 14:23:58 -0500 Message-ID: References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-2-git-send-email-tangchen@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Cc: viro@zeniv.linux.org.uk, bcrl@kvack.org, kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com, isimatu.yasuaki@jp.fujitsu.com, guz.fnst@cn.fujitsu.com, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-kernel@vger.kernel.org To: Tang Chen Return-path: In-Reply-To: <1393497616-16428-2-git-send-email-tangchen@cn.fujitsu.com> (Tang Chen's message of "Thu, 27 Feb 2014 18:40:15 +0800") Sender: owner-linux-aio@kvack.org List-Id: linux-fsdevel.vger.kernel.org Tang Chen writes: > AIO ring page migration has been implemented by the following patch: > > https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/fs/aio.c?id=36bc08cc01709b4a9bb563b35aa530241ddc63e3 > > In this patch, ctx->completion_lock is used to prevent other processes > from accessing the ring page being migrated. > > But in aio_setup_ring(), ioctx_add_table() and aio_read_events_ring(), > when writing to the ring page, they didn't take ctx->completion_lock. > > As a result, for example, we have the following problem: > > thread 1 | thread 2 > | > aio_migratepage() | > |-> take ctx->completion_lock | > |-> migrate_page_copy(new, old) | > | *NOW*, ctx->ring_pages[idx] == old | > | > | *NOW*, ctx->ring_pages[idx] == old > | aio_read_events_ring() > | |-> ring = kmap_atomic(ctx->ring_pages[0]) > | |-> ring->head = head; *HERE, write to the old ring page* > | |-> kunmap_atomic(ring); > | > |-> ctx->ring_pages[idx] = new | > | *BUT NOW*, the content of | > | ring_pages[idx] is old. | > |-> release ctx->completion_lock | > > As above, the new ring page will not be updated. > > The solution is taking ctx->completion_lock in thread 2, which means, > in aio_setup_ring(), ioctx_add_table() and aio_read_events_ring() when > writing to ring pages. Thanks for the good explanation and for adding comments to the code. This looks right to me. I guess the only issue I have with it is that the code paths changed don't run in interrupt context, so I think you could just use spin_lock_irq. That's not a big deal, though. Reviewed-by: Jeff Moyer > Reported-by: Yasuaki Ishimatsu > Signed-off-by: Tang Chen > --- > fs/aio.c | 33 +++++++++++++++++++++++++++++++++ > 1 file changed, 33 insertions(+) > > diff --git a/fs/aio.c b/fs/aio.c > index 062a5f6..50c089c 100644 > --- a/fs/aio.c > +++ b/fs/aio.c > @@ -366,6 +366,7 @@ static int aio_setup_ring(struct kioctx *ctx) > int nr_pages; > int i; > struct file *file; > + unsigned long flags; > > /* Compensate for the ring buffer's head/tail overlap entry */ > nr_events += 2; /* 1 is required, 2 for good luck */ > @@ -437,6 +438,14 @@ static int aio_setup_ring(struct kioctx *ctx) > ctx->user_id = ctx->mmap_base; > ctx->nr_events = nr_events; /* trusted copy */ > > + /* > + * The aio ring pages are user space pages, so they can be migrated. > + * When writing to an aio ring page, we should ensure the page is not > + * being migrated. Aio page migration procedure is protected by > + * ctx->completion_lock, so we add this lock here. > + */ > + spin_lock_irqsave(&ctx->completion_lock, flags); > + > ring = kmap_atomic(ctx->ring_pages[0]); > ring->nr = nr_events; /* user copy */ > ring->id = ~0U; > @@ -448,6 +457,8 @@ static int aio_setup_ring(struct kioctx *ctx) > kunmap_atomic(ring); > flush_dcache_page(ctx->ring_pages[0]); > > + spin_unlock_irqrestore(&ctx->completion_lock, flags); > + > return 0; > } > > @@ -542,6 +553,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) > unsigned i, new_nr; > struct kioctx_table *table, *old; > struct aio_ring *ring; > + unsigned long flags; > > spin_lock(&mm->ioctx_lock); > rcu_read_lock(); > @@ -556,9 +568,19 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) > rcu_read_unlock(); > spin_unlock(&mm->ioctx_lock); > > + /* > + * Accessing ring pages must be done > + * holding ctx->completion_lock to > + * prevent aio ring page migration > + * procedure from migrating ring pages. > + */ > + spin_lock_irqsave(&ctx->completion_lock, > + flags); > ring = kmap_atomic(ctx->ring_pages[0]); > ring->id = ctx->id; > kunmap_atomic(ring); > + spin_unlock_irqrestore( > + &ctx->completion_lock, flags); > return 0; > } > > @@ -1021,6 +1043,7 @@ static long aio_read_events_ring(struct kioctx *ctx, > unsigned head, tail, pos; > long ret = 0; > int copy_ret; > + unsigned long flags; > > mutex_lock(&ctx->ring_lock); > > @@ -1066,11 +1089,21 @@ static long aio_read_events_ring(struct kioctx *ctx, > head %= ctx->nr_events; > } > > + /* > + * The aio ring pages are user space pages, so they can be migrated. > + * When writing to an aio ring page, we should ensure the page is not > + * being migrated. Aio page migration procedure is protected by > + * ctx->completion_lock, so we add this lock here. > + */ > + spin_lock_irqsave(&ctx->completion_lock, flags); > + > ring = kmap_atomic(ctx->ring_pages[0]); > ring->head = head; > kunmap_atomic(ring); > flush_dcache_page(ctx->ring_pages[0]); > > + spin_unlock_irqrestore(&ctx->completion_lock, flags); > + > pr_debug("%li h%u t%u\n", ret, head, tail); > > put_reqs_available(ctx, ret); -- To unsubscribe, send a message with 'unsubscribe linux-aio' in the body to majordomo@kvack.org. For more info on Linux AIO, see: http://www.kvack.org/aio/ Don't email: aart@kvack.org From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752272AbaB0KiR (ORCPT ); Thu, 27 Feb 2014 05:38:17 -0500 Received: from cn.fujitsu.com ([222.73.24.84]:55710 "EHLO song.cn.fujitsu.com" rhost-flags-OK-FAIL-OK-OK) by vger.kernel.org with ESMTP id S1751202AbaB0KiP (ORCPT ); Thu, 27 Feb 2014 05:38:15 -0500 X-IronPort-AV: E=Sophos;i="4.97,553,1389715200"; d="scan'208";a="9615208" From: Tang Chen To: viro@zeniv.linux.org.uk, bcrl@kvack.org, jmoyer@redhat.com, kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com, isimatu.yasuaki@jp.fujitsu.com, guz.fnst@cn.fujitsu.com Cc: linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-kernel@vger.kernel.org Subject: [PATCH 0/2] Bug fix in aio ring page migration. Date: Thu, 27 Feb 2014 18:40:14 +0800 Message-Id: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> X-Mailer: git-send-email 1.7.11.7 X-MIMETrack: Itemize by SMTP Server on mailserver/fnst(Release 8.5.3|September 15, 2011) at 2014/02/27 18:35:02, Serialize by Router on mailserver/fnst(Release 8.5.3|September 15, 2011) at 2014/02/27 18:35:03, Serialize complete at 2014/02/27 18:35:03 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org This patch-set fixes the following two problems: 1. Need to use ctx->completion_lock to protect ring pages from being mis-written while migration. 2. Need memory barrier to ensure memory copy is done before ctx->ring_pages[] is updated. NOTE: AIO ring page migration was implemented since Linux 3.12. So we need to merge these two patches into 3.12 stable tree. Tang Chen (2): aio, memory-hotplug: Fix confliction when migrating and accessing ring pages. aio, mem-hotplug: Add memory barrier to aio ring page migration. fs/aio.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) -- 1.8.3.1 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752390AbaB0KiX (ORCPT ); Thu, 27 Feb 2014 05:38:23 -0500 Received: from cn.fujitsu.com ([222.73.24.84]:33132 "EHLO song.cn.fujitsu.com" rhost-flags-OK-FAIL-OK-OK) by vger.kernel.org with ESMTP id S1752222AbaB0KiS (ORCPT ); Thu, 27 Feb 2014 05:38:18 -0500 X-IronPort-AV: E=Sophos;i="4.97,553,1389715200"; d="scan'208";a="9615210" From: Tang Chen To: viro@zeniv.linux.org.uk, bcrl@kvack.org, jmoyer@redhat.com, kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com, isimatu.yasuaki@jp.fujitsu.com, guz.fnst@cn.fujitsu.com Cc: linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-kernel@vger.kernel.org Subject: [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. Date: Thu, 27 Feb 2014 18:40:16 +0800 Message-Id: <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> X-Mailer: git-send-email 1.7.11.7 In-Reply-To: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> X-MIMETrack: Itemize by SMTP Server on mailserver/fnst(Release 8.5.3|September 15, 2011) at 2014/02/27 18:35:02, Serialize by Router on mailserver/fnst(Release 8.5.3|September 15, 2011) at 2014/02/27 18:35:04, Serialize complete at 2014/02/27 18:35:04 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org When doing aio ring page migration, we migrated the page, and update ctx->ring_pages[]. Like the following: aio_migratepage() |-> migrate_page_copy(new, old) | ...... /* Need barrier here */ |-> ctx->ring_pages[idx] = new Actually, we need a memory barrier between these two operations. Otherwise, if ctx->ring_pages[] is updated before memory copy due to the compiler optimization, other processes may have an opportunity to access to the not fully initialized new ring page. So add a wmb to synchronize them. Reported-by: Yasuaki Ishimatsu Signed-off-by: Tang Chen --- fs/aio.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 50c089c..f0ed838 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, pgoff_t idx; spin_lock_irqsave(&ctx->completion_lock, flags); migrate_page_copy(new, old); + + /* + * Ensure memory copy is finished before updating + * ctx->ring_pages[]. Otherwise other processes may access to + * new ring pages which are not fully initialized. + */ + smp_wmb(); + idx = old->index; if (idx < (pgoff_t)ctx->nr_pages) { /* And only do the move if things haven't changed */ -- 1.8.3.1 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752334AbaB0KiV (ORCPT ); Thu, 27 Feb 2014 05:38:21 -0500 Received: from cn.fujitsu.com ([222.73.24.84]:55710 "EHLO song.cn.fujitsu.com" rhost-flags-OK-FAIL-OK-OK) by vger.kernel.org with ESMTP id S1752125AbaB0KiS (ORCPT ); Thu, 27 Feb 2014 05:38:18 -0500 X-IronPort-AV: E=Sophos;i="4.97,553,1389715200"; d="scan'208";a="9615209" From: Tang Chen To: viro@zeniv.linux.org.uk, bcrl@kvack.org, jmoyer@redhat.com, kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com, isimatu.yasuaki@jp.fujitsu.com, guz.fnst@cn.fujitsu.com Cc: linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-kernel@vger.kernel.org Subject: [PATCH 1/2] aio, memory-hotplug: Fix confliction when migrating and accessing ring pages. Date: Thu, 27 Feb 2014 18:40:15 +0800 Message-Id: <1393497616-16428-2-git-send-email-tangchen@cn.fujitsu.com> X-Mailer: git-send-email 1.7.11.7 In-Reply-To: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> X-MIMETrack: Itemize by SMTP Server on mailserver/fnst(Release 8.5.3|September 15, 2011) at 2014/02/27 18:35:02, Serialize by Router on mailserver/fnst(Release 8.5.3|September 15, 2011) at 2014/02/27 18:35:03, Serialize complete at 2014/02/27 18:35:03 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org AIO ring page migration has been implemented by the following patch: https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/fs/aio.c?id=36bc08cc01709b4a9bb563b35aa530241ddc63e3 In this patch, ctx->completion_lock is used to prevent other processes from accessing the ring page being migrated. But in aio_setup_ring(), ioctx_add_table() and aio_read_events_ring(), when writing to the ring page, they didn't take ctx->completion_lock. As a result, for example, we have the following problem: thread 1 | thread 2 | aio_migratepage() | |-> take ctx->completion_lock | |-> migrate_page_copy(new, old) | | *NOW*, ctx->ring_pages[idx] == old | | | *NOW*, ctx->ring_pages[idx] == old | aio_read_events_ring() | |-> ring = kmap_atomic(ctx->ring_pages[0]) | |-> ring->head = head; *HERE, write to the old ring page* | |-> kunmap_atomic(ring); | |-> ctx->ring_pages[idx] = new | | *BUT NOW*, the content of | | ring_pages[idx] is old. | |-> release ctx->completion_lock | As above, the new ring page will not be updated. The solution is taking ctx->completion_lock in thread 2, which means, in aio_setup_ring(), ioctx_add_table() and aio_read_events_ring() when writing to ring pages. Reported-by: Yasuaki Ishimatsu Signed-off-by: Tang Chen --- fs/aio.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 062a5f6..50c089c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -366,6 +366,7 @@ static int aio_setup_ring(struct kioctx *ctx) int nr_pages; int i; struct file *file; + unsigned long flags; /* Compensate for the ring buffer's head/tail overlap entry */ nr_events += 2; /* 1 is required, 2 for good luck */ @@ -437,6 +438,14 @@ static int aio_setup_ring(struct kioctx *ctx) ctx->user_id = ctx->mmap_base; ctx->nr_events = nr_events; /* trusted copy */ + /* + * The aio ring pages are user space pages, so they can be migrated. + * When writing to an aio ring page, we should ensure the page is not + * being migrated. Aio page migration procedure is protected by + * ctx->completion_lock, so we add this lock here. + */ + spin_lock_irqsave(&ctx->completion_lock, flags); + ring = kmap_atomic(ctx->ring_pages[0]); ring->nr = nr_events; /* user copy */ ring->id = ~0U; @@ -448,6 +457,8 @@ static int aio_setup_ring(struct kioctx *ctx) kunmap_atomic(ring); flush_dcache_page(ctx->ring_pages[0]); + spin_unlock_irqrestore(&ctx->completion_lock, flags); + return 0; } @@ -542,6 +553,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) unsigned i, new_nr; struct kioctx_table *table, *old; struct aio_ring *ring; + unsigned long flags; spin_lock(&mm->ioctx_lock); rcu_read_lock(); @@ -556,9 +568,19 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) rcu_read_unlock(); spin_unlock(&mm->ioctx_lock); + /* + * Accessing ring pages must be done + * holding ctx->completion_lock to + * prevent aio ring page migration + * procedure from migrating ring pages. + */ + spin_lock_irqsave(&ctx->completion_lock, + flags); ring = kmap_atomic(ctx->ring_pages[0]); ring->id = ctx->id; kunmap_atomic(ring); + spin_unlock_irqrestore( + &ctx->completion_lock, flags); return 0; } @@ -1021,6 +1043,7 @@ static long aio_read_events_ring(struct kioctx *ctx, unsigned head, tail, pos; long ret = 0; int copy_ret; + unsigned long flags; mutex_lock(&ctx->ring_lock); @@ -1066,11 +1089,21 @@ static long aio_read_events_ring(struct kioctx *ctx, head %= ctx->nr_events; } + /* + * The aio ring pages are user space pages, so they can be migrated. + * When writing to an aio ring page, we should ensure the page is not + * being migrated. Aio page migration procedure is protected by + * ctx->completion_lock, so we add this lock here. + */ + spin_lock_irqsave(&ctx->completion_lock, flags); + ring = kmap_atomic(ctx->ring_pages[0]); ring->head = head; kunmap_atomic(ring); flush_dcache_page(ctx->ring_pages[0]); + spin_unlock_irqrestore(&ctx->completion_lock, flags); + pr_debug("%li h%u t%u\n", ret, head, tail); put_reqs_available(ctx, ret); -- 1.8.3.1 From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752670AbaB0MGw (ORCPT ); Thu, 27 Feb 2014 07:06:52 -0500 Received: from fgwmail6.fujitsu.co.jp ([192.51.44.36]:59553 "EHLO fgwmail6.fujitsu.co.jp" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751048AbaB0MGt (ORCPT ); Thu, 27 Feb 2014 07:06:49 -0500 X-SecurityPolicyCheck: OK by SHieldMailChecker v2.0.1 X-SHieldMailCheckerPolicyVersion: FJ-ISEC-20120718-3 Message-ID: <530F2A2D.50307@jp.fujitsu.com> Date: Thu, 27 Feb 2014 21:06:05 +0900 From: Yasuaki Ishimatsu User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20130801 Thunderbird/17.0.8 MIME-Version: 1.0 To: Tang Chen CC: , , , , , , , , Subject: Re: [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> In-Reply-To: <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> Content-Type: text/plain; charset="ISO-2022-JP" Content-Transfer-Encoding: 7bit X-SecurityPolicyCheck-GC: OK by FENCE-Mail Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Hi Tang, (2014/02/27 19:40), Tang Chen wrote: > When doing aio ring page migration, we migrated the page, and update > ctx->ring_pages[]. Like the following: > > aio_migratepage() > |-> migrate_page_copy(new, old) > | ...... /* Need barrier here */ > |-> ctx->ring_pages[idx] = new > > Actually, we need a memory barrier between these two operations. > Otherwise, if ctx->ring_pages[] is updated before memory copy due to > the compiler optimization, other processes may have an opportunity > to access to the not fully initialized new ring page. > > So add a wmb to synchronize them. > > Reported-by: Yasuaki Ishimatsu > Signed-off-by: Tang Chen > --- > fs/aio.c | 8 ++++++++ > 1 file changed, 8 insertions(+) > > diff --git a/fs/aio.c b/fs/aio.c > index 50c089c..f0ed838 100644 > --- a/fs/aio.c > +++ b/fs/aio.c > @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, > pgoff_t idx; > spin_lock_irqsave(&ctx->completion_lock, flags); > migrate_page_copy(new, old); > + > + /* > + * Ensure memory copy is finished before updating > + * ctx->ring_pages[]. Otherwise other processes may access to > + * new ring pages which are not fully initialized. > + */ > + smp_wmb(); > + If you put smp_wmb() here, you should put smp_rmb() before kmap() in aio_read_events_ring(). Thanks, Yasuaki Ishimatsu > idx = old->index; > if (idx < (pgoff_t)ctx->nr_pages) { > /* And only do the move if things haven't changed */ > From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752437AbaB0MpA (ORCPT ); Thu, 27 Feb 2014 07:45:00 -0500 Received: from fgwmail6.fujitsu.co.jp ([192.51.44.36]:41443 "EHLO fgwmail6.fujitsu.co.jp" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750904AbaB0Mo6 (ORCPT ); Thu, 27 Feb 2014 07:44:58 -0500 X-SecurityPolicyCheck: OK by SHieldMailChecker v2.0.1 X-SHieldMailCheckerPolicyVersion: FJ-ISEC-20120718-3 Message-ID: <530F3327.8020205@jp.fujitsu.com> Date: Thu, 27 Feb 2014 21:44:23 +0900 From: Yasuaki Ishimatsu User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20130801 Thunderbird/17.0.8 MIME-Version: 1.0 To: Tang Chen , CC: , , , , , , , Subject: [Update PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> <530F2A2D.50307@jp.fujitsu.com> In-Reply-To: <530F2A2D.50307@jp.fujitsu.com> Content-Type: text/plain; charset="ISO-2022-JP" Content-Transfer-Encoding: 7bit X-SecurityPolicyCheck-GC: OK by FENCE-Mail Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org When doing aio ring page migration, we migrated the page, and update ctx->ring_pages[]. Like the following: aio_migratepage() |-> migrate_page_copy(new, old) | ...... /* Need barrier here */ |-> ctx->ring_pages[idx] = new Actually, we need a memory barrier between these two operations. Otherwise, if ctx->ring_pages[] is updated before memory copy due to the compiler optimization, other processes may have an opportunity to access to the not fully initialized new ring page. So add a wmb and rmb to synchronize them. Signed-off-by: Tang Chen Signed-off-by: Yasuaki Ishimatsu --- fs/aio.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 50c089c..8d9b82b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, pgoff_t idx; spin_lock_irqsave(&ctx->completion_lock, flags); migrate_page_copy(new, old); + + /* + * Ensure memory copy is finished before updating + * ctx->ring_pages[]. Otherwise other processes may access to + * new ring pages which are not fully initialized. + */ + smp_wmb(); + idx = old->index; if (idx < (pgoff_t)ctx->nr_pages) { /* And only do the move if things haven't changed */ @@ -1074,6 +1082,12 @@ static long aio_read_events_ring(struct kioctx *ctx, page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]; pos %= AIO_EVENTS_PER_PAGE; + /* + * Ensure that the page's data was copied from old one by + * aio_migratepage(). + */ + smp_rmb(); + ev = kmap(page); copy_ret = copy_to_user(event + ret, ev + pos, sizeof(*ev) * avail); From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752750AbaB0O5e (ORCPT ); Thu, 27 Feb 2014 09:57:34 -0500 Received: from kanga.kvack.org ([205.233.56.17]:54522 "EHLO kanga.kvack.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750979AbaB0O5c (ORCPT ); Thu, 27 Feb 2014 09:57:32 -0500 Date: Thu, 27 Feb 2014 09:57:30 -0500 From: Benjamin LaHaise To: Tang Chen Cc: viro@zeniv.linux.org.uk, jmoyer@redhat.com, kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com, isimatu.yasuaki@jp.fujitsu.com, guz.fnst@cn.fujitsu.com, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-kernel@vger.kernel.org Subject: Re: [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. Message-ID: <20140227145730.GA639@kvack.org> References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> User-Agent: Mutt/1.4.2.2i Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Thu, Feb 27, 2014 at 06:40:16PM +0800, Tang Chen wrote: > When doing aio ring page migration, we migrated the page, and update > ctx->ring_pages[]. Like the following: > > aio_migratepage() > |-> migrate_page_copy(new, old) > | ...... /* Need barrier here */ > |-> ctx->ring_pages[idx] = new > > Actually, we need a memory barrier between these two operations. > Otherwise, if ctx->ring_pages[] is updated before memory copy due to > the compiler optimization, other processes may have an opportunity > to access to the not fully initialized new ring page. > > So add a wmb to synchronize them. The smp_wmb() is not needed after you added the taking of ctx->completion_lock lock since all accesses to ring_pages is then protected by the spinlock. Why are you adding this then? Or have you missed adding the lock somewhere? Also, if you've changed the patch, it is customary to add a "v2" somewhere in the patch title so that I have some idea what version of the patch should be applied. -ben > Reported-by: Yasuaki Ishimatsu > Signed-off-by: Tang Chen > --- > fs/aio.c | 8 ++++++++ > 1 file changed, 8 insertions(+) > > diff --git a/fs/aio.c b/fs/aio.c > index 50c089c..f0ed838 100644 > --- a/fs/aio.c > +++ b/fs/aio.c > @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, > pgoff_t idx; > spin_lock_irqsave(&ctx->completion_lock, flags); > migrate_page_copy(new, old); > + > + /* > + * Ensure memory copy is finished before updating > + * ctx->ring_pages[]. Otherwise other processes may access to > + * new ring pages which are not fully initialized. > + */ > + smp_wmb(); > + > idx = old->index; > if (idx < (pgoff_t)ctx->nr_pages) { > /* And only do the move if things haven't changed */ > -- > 1.8.3.1 -- "Thought is the essence of where you are now." From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751535AbaB1H0q (ORCPT ); Fri, 28 Feb 2014 02:26:46 -0500 Received: from fgwmail6.fujitsu.co.jp ([192.51.44.36]:57397 "EHLO fgwmail6.fujitsu.co.jp" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751157AbaB1H0o (ORCPT ); Fri, 28 Feb 2014 02:26:44 -0500 X-SecurityPolicyCheck: OK by SHieldMailChecker v2.0.1 X-SHieldMailCheckerPolicyVersion: FJ-ISEC-20120718-3 Message-ID: <531039F3.4050707@jp.fujitsu.com> Date: Fri, 28 Feb 2014 16:25:39 +0900 From: Yasuaki Ishimatsu User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20130801 Thunderbird/17.0.8 MIME-Version: 1.0 To: Benjamin LaHaise CC: Tang Chen , , , , , , , , Subject: Re: [PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> <20140227145730.GA639@kvack.org> In-Reply-To: <20140227145730.GA639@kvack.org> Content-Type: text/plain; charset="ISO-8859-1"; format=flowed Content-Transfer-Encoding: 7bit X-SecurityPolicyCheck-GC: OK by FENCE-Mail Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org (2014/02/27 23:57), Benjamin LaHaise wrote: > On Thu, Feb 27, 2014 at 06:40:16PM +0800, Tang Chen wrote: >> When doing aio ring page migration, we migrated the page, and update >> ctx->ring_pages[]. Like the following: >> >> aio_migratepage() >> |-> migrate_page_copy(new, old) >> | ...... /* Need barrier here */ >> |-> ctx->ring_pages[idx] = new >> >> Actually, we need a memory barrier between these two operations. >> Otherwise, if ctx->ring_pages[] is updated before memory copy due to >> the compiler optimization, other processes may have an opportunity >> to access to the not fully initialized new ring page. >> >> So add a wmb to synchronize them. > > The smp_wmb() is not needed after you added the taking of ctx->completion_lock > lock since all accesses to ring_pages is then protected by the spinlock. > Why are you adding this then? Or have you missed adding the lock somewhere? Pleaes see following thread. It's a updated patch. https://lkml.org/lkml/2014/2/27/237 aio_read_events_ring() accesses ring_pages without locking ctx-completion_lock. If copying old page'date to new page runs after new page was set to ctx->ring_pages by changing order, aio_read_events_ring() may not work correctly. So smp_wmb() and smp_rmb() is needed. Thanks, Yasuaki Ishimatsu > Also, if you've changed the patch, it is customary to add a "v2" somewhere in > the patch title so that I have some idea what version of the patch should be > applied. > > -ben > >> Reported-by: Yasuaki Ishimatsu >> Signed-off-by: Tang Chen >> --- >> fs/aio.c | 8 ++++++++ >> 1 file changed, 8 insertions(+) >> >> diff --git a/fs/aio.c b/fs/aio.c >> index 50c089c..f0ed838 100644 >> --- a/fs/aio.c >> +++ b/fs/aio.c >> @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, >> pgoff_t idx; >> spin_lock_irqsave(&ctx->completion_lock, flags); >> migrate_page_copy(new, old); >> + >> + /* >> + * Ensure memory copy is finished before updating >> + * ctx->ring_pages[]. Otherwise other processes may access to >> + * new ring pages which are not fully initialized. >> + */ >> + smp_wmb(); >> + >> idx = old->index; >> if (idx < (pgoff_t)ctx->nr_pages) { >> /* And only do the move if things haven't changed */ >> -- >> 1.8.3.1 > From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756077AbaCDGPL (ORCPT ); Tue, 4 Mar 2014 01:15:11 -0500 Received: from cn.fujitsu.com ([222.73.24.84]:16611 "EHLO song.cn.fujitsu.com" rhost-flags-OK-FAIL-OK-OK) by vger.kernel.org with ESMTP id S1751431AbaCDGPJ (ORCPT ); Tue, 4 Mar 2014 01:15:09 -0500 X-IronPort-AV: E=Sophos;i="4.97,583,1389715200"; d="scan'208";a="9641577" Message-ID: <53156621.60900@cn.fujitsu.com> Date: Tue, 04 Mar 2014 13:35:29 +0800 From: Miao Xie Reply-To: miaox@cn.fujitsu.com User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.3.0 MIME-Version: 1.0 To: Yasuaki Ishimatsu , Tang Chen , bcrl@kvack.org CC: viro@zeniv.linux.org.uk, jmoyer@redhat.com, kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com, guz.fnst@cn.fujitsu.com, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-kernel@vger.kernel.org Subject: Re: [Update PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> <530F2A2D.50307@jp.fujitsu.com> <530F3327.8020205@jp.fujitsu.com> In-Reply-To: <530F3327.8020205@jp.fujitsu.com> X-MIMETrack: Itemize by SMTP Server on mailserver/fnst(Release 8.5.3|September 15, 2011) at 2014/03/04 13:31:16, Serialize by Router on mailserver/fnst(Release 8.5.3|September 15, 2011) at 2014/03/04 13:31:37, Serialize complete at 2014/03/04 13:31:37 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset=ISO-2022-JP Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On thu, 27 Feb 2014 21:44:23 +0900, Yasuaki Ishimatsu wrote: > When doing aio ring page migration, we migrated the page, and update > ctx->ring_pages[]. Like the following: > > aio_migratepage() > |-> migrate_page_copy(new, old) > | ...... /* Need barrier here */ > |-> ctx->ring_pages[idx] = new > > Actually, we need a memory barrier between these two operations. > Otherwise, if ctx->ring_pages[] is updated before memory copy due to > the compiler optimization, other processes may have an opportunity > to access to the not fully initialized new ring page. > > So add a wmb and rmb to synchronize them. > > Signed-off-by: Tang Chen > Signed-off-by: Yasuaki Ishimatsu > > --- > fs/aio.c | 14 ++++++++++++++ > 1 file changed, 14 insertions(+) > > diff --git a/fs/aio.c b/fs/aio.c > index 50c089c..8d9b82b 100644 > --- a/fs/aio.c > +++ b/fs/aio.c > @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, > pgoff_t idx; > spin_lock_irqsave(&ctx->completion_lock, flags); > migrate_page_copy(new, old); > + > + /* > + * Ensure memory copy is finished before updating > + * ctx->ring_pages[]. Otherwise other processes may access to > + * new ring pages which are not fully initialized. > + */ > + smp_wmb(); > + > idx = old->index; > if (idx < (pgoff_t)ctx->nr_pages) { > /* And only do the move if things haven't changed */ > @@ -1074,6 +1082,12 @@ static long aio_read_events_ring(struct kioctx *ctx, > page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]; > pos %= AIO_EVENTS_PER_PAGE; > > + /* > + * Ensure that the page's data was copied from old one by > + * aio_migratepage(). > + */ > + smp_rmb(); > + smp_read_barrier_depends() is better. "One could place an A smp_rmb() primitive between the pointer fetch and dereference. However, this imposes unneeded overhead on systems (such as i386, IA64, PPC, and SPARC) that respect data dependencies on the read side. A smp_read_barrier_depends() primitive has been added to the Linux 2.6 kernel to eliminate overhead on these systems." -- From Chapter 7.1 of Written by Paul E. McKenney Thanks Miao > ev = kmap(page); > copy_ret = copy_to_user(event + ret, ev + pos, > sizeof(*ev) * avail); > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ > From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756043AbaCEDFH (ORCPT ); Tue, 4 Mar 2014 22:05:07 -0500 Received: from mail-oa0-f41.google.com ([209.85.219.41]:44749 "EHLO mail-oa0-f41.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754190AbaCEDFD (ORCPT ); Tue, 4 Mar 2014 22:05:03 -0500 MIME-Version: 1.0 In-Reply-To: <53156621.60900@cn.fujitsu.com> References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> <530F2A2D.50307@jp.fujitsu.com> <530F3327.8020205@jp.fujitsu.com> <53156621.60900@cn.fujitsu.com> From: KOSAKI Motohiro Date: Tue, 4 Mar 2014 22:04:42 -0500 Message-ID: Subject: Re: [Update PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. To: Miao Xie Cc: Yasuaki Ishimatsu , Tang Chen , Benjamin LaHaise , Al Viro , Jeff Moyer , guz.fnst@cn.fujitsu.com, "linux-fsdevel@vger.kernel.org" , linux-aio@kvack.org, LKML Content-Type: text/plain; charset=ISO-8859-1 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org >> + /* >> + * Ensure that the page's data was copied from old one by >> + * aio_migratepage(). >> + */ >> + smp_rmb(); >> + > > smp_read_barrier_depends() is better. > > "One could place an A smp_rmb() primitive between the pointer fetch and > dereference. However, this imposes unneeded overhead on systems (such as > i386, IA64, PPC, and SPARC) that respect data dependencies on the read side. > A smp_read_barrier_depends() primitive has been added to the Linux 2.6 kernel > to eliminate overhead on these systems." > -- From Chapter 7.1 of > Written by Paul E. McKenney > Right. The document of memory barrier described this situation. see https://www.kernel.org/doc/Documentation/memory-barriers.txt CPU 1 CPU 2 =============== =============== { M[0] == 1, M[1] == 2, M[3] = 3, P == 0, Q == 3 } M[1] = 4; ACCESS_ONCE(P) = 1 Q = ACCESS_ONCE(P); D = M[Q]; From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932102AbaCEHAF (ORCPT ); Wed, 5 Mar 2014 02:00:05 -0500 Received: from fgwmail5.fujitsu.co.jp ([192.51.44.35]:41651 "EHLO fgwmail5.fujitsu.co.jp" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751667AbaCEHAB (ORCPT ); Wed, 5 Mar 2014 02:00:01 -0500 X-SecurityPolicyCheck: OK by SHieldMailChecker v2.0.1 X-SHieldMailCheckerPolicyVersion: FJ-ISEC-20120718-3 Message-ID: <5316CB4B.8060905@jp.fujitsu.com> Date: Wed, 5 Mar 2014 15:59:23 +0900 From: Yasuaki Ishimatsu User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20130801 Thunderbird/17.0.8 MIME-Version: 1.0 To: CC: Tang Chen , , , , , , , , , Subject: Re: [Update PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> <530F2A2D.50307@jp.fujitsu.com> <530F3327.8020205@jp.fujitsu.com> <53156621.60900@cn.fujitsu.com> In-Reply-To: <53156621.60900@cn.fujitsu.com> Content-Type: text/plain; charset="ISO-2022-JP" Content-Transfer-Encoding: 7bit X-SecurityPolicyCheck-GC: OK by FENCE-Mail Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org (2014/03/04 14:35), Miao Xie wrote: > On thu, 27 Feb 2014 21:44:23 +0900, Yasuaki Ishimatsu wrote: >> When doing aio ring page migration, we migrated the page, and update >> ctx->ring_pages[]. Like the following: >> >> aio_migratepage() >> |-> migrate_page_copy(new, old) >> | ...... /* Need barrier here */ >> |-> ctx->ring_pages[idx] = new >> >> Actually, we need a memory barrier between these two operations. >> Otherwise, if ctx->ring_pages[] is updated before memory copy due to >> the compiler optimization, other processes may have an opportunity >> to access to the not fully initialized new ring page. >> >> So add a wmb and rmb to synchronize them. >> >> Signed-off-by: Tang Chen >> Signed-off-by: Yasuaki Ishimatsu >> >> --- >> fs/aio.c | 14 ++++++++++++++ >> 1 file changed, 14 insertions(+) >> >> diff --git a/fs/aio.c b/fs/aio.c >> index 50c089c..8d9b82b 100644 >> --- a/fs/aio.c >> +++ b/fs/aio.c >> @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, >> pgoff_t idx; >> spin_lock_irqsave(&ctx->completion_lock, flags); >> migrate_page_copy(new, old); >> + >> + /* >> + * Ensure memory copy is finished before updating >> + * ctx->ring_pages[]. Otherwise other processes may access to >> + * new ring pages which are not fully initialized. >> + */ >> + smp_wmb(); >> + >> idx = old->index; >> if (idx < (pgoff_t)ctx->nr_pages) { >> /* And only do the move if things haven't changed */ >> @@ -1074,6 +1082,12 @@ static long aio_read_events_ring(struct kioctx *ctx, >> page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]; >> pos %= AIO_EVENTS_PER_PAGE; >> >> + /* >> + * Ensure that the page's data was copied from old one by >> + * aio_migratepage(). >> + */ >> + smp_rmb(); >> + > > smp_read_barrier_depends() is better. > > "One could place an A smp_rmb() primitive between the pointer fetch and > dereference. However, this imposes unneeded overhead on systems (such as > i386, IA64, PPC, and SPARC) that respect data dependencies on the read side. > A smp_read_barrier_depends() primitive has been added to the Linux 2.6 kernel > to eliminate overhead on these systems." > -- From Chapter 7.1 of > Written by Paul E. McKenney > > Thanks > Miao Thank you for your comment. I'll update soon. Thanks, Yasauaki Ishimatsu > >> ev = kmap(page); >> copy_ret = copy_to_user(event + ret, ev + pos, >> sizeof(*ev) * avail); >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html >> Please read the FAQ at http://www.tux.org/lkml/ >> > From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752281AbaCEHSV (ORCPT ); Wed, 5 Mar 2014 02:18:21 -0500 Received: from fgwmail6.fujitsu.co.jp ([192.51.44.36]:40724 "EHLO fgwmail6.fujitsu.co.jp" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751249AbaCEHST (ORCPT ); Wed, 5 Mar 2014 02:18:19 -0500 X-SecurityPolicyCheck: OK by SHieldMailChecker v2.0.1 X-SHieldMailCheckerPolicyVersion: FJ-ISEC-20120718-3 Message-ID: <5316CF96.20902@jp.fujitsu.com> Date: Wed, 5 Mar 2014 16:17:42 +0900 From: Yasuaki Ishimatsu User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20130801 Thunderbird/17.0.8 MIME-Version: 1.0 To: Tang Chen , CC: , , , , , , , , Subject: [Update v2 PATCH 2/2] aio, mem-hotplug: Add memory barrier to aio ring page migration. References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-3-git-send-email-tangchen@cn.fujitsu.com> <530F2A2D.50307@jp.fujitsu.com> <530F3327.8020205@jp.fujitsu.com> In-Reply-To: <530F3327.8020205@jp.fujitsu.com> Content-Type: text/plain; charset="ISO-2022-JP" Content-Transfer-Encoding: 7bit X-SecurityPolicyCheck-GC: OK by FENCE-Mail Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org When doing aio ring page migration, we migrated the page, and update ctx->ring_pages[]. Like the following: aio_migratepage() |-> migrate_page_copy(new, old) | ...... /* Need barrier here */ |-> ctx->ring_pages[idx] = new Actually, we need a memory barrier between these two operations. Otherwise, if ctx->ring_pages[] is updated before memory copy due to the compiler optimization, other processes may have an opportunity to access to the not fully initialized new ring page. So add a wmb and rmb to synchronize them. Signed-off-by: Tang Chen Signed-off-by: Yasuaki Ishimatsu --- v2: change smp_rmb() to smp_read_barrier_depends(). Thanks Miao. --- fs/aio.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/aio.c b/fs/aio.c index 50c089c..98c7f2d 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -327,6 +327,14 @@ static int aio_migratepage(struct address_space *mapping, struct page *new, pgoff_t idx; spin_lock_irqsave(&ctx->completion_lock, flags); migrate_page_copy(new, old); + + /* + * Ensure memory copy is finished before updating + * ctx->ring_pages[]. Otherwise other processes may access to + * new ring pages which are not fully initialized. + */ + smp_wmb(); + idx = old->index; if (idx < (pgoff_t)ctx->nr_pages) { /* And only do the move if things haven't changed */ @@ -1074,6 +1082,12 @@ static long aio_read_events_ring(struct kioctx *ctx, page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]; pos %= AIO_EVENTS_PER_PAGE; + /* + * Ensure that the page's data was copied from old one by + * aio_migratepage(). + */ + smp_read_barrier_depends(); + ev = kmap(page); copy_ret = copy_to_user(event + ret, ev + pos, sizeof(*ev) * avail); From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756889AbaCETYX (ORCPT ); Wed, 5 Mar 2014 14:24:23 -0500 Received: from mx1.redhat.com ([209.132.183.28]:14373 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753028AbaCETYV (ORCPT ); Wed, 5 Mar 2014 14:24:21 -0500 From: Jeff Moyer To: Tang Chen Cc: viro@zeniv.linux.org.uk, bcrl@kvack.org, kosaki.motohiro@gmail.com, kosaki.motohiro@jp.fujitsu.com, isimatu.yasuaki@jp.fujitsu.com, guz.fnst@cn.fujitsu.com, linux-fsdevel@vger.kernel.org, linux-aio@kvack.org, linux-kernel@vger.kernel.org Subject: Re: [PATCH 1/2] aio, memory-hotplug: Fix confliction when migrating and accessing ring pages. References: <1393497616-16428-1-git-send-email-tangchen@cn.fujitsu.com> <1393497616-16428-2-git-send-email-tangchen@cn.fujitsu.com> X-PGP-KeyID: 1F78E1B4 X-PGP-CertKey: F6FE 280D 8293 F72C 65FD 5A58 1FF8 A7CA 1F78 E1B4 X-PCLoadLetter: What the f**k does that mean? Date: Wed, 05 Mar 2014 14:23:58 -0500 In-Reply-To: <1393497616-16428-2-git-send-email-tangchen@cn.fujitsu.com> (Tang Chen's message of "Thu, 27 Feb 2014 18:40:15 +0800") Message-ID: User-Agent: Gnus/5.110011 (No Gnus v0.11) Emacs/23.1 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Tang Chen writes: > AIO ring page migration has been implemented by the following patch: > > https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/fs/aio.c?id=36bc08cc01709b4a9bb563b35aa530241ddc63e3 > > In this patch, ctx->completion_lock is used to prevent other processes > from accessing the ring page being migrated. > > But in aio_setup_ring(), ioctx_add_table() and aio_read_events_ring(), > when writing to the ring page, they didn't take ctx->completion_lock. > > As a result, for example, we have the following problem: > > thread 1 | thread 2 > | > aio_migratepage() | > |-> take ctx->completion_lock | > |-> migrate_page_copy(new, old) | > | *NOW*, ctx->ring_pages[idx] == old | > | > | *NOW*, ctx->ring_pages[idx] == old > | aio_read_events_ring() > | |-> ring = kmap_atomic(ctx->ring_pages[0]) > | |-> ring->head = head; *HERE, write to the old ring page* > | |-> kunmap_atomic(ring); > | > |-> ctx->ring_pages[idx] = new | > | *BUT NOW*, the content of | > | ring_pages[idx] is old. | > |-> release ctx->completion_lock | > > As above, the new ring page will not be updated. > > The solution is taking ctx->completion_lock in thread 2, which means, > in aio_setup_ring(), ioctx_add_table() and aio_read_events_ring() when > writing to ring pages. Thanks for the good explanation and for adding comments to the code. This looks right to me. I guess the only issue I have with it is that the code paths changed don't run in interrupt context, so I think you could just use spin_lock_irq. That's not a big deal, though. Reviewed-by: Jeff Moyer > Reported-by: Yasuaki Ishimatsu > Signed-off-by: Tang Chen > --- > fs/aio.c | 33 +++++++++++++++++++++++++++++++++ > 1 file changed, 33 insertions(+) > > diff --git a/fs/aio.c b/fs/aio.c > index 062a5f6..50c089c 100644 > --- a/fs/aio.c > +++ b/fs/aio.c > @@ -366,6 +366,7 @@ static int aio_setup_ring(struct kioctx *ctx) > int nr_pages; > int i; > struct file *file; > + unsigned long flags; > > /* Compensate for the ring buffer's head/tail overlap entry */ > nr_events += 2; /* 1 is required, 2 for good luck */ > @@ -437,6 +438,14 @@ static int aio_setup_ring(struct kioctx *ctx) > ctx->user_id = ctx->mmap_base; > ctx->nr_events = nr_events; /* trusted copy */ > > + /* > + * The aio ring pages are user space pages, so they can be migrated. > + * When writing to an aio ring page, we should ensure the page is not > + * being migrated. Aio page migration procedure is protected by > + * ctx->completion_lock, so we add this lock here. > + */ > + spin_lock_irqsave(&ctx->completion_lock, flags); > + > ring = kmap_atomic(ctx->ring_pages[0]); > ring->nr = nr_events; /* user copy */ > ring->id = ~0U; > @@ -448,6 +457,8 @@ static int aio_setup_ring(struct kioctx *ctx) > kunmap_atomic(ring); > flush_dcache_page(ctx->ring_pages[0]); > > + spin_unlock_irqrestore(&ctx->completion_lock, flags); > + > return 0; > } > > @@ -542,6 +553,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) > unsigned i, new_nr; > struct kioctx_table *table, *old; > struct aio_ring *ring; > + unsigned long flags; > > spin_lock(&mm->ioctx_lock); > rcu_read_lock(); > @@ -556,9 +568,19 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) > rcu_read_unlock(); > spin_unlock(&mm->ioctx_lock); > > + /* > + * Accessing ring pages must be done > + * holding ctx->completion_lock to > + * prevent aio ring page migration > + * procedure from migrating ring pages. > + */ > + spin_lock_irqsave(&ctx->completion_lock, > + flags); > ring = kmap_atomic(ctx->ring_pages[0]); > ring->id = ctx->id; > kunmap_atomic(ring); > + spin_unlock_irqrestore( > + &ctx->completion_lock, flags); > return 0; > } > > @@ -1021,6 +1043,7 @@ static long aio_read_events_ring(struct kioctx *ctx, > unsigned head, tail, pos; > long ret = 0; > int copy_ret; > + unsigned long flags; > > mutex_lock(&ctx->ring_lock); > > @@ -1066,11 +1089,21 @@ static long aio_read_events_ring(struct kioctx *ctx, > head %= ctx->nr_events; > } > > + /* > + * The aio ring pages are user space pages, so they can be migrated. > + * When writing to an aio ring page, we should ensure the page is not > + * being migrated. Aio page migration procedure is protected by > + * ctx->completion_lock, so we add this lock here. > + */ > + spin_lock_irqsave(&ctx->completion_lock, flags); > + > ring = kmap_atomic(ctx->ring_pages[0]); > ring->head = head; > kunmap_atomic(ring); > flush_dcache_page(ctx->ring_pages[0]); > > + spin_unlock_irqrestore(&ctx->completion_lock, flags); > + > pr_debug("%li h%u t%u\n", ret, head, tail); > > put_reqs_available(ctx, ret);