From: Andrea Arcangeli <andrea@suse.de>
To: Dave McCracken <dmccr@us.ibm.com>
Cc: Andrew Morton <akpm@digeo.com>,
mika.penttila@kolumbus.fi, linux-mm@kvack.org,
linux-kernel@vger.kernel.org
Subject: Re: Race between vmtruncate and mapped areas?
Date: Thu, 15 May 2003 02:49:15 +0200 [thread overview]
Message-ID: <20030515004915.GR1429@dualathlon.random> (raw)
In-Reply-To: <82240000.1052934152@baldur.austin.ibm.com>
Hi,
what do you think of this untested fix?
I wonder if vm_file is valid for all nopage operations. I think it
should be, and the i_mapping should always exist as well, but in the
worst case it shouldn't be too difficult to take care of special cases:
just checking whether the new_page is reserved and whether the vma is
VM_SPECIAL would eliminate most issues, should there be any.
--- x/fs/inode.c.~1~ 2003-05-14 23:26:10.000000000 +0200
+++ x/fs/inode.c 2003-05-15 02:26:46.000000000 +0200
@@ -147,6 +147,8 @@ void inode_init_once(struct inode *inode
INIT_LIST_HEAD(&inode->i_data.clean_pages);
INIT_LIST_HEAD(&inode->i_data.dirty_pages);
INIT_LIST_HEAD(&inode->i_data.locked_pages);
+ inode->i_data.truncate_sequence1 = 0;
+ inode->i_data.truncate_sequence2 = 0;
INIT_LIST_HEAD(&inode->i_dentry);
INIT_LIST_HEAD(&inode->i_dirty_buffers);
INIT_LIST_HEAD(&inode->i_dirty_data_buffers);
--- x/include/linux/fs.h.~1~ 2003-05-14 23:26:19.000000000 +0200
+++ x/include/linux/fs.h 2003-05-15 02:35:57.000000000 +0200
@@ -421,6 +421,8 @@ struct address_space {
struct vm_area_struct *i_mmap; /* list of private mappings */
struct vm_area_struct *i_mmap_shared; /* list of shared mappings */
spinlock_t i_shared_lock; /* and spinlock protecting it */
+ int truncate_sequence1; /* serialize ->nopage against truncate */
+ int truncate_sequence2; /* serialize ->nopage against truncate */
int gfp_mask; /* how to allocate the pages */
};
--- x/mm/vmscan.c.~1~ 2003-05-14 23:26:12.000000000 +0200
+++ x/mm/vmscan.c 2003-05-15 00:22:57.000000000 +0200
@@ -165,11 +165,10 @@ drop_pte:
goto drop_pte;
/*
- * Anonymous buffercache pages can be left behind by
+ * Anonymous buffercache pages can't be left behind by
* concurrent truncate and pagefault.
*/
- if (page->buffers)
- goto preserve;
+ BUG_ON(page->buffers);
/*
* This is a dirty, swappable page. First of all,
--- x/mm/memory.c.~1~ 2003-05-14 23:26:19.000000000 +0200
+++ x/mm/memory.c 2003-05-15 02:37:08.000000000 +0200
@@ -1127,6 +1127,8 @@ int vmtruncate(struct inode * inode, lof
if (inode->i_size < offset)
goto do_expand;
i_size_write(inode, offset);
+ mapping->truncate_sequence1++;
+ wmb();
spin_lock(&mapping->i_shared_lock);
if (!mapping->i_mmap && !mapping->i_mmap_shared)
goto out_unlock;
@@ -1140,6 +1142,8 @@ int vmtruncate(struct inode * inode, lof
out_unlock:
spin_unlock(&mapping->i_shared_lock);
truncate_inode_pages(mapping, offset);
+ wmb();
+ mapping->truncate_sequence2++;
goto out_truncate;
do_expand:
@@ -1335,12 +1339,20 @@ static int do_no_page(struct mm_struct *
{
struct page * new_page;
pte_t entry;
+ int truncate_sequence;
+ struct file *file;
+ struct address_space *mapping;
if (!vma->vm_ops || !vma->vm_ops->nopage)
return do_anonymous_page(mm, vma, page_table, pmd, write_access, address);
spin_unlock(&mm->page_table_lock);
pte_kunmap(page_table);
+ file = vma->vm_file;
+ mapping = file->f_dentry->d_inode->i_mapping;
+ truncate_sequence = mapping->truncate_sequence2;
+ mb();
+
new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0);
if (new_page == NULL) /* no page was available -- SIGBUS */
@@ -1366,6 +1378,22 @@ static int do_no_page(struct mm_struct *
page_table = pte_offset_atomic(pmd, address);
spin_lock(&mm->page_table_lock);
+ mb(); /* spin_lock has inclusive semantics */
+ if (unlikely(truncate_sequence != mapping->truncate_sequence1)) {
+ struct inode *inode;
+
+ spin_unlock(&mm->page_table_lock);
+
+ /*
+ * Don't loop here pointlessly, overloading the cpu, until the
+ * truncate has completed.
+ */
+ inode = mapping->host;
+ down(&inode->i_sem);
+ up(&inode->i_sem);
+
+ goto retry;
+ }
/*
* This silly early PAGE_DIRTY setting removes a race
@@ -1388,6 +1416,7 @@ static int do_no_page(struct mm_struct *
set_pte(page_table, entry);
} else {
spin_unlock(&mm->page_table_lock);
+ retry:
pte_kunmap(page_table);
/* One of our sibling threads was faster, back out. */
page_cache_release(new_page);
Andrea
WARNING: multiple messages have this Message-ID (diff)
From: Andrea Arcangeli <andrea@suse.de>
To: Dave McCracken <dmccr@us.ibm.com>
Cc: Andrew Morton <akpm@digeo.com>,
mika.penttila@kolumbus.fi, linux-mm@kvack.org,
linux-kernel@vger.kernel.org
Subject: Re: Race between vmtruncate and mapped areas?
Date: Thu, 15 May 2003 02:49:15 +0200 [thread overview]
Message-ID: <20030515004915.GR1429@dualathlon.random> (raw)
In-Reply-To: <82240000.1052934152@baldur.austin.ibm.com>
Hi,
what do you think of this untested fix?
I wonder if vm_file is valid for all nopage operations. I think it
should be, and the i_mapping should always exist as well, but in the
worst case it shouldn't be too difficult to take care of special cases:
just checking whether the new_page is reserved and whether the vma is
VM_SPECIAL would eliminate most issues, should there be any.
--- x/fs/inode.c.~1~ 2003-05-14 23:26:10.000000000 +0200
+++ x/fs/inode.c 2003-05-15 02:26:46.000000000 +0200
@@ -147,6 +147,8 @@ void inode_init_once(struct inode *inode
INIT_LIST_HEAD(&inode->i_data.clean_pages);
INIT_LIST_HEAD(&inode->i_data.dirty_pages);
INIT_LIST_HEAD(&inode->i_data.locked_pages);
+ inode->i_data.truncate_sequence1 = 0;
+ inode->i_data.truncate_sequence2 = 0;
INIT_LIST_HEAD(&inode->i_dentry);
INIT_LIST_HEAD(&inode->i_dirty_buffers);
INIT_LIST_HEAD(&inode->i_dirty_data_buffers);
--- x/include/linux/fs.h.~1~ 2003-05-14 23:26:19.000000000 +0200
+++ x/include/linux/fs.h 2003-05-15 02:35:57.000000000 +0200
@@ -421,6 +421,8 @@ struct address_space {
struct vm_area_struct *i_mmap; /* list of private mappings */
struct vm_area_struct *i_mmap_shared; /* list of shared mappings */
spinlock_t i_shared_lock; /* and spinlock protecting it */
+ int truncate_sequence1; /* serialize ->nopage against truncate */
+ int truncate_sequence2; /* serialize ->nopage against truncate */
int gfp_mask; /* how to allocate the pages */
};
--- x/mm/vmscan.c.~1~ 2003-05-14 23:26:12.000000000 +0200
+++ x/mm/vmscan.c 2003-05-15 00:22:57.000000000 +0200
@@ -165,11 +165,10 @@ drop_pte:
goto drop_pte;
/*
- * Anonymous buffercache pages can be left behind by
+ * Anonymous buffercache pages can't be left behind by
* concurrent truncate and pagefault.
*/
- if (page->buffers)
- goto preserve;
+ BUG_ON(page->buffers);
/*
* This is a dirty, swappable page. First of all,
--- x/mm/memory.c.~1~ 2003-05-14 23:26:19.000000000 +0200
+++ x/mm/memory.c 2003-05-15 02:37:08.000000000 +0200
@@ -1127,6 +1127,8 @@ int vmtruncate(struct inode * inode, lof
if (inode->i_size < offset)
goto do_expand;
i_size_write(inode, offset);
+ mapping->truncate_sequence1++;
+ wmb();
spin_lock(&mapping->i_shared_lock);
if (!mapping->i_mmap && !mapping->i_mmap_shared)
goto out_unlock;
@@ -1140,6 +1142,8 @@ int vmtruncate(struct inode * inode, lof
out_unlock:
spin_unlock(&mapping->i_shared_lock);
truncate_inode_pages(mapping, offset);
+ wmb();
+ mapping->truncate_sequence2++;
goto out_truncate;
do_expand:
@@ -1335,12 +1339,20 @@ static int do_no_page(struct mm_struct *
{
struct page * new_page;
pte_t entry;
+ int truncate_sequence;
+ struct file *file;
+ struct address_space *mapping;
if (!vma->vm_ops || !vma->vm_ops->nopage)
return do_anonymous_page(mm, vma, page_table, pmd, write_access, address);
spin_unlock(&mm->page_table_lock);
pte_kunmap(page_table);
+ file = vma->vm_file;
+ mapping = file->f_dentry->d_inode->i_mapping;
+ truncate_sequence = mapping->truncate_sequence2;
+ mb();
+
new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0);
if (new_page == NULL) /* no page was available -- SIGBUS */
@@ -1366,6 +1378,22 @@ static int do_no_page(struct mm_struct *
page_table = pte_offset_atomic(pmd, address);
spin_lock(&mm->page_table_lock);
+ mb(); /* spin_lock has inclusive semantics */
+ if (unlikely(truncate_sequence != mapping->truncate_sequence1)) {
+ struct inode *inode;
+
+ spin_unlock(&mm->page_table_lock);
+
+ /*
+ * Don't loop here pointlessly, overloading the cpu, until the
+ * truncate has completed.
+ */
+ inode = mapping->host;
+ down(&inode->i_sem);
+ up(&inode->i_sem);
+
+ goto retry;
+ }
/*
* This silly early PAGE_DIRTY setting removes a race
@@ -1388,6 +1416,7 @@ static int do_no_page(struct mm_struct *
set_pte(page_table, entry);
} else {
spin_unlock(&mm->page_table_lock);
+ retry:
pte_kunmap(page_table);
/* One of our sibling threads was faster, back out. */
page_cache_release(new_page);
Andrea
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
next prev parent reply other threads:[~2003-05-15 0:36 UTC|newest]
Thread overview: 100+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-05-13 20:44 Race between vmtruncate and mapped areas? Dave McCracken
2003-05-13 20:44 ` Dave McCracken
2003-05-13 20:58 ` Mika Penttilä
2003-05-13 20:58 ` Mika Penttilä
2003-05-13 21:04 ` William Lee Irwin III
2003-05-13 21:04 ` William Lee Irwin III
2003-05-13 22:26 ` Dave McCracken
2003-05-13 22:26 ` Dave McCracken
2003-05-13 22:49 ` William Lee Irwin III
2003-05-13 22:49 ` William Lee Irwin III
2003-05-13 23:00 ` Dave McCracken
2003-05-13 23:11 ` William Lee Irwin III
2003-05-13 23:11 ` William Lee Irwin III
2003-05-13 23:16 ` Dave McCracken
2003-05-13 23:16 ` Dave McCracken
2003-05-13 23:20 ` William Lee Irwin III
2003-05-13 23:20 ` William Lee Irwin III
2003-05-13 23:28 ` Dave McCracken
2003-05-13 23:28 ` Dave McCracken
2003-05-13 23:29 ` William Lee Irwin III
2003-05-13 23:29 ` William Lee Irwin III
2003-05-13 23:16 ` William Lee Irwin III
2003-05-13 23:16 ` William Lee Irwin III
2003-05-14 1:10 ` Andrew Morton
2003-05-14 1:10 ` Andrew Morton
2003-05-14 15:02 ` Dave McCracken
2003-05-14 15:02 ` Dave McCracken
2003-05-14 1:10 ` Andrew Morton
2003-05-14 1:10 ` Andrew Morton
2003-05-14 15:02 ` Dave McCracken
2003-05-14 15:02 ` Dave McCracken
2003-05-14 15:06 ` William Lee Irwin III
2003-05-14 15:06 ` William Lee Irwin III
2003-05-14 15:25 ` Dave McCracken
2003-05-14 15:25 ` Dave McCracken
2003-05-14 16:42 ` Gerrit Huizenga
2003-05-14 16:42 ` Gerrit Huizenga
2003-05-14 17:34 ` Andrew Morton
2003-05-14 17:34 ` Andrew Morton
2003-05-14 17:42 ` Dave McCracken
2003-05-14 17:42 ` Dave McCracken
2003-05-14 17:57 ` Andrew Morton
2003-05-14 17:57 ` Andrew Morton
2003-05-14 18:05 ` Dave McCracken
2003-05-14 18:05 ` Dave McCracken
2003-05-14 18:17 ` Andrew Morton
2003-05-14 18:17 ` Andrew Morton
2003-05-14 18:24 ` Dave McCracken
2003-05-14 18:24 ` Dave McCracken
2003-05-14 18:53 ` Andrew Morton
2003-05-14 18:53 ` Andrew Morton
2003-05-15 8:50 ` Andrea Arcangeli
2003-05-15 8:50 ` Andrea Arcangeli
2003-05-14 19:02 ` Rik van Riel
2003-05-14 19:02 ` Rik van Riel
2003-05-14 19:04 ` Rik van Riel
2003-05-14 19:04 ` Rik van Riel
2003-05-14 19:07 ` Dave McCracken
2003-05-14 19:07 ` Dave McCracken
2003-05-14 19:11 ` Rik van Riel
2003-05-14 19:11 ` Rik van Riel
2003-05-15 0:49 ` Andrea Arcangeli [this message]
2003-05-15 0:49 ` Andrea Arcangeli
2003-05-15 2:36 ` Rik van Riel
2003-05-15 2:36 ` Rik van Riel
2003-05-15 9:46 ` Andrea Arcangeli
2003-05-15 9:46 ` Andrea Arcangeli
2003-05-15 9:55 ` Andrew Morton
2003-05-15 9:55 ` Andrew Morton
2003-05-15 8:32 ` Andrew Morton
2003-05-15 8:32 ` Andrew Morton
2003-05-15 8:42 ` Andrew Morton
2003-05-15 8:42 ` Andrew Morton
2003-05-15 8:55 ` Andrea Arcangeli
2003-05-15 8:55 ` Andrea Arcangeli
2003-05-15 9:20 ` Andrew Morton
2003-05-15 9:20 ` Andrew Morton
2003-05-15 9:40 ` Andrea Arcangeli
2003-05-15 9:40 ` Andrea Arcangeli
2003-05-15 9:58 ` Andrew Morton
2003-05-15 9:58 ` Andrew Morton
2003-05-15 16:38 ` Daniel McNeil
2003-05-15 19:19 ` Andrea Arcangeli
2003-05-15 19:19 ` Andrea Arcangeli
2003-05-15 22:04 ` Daniel McNeil
2003-05-15 22:04 ` Daniel McNeil
2003-05-15 23:17 ` Andrea Arcangeli
2003-05-15 23:17 ` Andrea Arcangeli
2003-05-17 0:27 ` Daniel McNeil
2003-05-17 0:27 ` Daniel McNeil
2003-05-17 17:29 ` Andrea Arcangeli
2003-05-17 17:29 ` Andrea Arcangeli
2003-05-13 21:00 ` William Lee Irwin III
2003-05-13 21:00 ` William Lee Irwin III
-- strict thread matches above, loose matches on Subject: below --
2003-05-17 18:19 Paul McKenney
2003-05-17 18:19 ` Paul McKenney
2003-05-17 18:42 ` Andrea Arcangeli
2003-05-17 18:42 ` Andrea Arcangeli
2003-05-19 18:11 Paul McKenney
2003-05-19 18:11 ` Paul McKenney
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20030515004915.GR1429@dualathlon.random \
--to=andrea@suse.de \
--cc=akpm@digeo.com \
--cc=dmccr@us.ibm.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mika.penttila@kolumbus.fi \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.