From: Konstantin Khlebnikov <khlebnikov@parallels.com>
To: Hugh Dickins <hughd@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
"linux-mm@kvack.org" <linux-mm@kvack.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v2] tmpfs: fix race between umount and writepage
Date: Sat, 7 May 2011 09:33:26 +0400 [thread overview]
Message-ID: <4DC4D9A6.9070103@parallels.com> (raw)
In-Reply-To: <alpine.LSU.2.00.1105031223120.9845@sister.anvils>
Hugh Dickins wrote:
<cut>
>
> Here's the patch I was testing last night, but I do want to test it
> some more (I've not even tried your unmounting case yet), and I do want
> to make some changes to it (some comments, and see if I can move the
> mem_cgroup_cache_charge outside of the mutex, making it GFP_KERNEL
> rather than GFP_NOFS - at the time that mem_cgroup charging went in,
> we did not know here if it was actually a shmem swap page, whereas
> nowadays we can be sure, since that's noted in the swap_map).
>
> In shmem_unuse_inode I'm widening the shmem_swaplist_mutex to protect
> against shmem_evict_inode; and in shmem_writepage adding to the list
> earlier, while holding lock on page still in pagecache to protect it.
>
> But testing last night showed corruption on this laptop (no problem
> on other machines): I'm guessing it's unrelated, but I can't be sure
> of that without more extended testing.
>
> Hugh
This patch fixed my problem, I didn't catch any crashes on my test-case: swapout-unmount.
>
> --- 2.6.39-rc5/mm/shmem.c 2011-04-28 09:52:49.066135001 -0700
> +++ linux/mm/shmem.c 2011-05-02 21:02:21.745633214 -0700
> @@ -852,7 +852,7 @@ static inline int shmem_find_swp(swp_ent
>
> static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
> {
> - struct inode *inode;
> + struct address_space *mapping;
> unsigned long idx;
> unsigned long size;
> unsigned long limit;
> @@ -928,7 +928,7 @@ lost2:
> return 0;
> found:
> idx += offset;
> - inode = igrab(&info->vfs_inode);
> + mapping = info->vfs_inode.i_mapping;
> spin_unlock(&info->lock);
>
> /*
> @@ -940,20 +940,16 @@ found:
> */
> if (shmem_swaplist.next !=&info->swaplist)
> list_move_tail(&shmem_swaplist,&info->swaplist);
> - mutex_unlock(&shmem_swaplist_mutex);
>
> - error = 1;
> - if (!inode)
> - goto out;
> /*
> - * Charge page using GFP_KERNEL while we can wait.
> + * Charge page using GFP_NOFS while we can wait.
> * Charged back to the user(not to caller) when swap account is used.
> * add_to_page_cache() will be called with GFP_NOWAIT.
> */
> - error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
> + error = mem_cgroup_cache_charge(page, current->mm, GFP_NOFS);
> if (error)
> goto out;
> - error = radix_tree_preload(GFP_KERNEL);
> + error = radix_tree_preload(GFP_NOFS);
> if (error) {
> mem_cgroup_uncharge_cache_page(page);
> goto out;
> @@ -963,14 +959,14 @@ found:
> spin_lock(&info->lock);
> ptr = shmem_swp_entry(info, idx, NULL);
> if (ptr&& ptr->val == entry.val) {
> - error = add_to_page_cache_locked(page, inode->i_mapping,
> + error = add_to_page_cache_locked(page, mapping,
> idx, GFP_NOWAIT);
> /* does mem_cgroup_uncharge_cache_page on error */
> } else /* we must compensate for our precharge above */
> mem_cgroup_uncharge_cache_page(page);
>
> if (error == -EEXIST) {
> - struct page *filepage = find_get_page(inode->i_mapping, idx);
> + struct page *filepage = find_get_page(mapping, idx);
> error = 1;
> if (filepage) {
> /*
> @@ -995,9 +991,6 @@ found:
> spin_unlock(&info->lock);
> radix_tree_preload_end();
> out:
> - unlock_page(page);
> - page_cache_release(page);
> - iput(inode); /* allows for NULL */
> return error;
> }
>
> @@ -1016,7 +1009,7 @@ int shmem_unuse(swp_entry_t entry, struc
> found = shmem_unuse_inode(info, entry, page);
> cond_resched();
> if (found)
> - goto out;
> + break;
> }
> mutex_unlock(&shmem_swaplist_mutex);
> /*
> @@ -1025,7 +1018,6 @@ int shmem_unuse(swp_entry_t entry, struc
> */
> unlock_page(page);
> page_cache_release(page);
> -out:
> return (found< 0) ? found : 0;
> }
>
> @@ -1039,6 +1031,7 @@ static int shmem_writepage(struct page *
> struct address_space *mapping;
> unsigned long index;
> struct inode *inode;
> + bool unlock_mutex = false;
>
> BUG_ON(!PageLocked(page));
> mapping = page->mapping;
> @@ -1064,7 +1057,17 @@ static int shmem_writepage(struct page *
> else
> swap.val = 0;
>
> + if (swap.val&& list_empty(&info->swaplist)) {
> + mutex_lock(&shmem_swaplist_mutex);
> + /* move instead of add in case we're racing */
> + list_move_tail(&info->swaplist,&shmem_swaplist);
> + unlock_mutex = true;
> + }
> +
> spin_lock(&info->lock);
> + if (unlock_mutex)
> + mutex_unlock(&shmem_swaplist_mutex);
> +
> if (index>= info->next_index) {
> BUG_ON(!(info->flags& SHMEM_TRUNCATE));
> goto unlock;
> @@ -1084,21 +1087,10 @@ static int shmem_writepage(struct page *
> delete_from_page_cache(page);
> shmem_swp_set(info, entry, swap.val);
> shmem_swp_unmap(entry);
> - if (list_empty(&info->swaplist))
> - inode = igrab(inode);
> - else
> - inode = NULL;
> spin_unlock(&info->lock);
> swap_shmem_alloc(swap);
> BUG_ON(page_mapped(page));
> swap_writepage(page, wbc);
> - if (inode) {
> - mutex_lock(&shmem_swaplist_mutex);
> - /* move instead of add in case we're racing */
> - list_move_tail(&info->swaplist,&shmem_swaplist);
> - mutex_unlock(&shmem_swaplist_mutex);
> - iput(inode);
> - }
> return 0;
> }
>
WARNING: multiple messages have this Message-ID (diff)
From: Konstantin Khlebnikov <khlebnikov@parallels.com>
To: Hugh Dickins <hughd@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
"linux-mm@kvack.org" <linux-mm@kvack.org>,
"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v2] tmpfs: fix race between umount and writepage
Date: Sat, 7 May 2011 09:33:26 +0400 [thread overview]
Message-ID: <4DC4D9A6.9070103@parallels.com> (raw)
In-Reply-To: <alpine.LSU.2.00.1105031223120.9845@sister.anvils>
Hugh Dickins wrote:
<cut>
>
> Here's the patch I was testing last night, but I do want to test it
> some more (I've not even tried your unmounting case yet), and I do want
> to make some changes to it (some comments, and see if I can move the
> mem_cgroup_cache_charge outside of the mutex, making it GFP_KERNEL
> rather than GFP_NOFS - at the time that mem_cgroup charging went in,
> we did not know here if it was actually a shmem swap page, whereas
> nowadays we can be sure, since that's noted in the swap_map).
>
> In shmem_unuse_inode I'm widening the shmem_swaplist_mutex to protect
> against shmem_evict_inode; and in shmem_writepage adding to the list
> earlier, while holding lock on page still in pagecache to protect it.
>
> But testing last night showed corruption on this laptop (no problem
> on other machines): I'm guessing it's unrelated, but I can't be sure
> of that without more extended testing.
>
> Hugh
This patch fixed my problem, I didn't catch any crashes on my test-case: swapout-unmount.
>
> --- 2.6.39-rc5/mm/shmem.c 2011-04-28 09:52:49.066135001 -0700
> +++ linux/mm/shmem.c 2011-05-02 21:02:21.745633214 -0700
> @@ -852,7 +852,7 @@ static inline int shmem_find_swp(swp_ent
>
> static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
> {
> - struct inode *inode;
> + struct address_space *mapping;
> unsigned long idx;
> unsigned long size;
> unsigned long limit;
> @@ -928,7 +928,7 @@ lost2:
> return 0;
> found:
> idx += offset;
> - inode = igrab(&info->vfs_inode);
> + mapping = info->vfs_inode.i_mapping;
> spin_unlock(&info->lock);
>
> /*
> @@ -940,20 +940,16 @@ found:
> */
> if (shmem_swaplist.next !=&info->swaplist)
> list_move_tail(&shmem_swaplist,&info->swaplist);
> - mutex_unlock(&shmem_swaplist_mutex);
>
> - error = 1;
> - if (!inode)
> - goto out;
> /*
> - * Charge page using GFP_KERNEL while we can wait.
> + * Charge page using GFP_NOFS while we can wait.
> * Charged back to the user(not to caller) when swap account is used.
> * add_to_page_cache() will be called with GFP_NOWAIT.
> */
> - error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
> + error = mem_cgroup_cache_charge(page, current->mm, GFP_NOFS);
> if (error)
> goto out;
> - error = radix_tree_preload(GFP_KERNEL);
> + error = radix_tree_preload(GFP_NOFS);
> if (error) {
> mem_cgroup_uncharge_cache_page(page);
> goto out;
> @@ -963,14 +959,14 @@ found:
> spin_lock(&info->lock);
> ptr = shmem_swp_entry(info, idx, NULL);
> if (ptr&& ptr->val == entry.val) {
> - error = add_to_page_cache_locked(page, inode->i_mapping,
> + error = add_to_page_cache_locked(page, mapping,
> idx, GFP_NOWAIT);
> /* does mem_cgroup_uncharge_cache_page on error */
> } else /* we must compensate for our precharge above */
> mem_cgroup_uncharge_cache_page(page);
>
> if (error == -EEXIST) {
> - struct page *filepage = find_get_page(inode->i_mapping, idx);
> + struct page *filepage = find_get_page(mapping, idx);
> error = 1;
> if (filepage) {
> /*
> @@ -995,9 +991,6 @@ found:
> spin_unlock(&info->lock);
> radix_tree_preload_end();
> out:
> - unlock_page(page);
> - page_cache_release(page);
> - iput(inode); /* allows for NULL */
> return error;
> }
>
> @@ -1016,7 +1009,7 @@ int shmem_unuse(swp_entry_t entry, struc
> found = shmem_unuse_inode(info, entry, page);
> cond_resched();
> if (found)
> - goto out;
> + break;
> }
> mutex_unlock(&shmem_swaplist_mutex);
> /*
> @@ -1025,7 +1018,6 @@ int shmem_unuse(swp_entry_t entry, struc
> */
> unlock_page(page);
> page_cache_release(page);
> -out:
> return (found< 0) ? found : 0;
> }
>
> @@ -1039,6 +1031,7 @@ static int shmem_writepage(struct page *
> struct address_space *mapping;
> unsigned long index;
> struct inode *inode;
> + bool unlock_mutex = false;
>
> BUG_ON(!PageLocked(page));
> mapping = page->mapping;
> @@ -1064,7 +1057,17 @@ static int shmem_writepage(struct page *
> else
> swap.val = 0;
>
> + if (swap.val&& list_empty(&info->swaplist)) {
> + mutex_lock(&shmem_swaplist_mutex);
> + /* move instead of add in case we're racing */
> + list_move_tail(&info->swaplist,&shmem_swaplist);
> + unlock_mutex = true;
> + }
> +
> spin_lock(&info->lock);
> + if (unlock_mutex)
> + mutex_unlock(&shmem_swaplist_mutex);
> +
> if (index>= info->next_index) {
> BUG_ON(!(info->flags& SHMEM_TRUNCATE));
> goto unlock;
> @@ -1084,21 +1087,10 @@ static int shmem_writepage(struct page *
> delete_from_page_cache(page);
> shmem_swp_set(info, entry, swap.val);
> shmem_swp_unmap(entry);
> - if (list_empty(&info->swaplist))
> - inode = igrab(inode);
> - else
> - inode = NULL;
> spin_unlock(&info->lock);
> swap_shmem_alloc(swap);
> BUG_ON(page_mapped(page));
> swap_writepage(page, wbc);
> - if (inode) {
> - mutex_lock(&shmem_swaplist_mutex);
> - /* move instead of add in case we're racing */
> - list_move_tail(&info->swaplist,&shmem_swaplist);
> - mutex_unlock(&shmem_swaplist_mutex);
> - iput(inode);
> - }
> return 0;
> }
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2011-05-07 5:33 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-04-05 10:34 [PATCH] tmpfs: fix race between umount and writepage Konstantin Khlebnikov
2011-04-05 10:34 ` Konstantin Khlebnikov
2011-04-08 12:27 ` Konstantin Khlebnikov
2011-04-08 12:27 ` Konstantin Khlebnikov
2011-04-20 20:04 ` Andrew Morton
2011-04-20 20:04 ` Andrew Morton
2011-04-21 6:37 ` Konstantin Khlebnikov
2011-04-21 6:37 ` Konstantin Khlebnikov
2011-04-21 6:41 ` [PATCH v2] " Konstantin Khlebnikov
2011-04-21 6:41 ` Konstantin Khlebnikov
2011-04-21 19:44 ` Andrew Morton
2011-04-21 19:44 ` Andrew Morton
2011-04-22 4:05 ` Konstantin Khlebnikov
2011-04-22 4:05 ` Konstantin Khlebnikov
2011-05-03 20:06 ` Hugh Dickins
2011-05-03 20:06 ` Hugh Dickins
2011-05-07 5:33 ` Konstantin Khlebnikov [this message]
2011-05-07 5:33 ` Konstantin Khlebnikov
2011-05-07 23:56 ` Hugh Dickins
2011-05-07 23:56 ` Hugh Dickins
2011-05-08 12:51 ` Konstantin Khlebnikov
2011-05-08 12:51 ` Konstantin Khlebnikov
2011-05-08 19:36 ` Hugh Dickins
2011-05-08 19:36 ` Hugh Dickins
2011-05-10 9:52 ` Konstantin Khlebnikov
2011-05-10 9:52 ` Konstantin Khlebnikov
2011-05-10 18:55 ` Hugh Dickins
2011-05-10 18:55 ` Hugh Dickins
2011-05-08 19:41 ` [PATCH 1/3] " Hugh Dickins
2011-05-08 19:41 ` Hugh Dickins
2011-05-08 19:43 ` [PATCH 2/3] tmpfs: fix race between umount and swapoff Hugh Dickins
2011-05-08 19:43 ` Hugh Dickins
2011-05-08 19:45 ` [PATCH 3/3] tmpfs: fix spurious ENOSPC when racing with unswap Hugh Dickins
2011-05-08 19:45 ` Hugh Dickins
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4DC4D9A6.9070103@parallels.com \
--to=khlebnikov@parallels.com \
--cc=akpm@linux-foundation.org \
--cc=hughd@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.