* Re: [PATCH 04/12] mm, thp, tmpfs: split huge page when moving from page cache to swap
2013-10-15 19:00 ` Ning Qu
@ 2013-10-18 18:16 ` Ning Qu
0 siblings, 0 replies; 4+ messages in thread
From: Ning Qu @ 2013-10-18 18:16 UTC (permalink / raw)
To: Kirill A. Shutemov
Cc: Andrea Arcangeli, Andrew Morton, Hugh Dickins, Al Viro,
Wu Fengguang, Jan Kara, Mel Gorman, linux-mm, Andi Kleen,
Matthew Wilcox, Hillf Danton, Dave Hansen, Alexander Shishkin,
linux-fsdevel, linux-kernel
[-- Attachment #1: Type: text/plain, Size: 7711 bytes --]
New patch below, which handles all of the pages after the huge page is split.
---
include/linux/huge_mm.h | 2 ++
mm/shmem.c | 79 ++++++++++++++++++++++++++++++++++++-------------
2 files changed, 61 insertions(+), 20 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 65f90db..58b0208 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -64,6 +64,7 @@ extern pmd_t *page_check_address_pmd(struct page *page,
#define HPAGE_PMD_SHIFT PMD_SHIFT
#define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT)
#define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1))
+#define HPAGE_NR_PAGES HPAGE_PMD_NR
extern bool is_vma_temporary_stack(struct vm_area_struct *vma);
@@ -207,6 +208,7 @@ extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vm
#define THP_READ_ALLOC_FAILED ({ BUILD_BUG(); 0; })
#define hpage_nr_pages(x) 1
+#define HPAGE_NR_PAGES 1
#define transparent_hugepage_enabled(__vma) 0
#define transparent_hugepage_defrag(__vma) 0
diff --git a/mm/shmem.c b/mm/shmem.c
index 5bde8d0..b80ace7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -862,14 +862,16 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
struct shmem_inode_info *info;
struct address_space *mapping;
struct inode *inode;
- swp_entry_t swap;
+ swp_entry_t swap[HPAGE_NR_PAGES];
pgoff_t index;
+ int nr = 1;
+ int i;
BUG_ON(!PageLocked(page));
mapping = page->mapping;
- index = page->index;
inode = mapping->host;
info = SHMEM_I(inode);
+
if (info->flags & VM_LOCKED)
goto redirty;
if (!total_swap_pages)
@@ -887,6 +889,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
goto redirty;
}
+ index = page->index;
/*
* This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
* value into swapfile.c, the only way we can correctly account for a
@@ -906,21 +909,35 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
if (shmem_falloc &&
index >= shmem_falloc->start &&
index < shmem_falloc->next)
- shmem_falloc->nr_unswapped++;
+ shmem_falloc->nr_unswapped +=
+ hpagecache_nr_pages(page);
else
shmem_falloc = NULL;
spin_unlock(&inode->i_lock);
if (shmem_falloc)
goto redirty;
}
- clear_highpage(page);
+ clear_pagecache_page(page);
flush_dcache_page(page);
SetPageUptodate(page);
}
- swap = get_swap_page();
- if (!swap.val)
- goto redirty;
+ /* We can only have nr correct after the huge page is split,
+ * otherwise it will break the redirty logic
+ */
+ nr = hpagecache_nr_pages(page);
+ /* We have to break the huge page at this point,
+ * since we have no idea how to swap a huge page.
+ */
+ if (PageTransHugeCache(page))
+ split_huge_page(compound_trans_head(page));
+
+ /* Pre-allocate all the swap pages */
+ for (i = 0; i < nr; i++) {
+ swap[i] = get_swap_page();
+ if (!swap[i].val)
+ goto undo_alloc_swap;
+ }
/*
* Add inode to shmem_unuse()'s list of swapped-out inodes,
@@ -934,25 +951,47 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
if (list_empty(&info->swaplist))
list_add_tail(&info->swaplist, &shmem_swaplist);
- if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
- swap_shmem_alloc(swap);
- shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
+ for (i = 0; i < nr; i++) {
+ if (add_to_swap_cache(page + i, swap[i], GFP_ATOMIC))
+ goto undo_add_to_swap_cache;
+ }
- spin_lock(&info->lock);
- info->swapped++;
- shmem_recalc_inode(inode);
- spin_unlock(&info->lock);
+ /* We make sure everything is correct before moving further */
+ for (i = 0; i < nr; i++) {
+ swap_shmem_alloc(swap[i]);
+ shmem_delete_from_page_cache(page + i,
+ swp_to_radix_entry(swap[i]));
+ }
- mutex_unlock(&shmem_swaplist_mutex);
- BUG_ON(page_mapped(page));
- swap_writepage(page, wbc);
- return 0;
+ spin_lock(&info->lock);
+ info->swapped += nr;
+ shmem_recalc_inode(inode);
+ spin_unlock(&info->lock);
+
+ mutex_unlock(&shmem_swaplist_mutex);
+
+ for (i = 0; i < nr; i++) {
+ BUG_ON(page_mapped(page + i));
+ swap_writepage(page + i, wbc);
}
+ return 0;
+
+undo_add_to_swap_cache:
+ while (i) {
+ i--;
+ __delete_from_swap_cache(page + i);
+ }
mutex_unlock(&shmem_swaplist_mutex);
- swapcache_free(swap, NULL);
+ i = nr;
+undo_alloc_swap:
+ while (i) {
+ i--;
+ swapcache_free(swap[i], NULL);
+ }
redirty:
- set_page_dirty(page);
+ for (i = 0; i < nr; i++)
+ set_page_dirty(page + i);
if (wbc->for_reclaim)
return AOP_WRITEPAGE_ACTIVATE; /* Return with page locked */
unlock_page(page);
--
Best wishes,
--
Ning Qu (曲宁) | Software Engineer | quning@google.com | +1-408-418-6066
On Tue, Oct 15, 2013 at 12:00 PM, Ning Qu <quning@google.com> wrote:
> Let me take another look at that logic. Thanks!
> Best wishes,
> --
> Ning Qu (曲宁) | Software Engineer | quning@google.com | +1-408-418-6066
>
>
> On Tue, Oct 15, 2013 at 3:33 AM, Kirill A. Shutemov
> <kirill.shutemov@linux.intel.com> wrote:
> > Ning Qu wrote:
> >> in shmem_writepage, we have to split the huge page when moving pages
> >> from page cache to swap because we don't support huge page in swap
> >> yet.
> >>
> >> Signed-off-by: Ning Qu <quning@gmail.com>
> >> ---
> >> mm/shmem.c | 9 ++++++++-
> >> 1 file changed, 8 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/mm/shmem.c b/mm/shmem.c
> >> index 8fe17dd..68a0e1d 100644
> >> --- a/mm/shmem.c
> >> +++ b/mm/shmem.c
> >> @@ -898,6 +898,13 @@ static int shmem_writepage(struct page *page,
> struct writeback_control *wbc)
> >> swp_entry_t swap;
> >> pgoff_t index;
> >>
> >> + /* TODO: we have to break the huge page at this point,
> >> + * since we have no idea how to recover a huge page from
> >> + * swap.
> >> + */
> >> + if (PageTransCompound(page))
> >> + split_huge_page(compound_trans_head(page));
> >> +
> >
> > After the split you handle here only first small page of the huge page.
> > Is it what we want to do? Should we swap out all small pages of the huge
> > page?
> >
> > --
> > Kirill A. Shutemov
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>
[-- Attachment #2: Type: text/html, Size: 4834 bytes --]
^ permalink raw reply related [flat|nested] 4+ messages in thread