From: Matthew Wilcox <willy@infradead.org>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [PATCH v5 07/13] btrfs: Convert from readpages to readahead
Date: Mon, 10 Feb 2020 17:03:42 -0800 [thread overview]
Message-ID: <20200211010348.6872-8-willy@infradead.org> (raw)
In-Reply-To: <20200211010348.6872-1-willy@infradead.org>
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Use the new readahead operation in btrfs. Add a
readahead_for_each_batch() iterator to optimise looping over the pages
in the XArray.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/btrfs/extent_io.c | 48 ++++++++++++++---------------------------
fs/btrfs/extent_io.h | 3 +--
fs/btrfs/inode.c | 16 ++++++--------
include/linux/pagemap.h | 26 ++++++++++++++++++++++
4 files changed, 50 insertions(+), 43 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c0f202741e09..d9f66058e0a7 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4278,52 +4278,36 @@ int extent_writepages(struct address_space *mapping,
return ret;
}
-int extent_readpages(struct address_space *mapping, struct list_head *pages,
- unsigned nr_pages)
+void extent_readahead(struct readahead_control *rac)
{
struct bio *bio = NULL;
unsigned long bio_flags = 0;
struct page *pagepool[16];
struct extent_map *em_cached = NULL;
- struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
- int nr = 0;
+ struct extent_io_tree *tree = &BTRFS_I(rac->mapping->host)->io_tree;
u64 prev_em_start = (u64)-1;
+ int nr;
- while (!list_empty(pages)) {
- u64 contig_end = 0;
-
- for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
- struct page *page = lru_to_page(pages);
-
- prefetchw(&page->flags);
- list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping, page->index,
- readahead_gfp_mask(mapping))) {
- put_page(page);
- break;
- }
-
- pagepool[nr++] = page;
- contig_end = page_offset(page) + PAGE_SIZE - 1;
- }
-
- if (nr) {
- u64 contig_start = page_offset(pagepool[0]);
+ readahead_for_each_batch(rac, pagepool, ARRAY_SIZE(pagepool), nr) {
+ u64 contig_start = page_offset(pagepool[0]);
+ u64 contig_end = page_offset(pagepool[nr - 1]) + PAGE_SIZE - 1;
- ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
+ ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
- contiguous_readpages(tree, pagepool, nr, contig_start,
- contig_end, &em_cached, &bio, &bio_flags,
- &prev_em_start);
- }
+ contiguous_readpages(tree, pagepool, nr, contig_start,
+ contig_end, &em_cached, &bio, &bio_flags,
+ &prev_em_start);
}
if (em_cached)
free_extent_map(em_cached);
- if (bio)
- return submit_one_bio(bio, 0, bio_flags);
- return 0;
+ if (bio) {
+ int ret = submit_one_bio(bio, 0, bio_flags);
+ if (ret < 0) {
+ /* XXX: unlock the pages here? */
+ }
+ }
}
/*
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5d205bbaafdc..bddac32948c7 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -198,8 +198,7 @@ int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc);
-int extent_readpages(struct address_space *mapping, struct list_head *pages,
- unsigned nr_pages);
+void extent_readahead(struct readahead_control *rac);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
void set_page_extent_mapped(struct page *page);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5b3ec93ff911..d964b2a78ed8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4794,8 +4794,8 @@ static void evict_inode_truncate_pages(struct inode *inode)
/*
* Keep looping until we have no more ranges in the io tree.
- * We can have ongoing bios started by readpages (called from readahead)
- * that have their endio callback (extent_io.c:end_bio_extent_readpage)
+ * We can have ongoing bios started by readahead that have
+ * their endio callback (extent_io.c:end_bio_extent_readpage)
* still in progress (unlocked the pages in the bio but did not yet
* unlocked the ranges in the io tree). Therefore this means some
* ranges can still be locked and eviction started because before
@@ -6996,11 +6996,11 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
* for it to complete) and then invalidate the pages for
* this range (through invalidate_inode_pages2_range()),
* but that can lead us to a deadlock with a concurrent
- * call to readpages() (a buffered read or a defrag call
+ * call to readahead (a buffered read or a defrag call
* triggered a readahead) on a page lock due to an
* ordered dio extent we created before but did not have
* yet a corresponding bio submitted (whence it can not
- * complete), which makes readpages() wait for that
+ * complete), which makes readahead wait for that
* ordered extent to complete while holding a lock on
* that page.
*/
@@ -8239,11 +8239,9 @@ static int btrfs_writepages(struct address_space *mapping,
return extent_writepages(mapping, wbc);
}
-static int
-btrfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void btrfs_readahead(struct readahead_control *rac)
{
- return extent_readpages(mapping, pages, nr_pages);
+ extent_readahead(rac);
}
static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
@@ -10448,7 +10446,7 @@ static const struct address_space_operations btrfs_aops = {
.readpage = btrfs_readpage,
.writepage = btrfs_writepage,
.writepages = btrfs_writepages,
- .readpages = btrfs_readpages,
+ .readahead = btrfs_readahead,
.direct_IO = btrfs_direct_IO,
.invalidatepage = btrfs_invalidatepage,
.releasepage = btrfs_releasepage,
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ddb2d1b43212..75bdfec49710 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -664,6 +664,32 @@ static inline struct page *readahead_page(struct readahead_control *rac)
#define readahead_for_each(rac, page) \
for (; (page = readahead_page(rac)); rac->nr_pages -= rac->batch_count)
+static inline unsigned int readahead_page_batch(struct readahead_control *rac,
+ struct page **array, unsigned int size)
+{
+ unsigned int batch = 0;
+ XA_STATE(xas, &rac->mapping->i_pages, rac->start);
+ struct page *page;
+
+ rac->batch_count = 0;
+ xas_for_each(&xas, page, rac->start + rac->nr_pages - 1) {
+ array[batch++] = page;
+ rac->batch_count += hpage_nr_pages(page);
+ rac->start += hpage_nr_pages(page);
+ if (PageHead(page))
+ xas_set(&xas, rac->start);
+
+ if (batch == size)
+ break;
+ }
+
+ return batch;
+}
+
+#define readahead_for_each_batch(rac, array, size, nr) \
+ for (; (nr = readahead_page_batch(rac, array, size)); \
+ rac->nr_pages -= rac->batch_count)
+
/* The byte offset into the file of this readahead block */
static inline loff_t readahead_offset(struct readahead_control *rac)
{
--
2.25.0
WARNING: multiple messages have this Message-ID (diff)
From: Matthew Wilcox <willy@infradead.org>
To: linux-fsdevel@vger.kernel.org
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
linux-btrfs@vger.kernel.org, linux-erofs@lists.ozlabs.org,
linux-ext4@vger.kernel.org,
linux-f2fs-devel@lists.sourceforge.net, cluster-devel@redhat.com,
ocfs2-devel@oss.oracle.com, linux-xfs@vger.kernel.org
Subject: [PATCH v5 07/13] btrfs: Convert from readpages to readahead
Date: Mon, 10 Feb 2020 17:03:42 -0800 [thread overview]
Message-ID: <20200211010348.6872-8-willy@infradead.org> (raw)
In-Reply-To: <20200211010348.6872-1-willy@infradead.org>
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Use the new readahead operation in btrfs. Add a
readahead_for_each_batch() iterator to optimise looping over the pages
in the XArray.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/btrfs/extent_io.c | 48 ++++++++++++++---------------------------
fs/btrfs/extent_io.h | 3 +--
fs/btrfs/inode.c | 16 ++++++--------
include/linux/pagemap.h | 26 ++++++++++++++++++++++
4 files changed, 50 insertions(+), 43 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c0f202741e09..d9f66058e0a7 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4278,52 +4278,36 @@ int extent_writepages(struct address_space *mapping,
return ret;
}
-int extent_readpages(struct address_space *mapping, struct list_head *pages,
- unsigned nr_pages)
+void extent_readahead(struct readahead_control *rac)
{
struct bio *bio = NULL;
unsigned long bio_flags = 0;
struct page *pagepool[16];
struct extent_map *em_cached = NULL;
- struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
- int nr = 0;
+ struct extent_io_tree *tree = &BTRFS_I(rac->mapping->host)->io_tree;
u64 prev_em_start = (u64)-1;
+ int nr;
- while (!list_empty(pages)) {
- u64 contig_end = 0;
-
- for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
- struct page *page = lru_to_page(pages);
-
- prefetchw(&page->flags);
- list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping, page->index,
- readahead_gfp_mask(mapping))) {
- put_page(page);
- break;
- }
-
- pagepool[nr++] = page;
- contig_end = page_offset(page) + PAGE_SIZE - 1;
- }
-
- if (nr) {
- u64 contig_start = page_offset(pagepool[0]);
+ readahead_for_each_batch(rac, pagepool, ARRAY_SIZE(pagepool), nr) {
+ u64 contig_start = page_offset(pagepool[0]);
+ u64 contig_end = page_offset(pagepool[nr - 1]) + PAGE_SIZE - 1;
- ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
+ ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
- contiguous_readpages(tree, pagepool, nr, contig_start,
- contig_end, &em_cached, &bio, &bio_flags,
- &prev_em_start);
- }
+ contiguous_readpages(tree, pagepool, nr, contig_start,
+ contig_end, &em_cached, &bio, &bio_flags,
+ &prev_em_start);
}
if (em_cached)
free_extent_map(em_cached);
- if (bio)
- return submit_one_bio(bio, 0, bio_flags);
- return 0;
+ if (bio) {
+ int ret = submit_one_bio(bio, 0, bio_flags);
+ if (ret < 0) {
+ /* XXX: unlock the pages here? */
+ }
+ }
}
/*
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5d205bbaafdc..bddac32948c7 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -198,8 +198,7 @@ int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc);
-int extent_readpages(struct address_space *mapping, struct list_head *pages,
- unsigned nr_pages);
+void extent_readahead(struct readahead_control *rac);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
void set_page_extent_mapped(struct page *page);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5b3ec93ff911..d964b2a78ed8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4794,8 +4794,8 @@ static void evict_inode_truncate_pages(struct inode *inode)
/*
* Keep looping until we have no more ranges in the io tree.
- * We can have ongoing bios started by readpages (called from readahead)
- * that have their endio callback (extent_io.c:end_bio_extent_readpage)
+ * We can have ongoing bios started by readahead that have
+ * their endio callback (extent_io.c:end_bio_extent_readpage)
* still in progress (unlocked the pages in the bio but did not yet
* unlocked the ranges in the io tree). Therefore this means some
* ranges can still be locked and eviction started because before
@@ -6996,11 +6996,11 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
* for it to complete) and then invalidate the pages for
* this range (through invalidate_inode_pages2_range()),
* but that can lead us to a deadlock with a concurrent
- * call to readpages() (a buffered read or a defrag call
+ * call to readahead (a buffered read or a defrag call
* triggered a readahead) on a page lock due to an
* ordered dio extent we created before but did not have
* yet a corresponding bio submitted (whence it can not
- * complete), which makes readpages() wait for that
+ * complete), which makes readahead wait for that
* ordered extent to complete while holding a lock on
* that page.
*/
@@ -8239,11 +8239,9 @@ static int btrfs_writepages(struct address_space *mapping,
return extent_writepages(mapping, wbc);
}
-static int
-btrfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void btrfs_readahead(struct readahead_control *rac)
{
- return extent_readpages(mapping, pages, nr_pages);
+ extent_readahead(rac);
}
static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
@@ -10448,7 +10446,7 @@ static const struct address_space_operations btrfs_aops = {
.readpage = btrfs_readpage,
.writepage = btrfs_writepage,
.writepages = btrfs_writepages,
- .readpages = btrfs_readpages,
+ .readahead = btrfs_readahead,
.direct_IO = btrfs_direct_IO,
.invalidatepage = btrfs_invalidatepage,
.releasepage = btrfs_releasepage,
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ddb2d1b43212..75bdfec49710 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -664,6 +664,32 @@ static inline struct page *readahead_page(struct readahead_control *rac)
#define readahead_for_each(rac, page) \
for (; (page = readahead_page(rac)); rac->nr_pages -= rac->batch_count)
+static inline unsigned int readahead_page_batch(struct readahead_control *rac,
+ struct page **array, unsigned int size)
+{
+ unsigned int batch = 0;
+ XA_STATE(xas, &rac->mapping->i_pages, rac->start);
+ struct page *page;
+
+ rac->batch_count = 0;
+ xas_for_each(&xas, page, rac->start + rac->nr_pages - 1) {
+ array[batch++] = page;
+ rac->batch_count += hpage_nr_pages(page);
+ rac->start += hpage_nr_pages(page);
+ if (PageHead(page))
+ xas_set(&xas, rac->start);
+
+ if (batch == size)
+ break;
+ }
+
+ return batch;
+}
+
+#define readahead_for_each_batch(rac, array, size, nr) \
+ for (; (nr = readahead_page_batch(rac, array, size)); \
+ rac->nr_pages -= rac->batch_count)
+
/* The byte offset into the file of this readahead block */
static inline loff_t readahead_offset(struct readahead_control *rac)
{
--
2.25.0
WARNING: multiple messages have this Message-ID (diff)
From: Matthew Wilcox <willy@infradead.org>
To: linux-fsdevel@vger.kernel.org
Cc: linux-xfs@vger.kernel.org, linux-kernel@vger.kernel.org,
"Matthew Wilcox \(Oracle\)" <willy@infradead.org>,
linux-f2fs-devel@lists.sourceforge.net, cluster-devel@redhat.com,
linux-mm@kvack.org, ocfs2-devel@oss.oracle.com,
linux-ext4@vger.kernel.org, linux-erofs@lists.ozlabs.org,
linux-btrfs@vger.kernel.org
Subject: [PATCH v5 07/13] btrfs: Convert from readpages to readahead
Date: Mon, 10 Feb 2020 17:03:42 -0800 [thread overview]
Message-ID: <20200211010348.6872-8-willy@infradead.org> (raw)
In-Reply-To: <20200211010348.6872-1-willy@infradead.org>
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Use the new readahead operation in btrfs. Add a
readahead_for_each_batch() iterator to optimise looping over the pages
in the XArray.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/btrfs/extent_io.c | 48 ++++++++++++++---------------------------
fs/btrfs/extent_io.h | 3 +--
fs/btrfs/inode.c | 16 ++++++--------
include/linux/pagemap.h | 26 ++++++++++++++++++++++
4 files changed, 50 insertions(+), 43 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c0f202741e09..d9f66058e0a7 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4278,52 +4278,36 @@ int extent_writepages(struct address_space *mapping,
return ret;
}
-int extent_readpages(struct address_space *mapping, struct list_head *pages,
- unsigned nr_pages)
+void extent_readahead(struct readahead_control *rac)
{
struct bio *bio = NULL;
unsigned long bio_flags = 0;
struct page *pagepool[16];
struct extent_map *em_cached = NULL;
- struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
- int nr = 0;
+ struct extent_io_tree *tree = &BTRFS_I(rac->mapping->host)->io_tree;
u64 prev_em_start = (u64)-1;
+ int nr;
- while (!list_empty(pages)) {
- u64 contig_end = 0;
-
- for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
- struct page *page = lru_to_page(pages);
-
- prefetchw(&page->flags);
- list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping, page->index,
- readahead_gfp_mask(mapping))) {
- put_page(page);
- break;
- }
-
- pagepool[nr++] = page;
- contig_end = page_offset(page) + PAGE_SIZE - 1;
- }
-
- if (nr) {
- u64 contig_start = page_offset(pagepool[0]);
+ readahead_for_each_batch(rac, pagepool, ARRAY_SIZE(pagepool), nr) {
+ u64 contig_start = page_offset(pagepool[0]);
+ u64 contig_end = page_offset(pagepool[nr - 1]) + PAGE_SIZE - 1;
- ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
+ ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
- contiguous_readpages(tree, pagepool, nr, contig_start,
- contig_end, &em_cached, &bio, &bio_flags,
- &prev_em_start);
- }
+ contiguous_readpages(tree, pagepool, nr, contig_start,
+ contig_end, &em_cached, &bio, &bio_flags,
+ &prev_em_start);
}
if (em_cached)
free_extent_map(em_cached);
- if (bio)
- return submit_one_bio(bio, 0, bio_flags);
- return 0;
+ if (bio) {
+ int ret = submit_one_bio(bio, 0, bio_flags);
+ if (ret < 0) {
+ /* XXX: unlock the pages here? */
+ }
+ }
}
/*
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5d205bbaafdc..bddac32948c7 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -198,8 +198,7 @@ int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc);
-int extent_readpages(struct address_space *mapping, struct list_head *pages,
- unsigned nr_pages);
+void extent_readahead(struct readahead_control *rac);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
void set_page_extent_mapped(struct page *page);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5b3ec93ff911..d964b2a78ed8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4794,8 +4794,8 @@ static void evict_inode_truncate_pages(struct inode *inode)
/*
* Keep looping until we have no more ranges in the io tree.
- * We can have ongoing bios started by readpages (called from readahead)
- * that have their endio callback (extent_io.c:end_bio_extent_readpage)
+ * We can have ongoing bios started by readahead that have
+ * their endio callback (extent_io.c:end_bio_extent_readpage)
* still in progress (unlocked the pages in the bio but did not yet
* unlocked the ranges in the io tree). Therefore this means some
* ranges can still be locked and eviction started because before
@@ -6996,11 +6996,11 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
* for it to complete) and then invalidate the pages for
* this range (through invalidate_inode_pages2_range()),
* but that can lead us to a deadlock with a concurrent
- * call to readpages() (a buffered read or a defrag call
+ * call to readahead (a buffered read or a defrag call
* triggered a readahead) on a page lock due to an
* ordered dio extent we created before but did not have
* yet a corresponding bio submitted (whence it can not
- * complete), which makes readpages() wait for that
+ * complete), which makes readahead wait for that
* ordered extent to complete while holding a lock on
* that page.
*/
@@ -8239,11 +8239,9 @@ static int btrfs_writepages(struct address_space *mapping,
return extent_writepages(mapping, wbc);
}
-static int
-btrfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void btrfs_readahead(struct readahead_control *rac)
{
- return extent_readpages(mapping, pages, nr_pages);
+ extent_readahead(rac);
}
static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
@@ -10448,7 +10446,7 @@ static const struct address_space_operations btrfs_aops = {
.readpage = btrfs_readpage,
.writepage = btrfs_writepage,
.writepages = btrfs_writepages,
- .readpages = btrfs_readpages,
+ .readahead = btrfs_readahead,
.direct_IO = btrfs_direct_IO,
.invalidatepage = btrfs_invalidatepage,
.releasepage = btrfs_releasepage,
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ddb2d1b43212..75bdfec49710 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -664,6 +664,32 @@ static inline struct page *readahead_page(struct readahead_control *rac)
#define readahead_for_each(rac, page) \
for (; (page = readahead_page(rac)); rac->nr_pages -= rac->batch_count)
+static inline unsigned int readahead_page_batch(struct readahead_control *rac,
+ struct page **array, unsigned int size)
+{
+ unsigned int batch = 0;
+ XA_STATE(xas, &rac->mapping->i_pages, rac->start);
+ struct page *page;
+
+ rac->batch_count = 0;
+ xas_for_each(&xas, page, rac->start + rac->nr_pages - 1) {
+ array[batch++] = page;
+ rac->batch_count += hpage_nr_pages(page);
+ rac->start += hpage_nr_pages(page);
+ if (PageHead(page))
+ xas_set(&xas, rac->start);
+
+ if (batch == size)
+ break;
+ }
+
+ return batch;
+}
+
+#define readahead_for_each_batch(rac, array, size, nr) \
+ for (; (nr = readahead_page_batch(rac, array, size)); \
+ rac->nr_pages -= rac->batch_count)
+
/* The byte offset into the file of this readahead block */
static inline loff_t readahead_offset(struct readahead_control *rac)
{
--
2.25.0
WARNING: multiple messages have this Message-ID (diff)
From: Matthew Wilcox <willy@infradead.org>
To: linux-fsdevel@vger.kernel.org
Cc: linux-xfs@vger.kernel.org, linux-kernel@vger.kernel.org,
"Matthew Wilcox \(Oracle\)" <willy@infradead.org>,
linux-f2fs-devel@lists.sourceforge.net, cluster-devel@redhat.com,
linux-mm@kvack.org, ocfs2-devel@oss.oracle.com,
linux-ext4@vger.kernel.org, linux-erofs@lists.ozlabs.org,
linux-btrfs@vger.kernel.org
Subject: [f2fs-dev] [PATCH v5 07/13] btrfs: Convert from readpages to readahead
Date: Mon, 10 Feb 2020 17:03:42 -0800 [thread overview]
Message-ID: <20200211010348.6872-8-willy@infradead.org> (raw)
In-Reply-To: <20200211010348.6872-1-willy@infradead.org>
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Use the new readahead operation in btrfs. Add a
readahead_for_each_batch() iterator to optimise looping over the pages
in the XArray.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
fs/btrfs/extent_io.c | 48 ++++++++++++++---------------------------
fs/btrfs/extent_io.h | 3 +--
fs/btrfs/inode.c | 16 ++++++--------
include/linux/pagemap.h | 26 ++++++++++++++++++++++
4 files changed, 50 insertions(+), 43 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c0f202741e09..d9f66058e0a7 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4278,52 +4278,36 @@ int extent_writepages(struct address_space *mapping,
return ret;
}
-int extent_readpages(struct address_space *mapping, struct list_head *pages,
- unsigned nr_pages)
+void extent_readahead(struct readahead_control *rac)
{
struct bio *bio = NULL;
unsigned long bio_flags = 0;
struct page *pagepool[16];
struct extent_map *em_cached = NULL;
- struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
- int nr = 0;
+ struct extent_io_tree *tree = &BTRFS_I(rac->mapping->host)->io_tree;
u64 prev_em_start = (u64)-1;
+ int nr;
- while (!list_empty(pages)) {
- u64 contig_end = 0;
-
- for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) {
- struct page *page = lru_to_page(pages);
-
- prefetchw(&page->flags);
- list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping, page->index,
- readahead_gfp_mask(mapping))) {
- put_page(page);
- break;
- }
-
- pagepool[nr++] = page;
- contig_end = page_offset(page) + PAGE_SIZE - 1;
- }
-
- if (nr) {
- u64 contig_start = page_offset(pagepool[0]);
+ readahead_for_each_batch(rac, pagepool, ARRAY_SIZE(pagepool), nr) {
+ u64 contig_start = page_offset(pagepool[0]);
+ u64 contig_end = page_offset(pagepool[nr - 1]) + PAGE_SIZE - 1;
- ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
+ ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
- contiguous_readpages(tree, pagepool, nr, contig_start,
- contig_end, &em_cached, &bio, &bio_flags,
- &prev_em_start);
- }
+ contiguous_readpages(tree, pagepool, nr, contig_start,
+ contig_end, &em_cached, &bio, &bio_flags,
+ &prev_em_start);
}
if (em_cached)
free_extent_map(em_cached);
- if (bio)
- return submit_one_bio(bio, 0, bio_flags);
- return 0;
+ if (bio) {
+ int ret = submit_one_bio(bio, 0, bio_flags);
+ if (ret < 0) {
+ /* XXX: unlock the pages here? */
+ }
+ }
}
/*
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5d205bbaafdc..bddac32948c7 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -198,8 +198,7 @@ int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc);
-int extent_readpages(struct address_space *mapping, struct list_head *pages,
- unsigned nr_pages);
+void extent_readahead(struct readahead_control *rac);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
void set_page_extent_mapped(struct page *page);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5b3ec93ff911..d964b2a78ed8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4794,8 +4794,8 @@ static void evict_inode_truncate_pages(struct inode *inode)
/*
* Keep looping until we have no more ranges in the io tree.
- * We can have ongoing bios started by readpages (called from readahead)
- * that have their endio callback (extent_io.c:end_bio_extent_readpage)
+ * We can have ongoing bios started by readahead that have
+ * their endio callback (extent_io.c:end_bio_extent_readpage)
* still in progress (unlocked the pages in the bio but did not yet
* unlocked the ranges in the io tree). Therefore this means some
* ranges can still be locked and eviction started because before
@@ -6996,11 +6996,11 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
* for it to complete) and then invalidate the pages for
* this range (through invalidate_inode_pages2_range()),
* but that can lead us to a deadlock with a concurrent
- * call to readpages() (a buffered read or a defrag call
+ * call to readahead (a buffered read or a defrag call
* triggered a readahead) on a page lock due to an
* ordered dio extent we created before but did not have
* yet a corresponding bio submitted (whence it can not
- * complete), which makes readpages() wait for that
+ * complete), which makes readahead wait for that
* ordered extent to complete while holding a lock on
* that page.
*/
@@ -8239,11 +8239,9 @@ static int btrfs_writepages(struct address_space *mapping,
return extent_writepages(mapping, wbc);
}
-static int
-btrfs_readpages(struct file *file, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+static void btrfs_readahead(struct readahead_control *rac)
{
- return extent_readpages(mapping, pages, nr_pages);
+ extent_readahead(rac);
}
static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
@@ -10448,7 +10446,7 @@ static const struct address_space_operations btrfs_aops = {
.readpage = btrfs_readpage,
.writepage = btrfs_writepage,
.writepages = btrfs_writepages,
- .readpages = btrfs_readpages,
+ .readahead = btrfs_readahead,
.direct_IO = btrfs_direct_IO,
.invalidatepage = btrfs_invalidatepage,
.releasepage = btrfs_releasepage,
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ddb2d1b43212..75bdfec49710 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -664,6 +664,32 @@ static inline struct page *readahead_page(struct readahead_control *rac)
#define readahead_for_each(rac, page) \
for (; (page = readahead_page(rac)); rac->nr_pages -= rac->batch_count)
+static inline unsigned int readahead_page_batch(struct readahead_control *rac,
+ struct page **array, unsigned int size)
+{
+ unsigned int batch = 0;
+ XA_STATE(xas, &rac->mapping->i_pages, rac->start);
+ struct page *page;
+
+ rac->batch_count = 0;
+ xas_for_each(&xas, page, rac->start + rac->nr_pages - 1) {
+ array[batch++] = page;
+ rac->batch_count += hpage_nr_pages(page);
+ rac->start += hpage_nr_pages(page);
+ if (PageHead(page))
+ xas_set(&xas, rac->start);
+
+ if (batch == size)
+ break;
+ }
+
+ return batch;
+}
+
+#define readahead_for_each_batch(rac, array, size, nr) \
+ for (; (nr = readahead_page_batch(rac, array, size)); \
+ rac->nr_pages -= rac->batch_count)
+
/* The byte offset into the file of this readahead block */
static inline loff_t readahead_offset(struct readahead_control *rac)
{
--
2.25.0
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
next prev parent reply other threads:[~2020-02-11 1:03 UTC|newest]
Thread overview: 138+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-02-11 1:03 [Cluster-devel] [PATCH v5 00/13] Change readahead API Matthew Wilcox
2020-02-11 1:03 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` [Cluster-devel] [PATCH v5 01/13] mm: Fix the return type of __do_page_cache_readahead Matthew Wilcox
2020-02-11 1:03 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 8:19 ` [Cluster-devel] " Johannes Thumshirn
2020-02-11 8:19 ` [f2fs-dev] " Johannes Thumshirn
2020-02-11 8:19 ` Johannes Thumshirn
2020-02-11 8:19 ` Johannes Thumshirn
2020-02-11 12:34 ` [Cluster-devel] " Matthew Wilcox
2020-02-11 12:34 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 12:34 ` Matthew Wilcox
2020-02-11 12:34 ` Matthew Wilcox
2020-02-12 18:13 ` [Cluster-devel] " Christoph Hellwig
2020-02-12 18:13 ` [Ocfs2-devel] " Christoph Hellwig
2020-02-12 18:13 ` [f2fs-dev] " Christoph Hellwig
2020-02-12 18:13 ` Christoph Hellwig
2020-02-12 18:13 ` Christoph Hellwig
2020-02-14 3:19 ` [Cluster-devel] " John Hubbard
2020-02-14 3:19 ` [Ocfs2-devel] " John Hubbard
2020-02-14 3:19 ` [f2fs-dev] " John Hubbard
2020-02-14 3:19 ` John Hubbard
2020-02-14 3:19 ` John Hubbard
2020-02-14 4:21 ` [Cluster-devel] " Matthew Wilcox
2020-02-14 4:21 ` [Ocfs2-devel] " Matthew Wilcox
2020-02-14 4:21 ` [f2fs-dev] " Matthew Wilcox
2020-02-14 4:21 ` Matthew Wilcox
2020-02-14 4:21 ` Matthew Wilcox
2020-02-14 4:33 ` [Cluster-devel] " John Hubbard
2020-02-14 4:33 ` [Ocfs2-devel] " John Hubbard
2020-02-14 4:33 ` [f2fs-dev] " John Hubbard
2020-02-14 4:33 ` John Hubbard
2020-02-14 4:33 ` John Hubbard
2020-02-14 19:50 ` [Cluster-devel] " Matthew Wilcox
2020-02-14 19:50 ` [Ocfs2-devel] " Matthew Wilcox
2020-02-14 19:50 ` [f2fs-dev] " Matthew Wilcox
2020-02-14 19:50 ` Matthew Wilcox
2020-02-14 19:50 ` Matthew Wilcox
2020-02-11 1:03 ` [Cluster-devel] [PATCH v5 02/13] mm: Ignore return value of ->readpages Matthew Wilcox
2020-02-11 1:03 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-12 18:13 ` [Cluster-devel] " Christoph Hellwig
2020-02-12 18:13 ` [Ocfs2-devel] " Christoph Hellwig
2020-02-12 18:13 ` [f2fs-dev] " Christoph Hellwig
2020-02-12 18:13 ` Christoph Hellwig
2020-02-12 18:13 ` Christoph Hellwig
2020-02-11 1:03 ` [Cluster-devel] [PATCH v5 03/13] mm: Put readahead pages in cache earlier Matthew Wilcox
2020-02-11 1:03 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-14 3:36 ` [Cluster-devel] " John Hubbard
2020-02-14 3:36 ` [Ocfs2-devel] " John Hubbard
2020-02-14 3:36 ` [f2fs-dev] " John Hubbard
2020-02-14 3:36 ` John Hubbard
2020-02-14 3:36 ` John Hubbard
2020-02-15 1:15 ` [Cluster-devel] " Matthew Wilcox
2020-02-15 1:15 ` [Ocfs2-devel] " Matthew Wilcox
2020-02-15 1:15 ` [f2fs-dev] " Matthew Wilcox
2020-02-15 1:15 ` Matthew Wilcox
2020-02-15 1:15 ` Matthew Wilcox
2020-02-11 1:03 ` [Cluster-devel] [PATCH v5 04/13] mm: Add readahead address space operation Matthew Wilcox
2020-02-11 1:03 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 4:52 ` [Cluster-devel] " Dave Chinner
2020-02-11 4:52 ` [Ocfs2-devel] " Dave Chinner
2020-02-11 4:52 ` [f2fs-dev] " Dave Chinner
2020-02-11 4:52 ` Dave Chinner
2020-02-11 4:52 ` Dave Chinner
2020-02-11 12:54 ` [Cluster-devel] " Matthew Wilcox
2020-02-11 12:54 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 12:54 ` Matthew Wilcox
2020-02-11 12:54 ` Matthew Wilcox
2020-02-11 20:08 ` [Cluster-devel] " Dave Chinner
2020-02-11 20:08 ` [Ocfs2-devel] " Dave Chinner
2020-02-11 20:08 ` [f2fs-dev] " Dave Chinner
2020-02-11 20:08 ` Dave Chinner
2020-02-11 20:08 ` Dave Chinner
2020-02-12 18:18 ` [Cluster-devel] " Christoph Hellwig
2020-02-12 18:18 ` [Ocfs2-devel] " Christoph Hellwig
2020-02-12 18:18 ` [f2fs-dev] " Christoph Hellwig
2020-02-12 18:18 ` Christoph Hellwig
2020-02-12 18:18 ` Christoph Hellwig
2020-02-14 5:36 ` [Cluster-devel] " John Hubbard
2020-02-14 5:36 ` [Ocfs2-devel] " John Hubbard
2020-02-14 5:36 ` [f2fs-dev] " John Hubbard
2020-02-14 5:36 ` John Hubbard
2020-02-14 5:36 ` John Hubbard
2020-02-15 1:15 ` [Cluster-devel] " Matthew Wilcox
2020-02-15 1:15 ` [Ocfs2-devel] " Matthew Wilcox
2020-02-15 1:15 ` [f2fs-dev] " Matthew Wilcox
2020-02-15 1:15 ` Matthew Wilcox
2020-02-15 1:15 ` Matthew Wilcox
2020-02-11 1:03 ` [Cluster-devel] [PATCH v5 05/13] mm: Add page_cache_readahead_limit Matthew Wilcox
2020-02-11 1:03 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` [Cluster-devel] [PATCH v5 06/13] fs: Convert mpage_readpages to mpage_readahead Matthew Wilcox
2020-02-11 1:03 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-13 22:09 ` [Cluster-devel] " Junxiao Bi
2020-02-13 22:09 ` [Ocfs2-devel] " Junxiao Bi
2020-02-13 22:09 ` [f2fs-dev] " Junxiao Bi
2020-02-13 22:09 ` Junxiao Bi
2020-02-13 22:09 ` Junxiao Bi
2020-02-11 1:03 ` Matthew Wilcox [this message]
2020-02-11 1:03 ` [f2fs-dev] [PATCH v5 07/13] btrfs: Convert from readpages to readahead Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` [Cluster-devel] [PATCH v5 08/13] erofs: Convert uncompressed files " Matthew Wilcox
2020-02-11 1:03 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` [Cluster-devel] [PATCH v5 09/13] erofs: Convert compressed " Matthew Wilcox
2020-02-11 1:03 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` [Cluster-devel] [PATCH v5 10/13] ext4: Convert " Matthew Wilcox
2020-02-11 1:03 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` [Cluster-devel] [PATCH v5 11/13] f2fs: " Matthew Wilcox
2020-02-11 1:03 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` [Cluster-devel] [PATCH v5 12/13] fuse: " Matthew Wilcox
2020-02-11 1:03 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` [Cluster-devel] [PATCH v5 13/13] iomap: " Matthew Wilcox
2020-02-11 1:03 ` [f2fs-dev] " Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
2020-02-11 1:03 ` Matthew Wilcox
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200211010348.6872-8-willy@infradead.org \
--to=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.