From: Liu Bo <bo.li.liu@oracle.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 09/14] Btrfs: raid56: add readahead for recovery
Date: Tue, 1 Aug 2017 10:14:32 -0600
Message-Id: <20170801161439.13426-10-bo.li.liu@oracle.com>
In-Reply-To: <20170801161439.13426-1-bo.li.liu@oracle.com>
References: <20170801161439.13426-1-bo.li.liu@oracle.com>

While doing recovery, blocks are read from the raid5/6 disk one by one.
This adds readahead so that we can read up to 256 contiguous blocks in
one read IO.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
 fs/btrfs/raid56.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 109 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index dea33c4..24f7cbb 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1530,15 +1530,81 @@ static int btrfs_r5l_write_empty_meta_block(struct btrfs_r5l_log *log, u64 pos,
 	return ret;
 }
 
+#define BTRFS_R5L_RECOVER_IO_POOL_SIZE BIO_MAX_PAGES
 struct btrfs_r5l_recover_ctx {
 	u64 pos;
 	u64 seq;
 	u64 total_size;
 	struct page *meta_page;
 	struct page *io_page;
+
+	struct page *ra_pages[BTRFS_R5L_RECOVER_IO_POOL_SIZE];
+	struct bio *ra_bio;
+	int total;
+	int valid;
+	u64 start_offset;
+
+	struct btrfs_r5l_log *log;
 };
 
-static int btrfs_r5l_recover_load_meta(struct btrfs_r5l_log *log, struct btrfs_r5l_recover_ctx *ctx)
+static int btrfs_r5l_recover_read_ra(struct btrfs_r5l_recover_ctx *ctx, u64 offset)
+{
+	bio_reset(ctx->ra_bio);
+	ctx->ra_bio->bi_bdev = ctx->log->dev->bdev;
+	ctx->ra_bio->bi_opf = REQ_OP_READ;
+	ctx->ra_bio->bi_iter.bi_sector = (ctx->log->data_offset + offset) >> 9;
+
+	ctx->valid = 0;
+	ctx->start_offset = offset;
+
+	while (ctx->valid < ctx->total) {
+		bio_add_page(ctx->ra_bio, ctx->ra_pages[ctx->valid++], PAGE_SIZE, 0);
+
+		offset = btrfs_r5l_ring_add(ctx->log, offset, PAGE_SIZE);
+		if (offset == 0)
+			break;
+	}
+
+#ifdef BTRFS_DEBUG_R5LOG
+	trace_printk("to read %d pages starting from 0x%llx\n", ctx->valid, ctx->log->data_offset + ctx->start_offset);
+#endif
+	return submit_bio_wait(ctx->ra_bio);
+}
+
+static int btrfs_r5l_recover_read_page(struct btrfs_r5l_recover_ctx *ctx, struct page *page, u64 offset)
+{
+	struct page *tmp;
+	int index;
+	char *src;
+	char *dst;
+	int ret;
+
+	if (offset < ctx->start_offset || offset >= (ctx->start_offset + ctx->valid * PAGE_SIZE)) {
+		ret = btrfs_r5l_recover_read_ra(ctx, offset);
+		if (ret)
+			return ret;
+	}
+
+#ifdef BTRFS_DEBUG_R5LOG
+	trace_printk("offset 0x%llx start->offset 0x%llx ctx->valid %d\n", offset, ctx->start_offset, ctx->valid);
+#endif
+
+	ASSERT(IS_ALIGNED(ctx->start_offset, PAGE_SIZE));
+	ASSERT(IS_ALIGNED(offset, PAGE_SIZE));
+
+	index = (offset - ctx->start_offset) >> PAGE_SHIFT;
+	ASSERT(index < ctx->valid);
+
+	tmp = ctx->ra_pages[index];
+	src = kmap(tmp);
+	dst = kmap(page);
+	memcpy(dst, src, PAGE_SIZE);
+	kunmap(page);
+	kunmap(tmp);
+	return 0;
+}
+
+static int btrfs_r5l_recover_load_meta(struct btrfs_r5l_recover_ctx *ctx)
 {
 	struct btrfs_r5l_meta_block *mb;
 
@@ -1642,6 +1708,42 @@ static int btrfs_r5l_recover_flush_log(struct btrfs_r5l_log *log, struct btrfs_r
 	}
 
 	return ret;
+}
+
+static int btrfs_r5l_recover_allocate_ra(struct btrfs_r5l_recover_ctx *ctx)
+{
+	struct page *page;
+	ctx->ra_bio = btrfs_io_bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
+
+	ctx->total = 0;
+	ctx->valid = 0;
+	while (ctx->total < BTRFS_R5L_RECOVER_IO_POOL_SIZE) {
+		page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+		if (!page)
+			break;
+
+		ctx->ra_pages[ctx->total++] = page;
+	}
+
+	if (ctx->total == 0) {
+		bio_put(ctx->ra_bio);
+		return -ENOMEM;
+	}
+
+#ifdef BTRFS_DEBUG_R5LOG
+	trace_printk("readahead: %d allocated pages\n", ctx->total);
+#endif
+	return 0;
+}
+
+static void btrfs_r5l_recover_free_ra(struct btrfs_r5l_recover_ctx *ctx)
+{
+	int i;
+#ifdef BTRFS_DEBUG_R5LOG
+	trace_printk("readahead: %d to free pages\n", ctx->total);
+#endif
+	for (i = 0; i < ctx->total; i++)
+		__free_page(ctx->ra_pages[i]);
+	bio_put(ctx->ra_bio);
 }
 
 static void btrfs_r5l_write_super(struct btrfs_fs_info *fs_info, u64 cp);
@@ -1655,6 +1757,7 @@ static int btrfs_r5l_recover_log(struct btrfs_r5l_log *log)
 
 	ctx = kzalloc(sizeof(*ctx), GFP_NOFS);
 	ASSERT(ctx);
+	ctx->log = log;
 	ctx->pos = log->last_checkpoint;
 	ctx->seq = log->last_cp_seq;
 	ctx->meta_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
@@ -1662,10 +1765,10 @@ static int btrfs_r5l_recover_log(struct btrfs_r5l_log *log)
 	ctx->io_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
 	ASSERT(ctx->io_page);
 
-	ret = btrfs_r5l_recover_flush_log(log, ctx);
-	if (ret) {
-		;
-	}
+	ret = btrfs_r5l_recover_allocate_ra(ctx);
+	ASSERT(ret == 0);
+
+	btrfs_r5l_recover_flush_log(ctx);
 
 	pos = ctx->pos;
 	log->next_checkpoint = ctx->pos;
@@ -1684,6 +1787,7 @@ static int btrfs_r5l_recover_log(struct btrfs_r5l_log *log)
 #endif
 	__free_page(ctx->meta_page);
 	__free_page(ctx->io_page);
+	btrfs_r5l_recover_free_ra(ctx);
 	kfree(ctx);
 	return 0;
 }
-- 
2.9.4
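
For readers who want to try the readahead scheme outside the kernel, below
is a minimal userspace sketch of the same idea: a pool of page-sized
buffers is refilled with one large read, and per-page lookups are served
from the cached window until the requested offset falls outside it. It
only models the logic of btrfs_r5l_recover_read_ra() and
btrfs_r5l_recover_read_page(); the plain file descriptor, pread(), and
the names ra_ctx, ra_fill(), RA_POOL_SIZE and MODEL_PAGE_SIZE are
stand-ins of this sketch, not identifiers from the patch.

/*
 * Userspace model of the recovery readahead window (illustration only,
 * not kernel code): one large read fills a pool of page-sized buffers,
 * and later per-page lookups are copied out of the pool until the
 * requested offset is no longer covered by the cached window.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define MODEL_PAGE_SIZE	4096
#define RA_POOL_SIZE	256	/* stands in for BIO_MAX_PAGES */

struct ra_ctx {
	int fd;			/* stands in for the log device */
	char *pool;		/* RA_POOL_SIZE contiguous pages */
	uint64_t start_offset;	/* first byte cached in the pool */
	int valid;		/* number of valid pages in the pool */
};

/* Refill the window with one large read starting at @offset. */
static int ra_fill(struct ra_ctx *ctx, uint64_t offset)
{
	ssize_t ret = pread(ctx->fd, ctx->pool,
			    (size_t)RA_POOL_SIZE * MODEL_PAGE_SIZE, (off_t)offset);
	if (ret < 0)
		return -1;

	ctx->start_offset = offset;
	ctx->valid = (int)(ret / MODEL_PAGE_SIZE);
	return ctx->valid ? 0 : -1;
}

/*
 * Copy one page at @offset into @dst, refilling the window first when
 * @offset is not covered by the cached range; this mirrors the cache-hit
 * check in btrfs_r5l_recover_read_page().
 */
static int ra_read_page(struct ra_ctx *ctx, char *dst, uint64_t offset)
{
	int index;

	if (offset < ctx->start_offset ||
	    offset >= ctx->start_offset + (uint64_t)ctx->valid * MODEL_PAGE_SIZE) {
		if (ra_fill(ctx, offset))
			return -1;
	}

	index = (int)((offset - ctx->start_offset) / MODEL_PAGE_SIZE);
	memcpy(dst, ctx->pool + (size_t)index * MODEL_PAGE_SIZE, MODEL_PAGE_SIZE);
	return 0;
}

int main(int argc, char **argv)
{
	struct ra_ctx ctx = { .fd = -1 };
	char page[MODEL_PAGE_SIZE];
	uint64_t off;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	ctx.fd = open(argv[1], O_RDONLY);
	if (ctx.fd < 0)
		return 1;
	ctx.pool = malloc((size_t)RA_POOL_SIZE * MODEL_PAGE_SIZE);
	if (!ctx.pool)
		return 1;

	/* Sequential per-page reads; only the window refills hit the disk. */
	for (off = 0; off < 4 * RA_POOL_SIZE * MODEL_PAGE_SIZE; off += MODEL_PAGE_SIZE)
		if (ra_read_page(&ctx, page, off))
			break;

	free(ctx.pool);
	close(ctx.fd);
	return 0;
}

Because recovery walks the log mostly sequentially, nearly every lookup
lands in the cached window, so only about one read in RA_POOL_SIZE
reaches the disk, which is the point of the patch.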