[PATCH v2] ext4: improve discard efficiency

linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v2] ext4: improve discard efficiency
@ 2023-07-19  9:36 Fengnan Chang
  2023-07-24  3:16 ` Guoqing Jiang
  0 siblings, 1 reply; 3+ messages in thread
From: Fengnan Chang @ 2023-07-19  9:36 UTC (permalink / raw)
  To: adilger.kernel, tytso; +Cc: linux-ext4, Fengnan Chang, kernel test robot

Commit a015434480dc ("ext4: send parallel discards on commit
completions") issued all discard commands in parallel so that the
bios could be merged into one request, which lets the low-level
driver issue multiple segments at once and is more efficient. Commit
55cdd0af2bc5 ("ext4: get discard out of jbd2 commit kthread contex")
seems to have broken this behavior, so let's fix it.
In my test, the time to fstrim a filesystem holding multiple big
sparse files dropped from 6.7s to 1.3s.

Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202307171455.ee68ef8b-oliver.sang@intel.com
Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
---
 fs/ext4/mballoc.c | 40 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 5 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a2475b8c9fb5..84685b746297 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -6790,7 +6790,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
  * be called with under the group lock.
  */
 static int ext4_trim_extent(struct super_block *sb,
-		int start, int count, struct ext4_buddy *e4b)
+		int start, int count, struct ext4_buddy *e4b,
+		struct bio **biop, struct ext4_free_data **entryp)
 __releases(bitlock)
 __acquires(bitlock)
 {
@@ -6812,9 +6813,16 @@ __acquires(bitlock)
 	 */
 	mb_mark_used(e4b, &ex);
 	ext4_unlock_group(sb, group);
-	ret = ext4_issue_discard(sb, group, start, count, NULL);
+	ret = ext4_issue_discard(sb, group, start, count, biop);
+	if (!ret) {
+		struct ext4_free_data *entry = kmem_cache_alloc(ext4_free_data_cachep,
+				GFP_NOFS|__GFP_NOFAIL);
+		entry->efd_start_cluster = start;
+		entry->efd_count = count;
+		*entryp  = entry;
+	}
+
 	ext4_lock_group(sb, group);
-	mb_free_blocks(NULL, e4b, start, ex.fe_len);
 	return ret;
 }
 
@@ -6826,6 +6834,12 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
 {
 	ext4_grpblk_t next, count, free_count;
 	void *bitmap;
+	struct ext4_free_data *entry = NULL, *fd, *nfd;
+	struct list_head discard_data_list;
+	struct bio *discard_bio = NULL;
+	struct blk_plug plug;
+
+	INIT_LIST_HEAD(&discard_data_list);
 
 	bitmap = e4b->bd_bitmap;
 	start = (e4b->bd_info->bb_first_free > start) ?
@@ -6833,6 +6847,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
 	count = 0;
 	free_count = 0;
 
+	blk_start_plug(&plug);
 	while (start <= max) {
 		start = mb_find_next_zero_bit(bitmap, max + 1, start);
 		if (start > max)
@@ -6840,10 +6855,13 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
 		next = mb_find_next_bit(bitmap, max + 1, start);
 
 		if ((next - start) >= minblocks) {
-			int ret = ext4_trim_extent(sb, start, next - start, e4b);
+			int ret = ext4_trim_extent(sb, start, next - start, e4b,
+							&discard_bio, &entry);
 
-			if (ret && ret != -EOPNOTSUPP)
+			if (ret < 0)
 				break;
+
+			list_add_tail(&entry->efd_list, &discard_data_list);
 			count += next - start;
 		}
 		free_count += next - start;
@@ -6863,6 +6881,18 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group))
 		if ((e4b->bd_info->bb_free - free_count) < minblocks)
 			break;
 	}
+	if (discard_bio) {
+		ext4_unlock_group(sb, e4b->bd_group);
+		submit_bio_wait(discard_bio);
+		bio_put(discard_bio);
+		ext4_lock_group(sb, e4b->bd_group);
+	}
+	blk_finish_plug(&plug);
+
+	list_for_each_entry_safe(fd, nfd, &discard_data_list, efd_list) {
+		mb_free_blocks(NULL, e4b, fd->efd_start_cluster, fd->efd_count);
+		kmem_cache_free(ext4_free_data_cachep, fd);
+	}
 
 	return count;
 }
-- 
2.37.1 (Apple Git-137.1)


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] ext4: improve discard efficiency
  2023-07-19  9:36 [PATCH v2] ext4: improve discard efficiency Fengnan Chang
@ 2023-07-24  3:16 ` Guoqing Jiang
  2023-07-24  7:22   ` fengnan chang
  0 siblings, 1 reply; 3+ messages in thread
From: Guoqing Jiang @ 2023-07-24  3:16 UTC (permalink / raw)
  To: Fengnan Chang, adilger.kernel, tytso; +Cc: linux-ext4, kernel test robot

Hi,

On 7/19/23 17:36, Fengnan Chang wrote:
> In commit a015434480dc("ext4: send parallel discards on commit
> completions"), issue all discard commands in parallel make all
> bios could merged into one request, so lowlevel drive can issue
> multi segments in one time which is more efficiency, but commit
> 55cdd0af2bc5 ("ext4: get discard out of jbd2 commit kthread contex")
> seems broke this way, let's fix it.
> In my test, the time of fstrim fs with multi big sparse file
> reduce from 6.7s to 1.3s.

I tried with a 20T sparse file with latest kernel (6.5-rc2+ commit 
f7e3a1baf).

truncate -s 20T sparse1.img
mkfs.ext4 sparse1.img
mount -o discard sparse1.img /mnt/
time fstrim /mnt

1. without the patch

[root@localhost ~]# time fstrim /mnt

real    0m13.496s
user    0m0.002s
sys     0m5.202s

2. with the patch

[root@localhost ~]# time fstrim /mnt

real    0m15.956s
user    0m0.000s
sys     0m7.251s

The result is different from your side, could you share your test?

Thanks,
Guoqing

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] ext4: improve discard efficiency
  2023-07-24  3:16 ` Guoqing Jiang
@ 2023-07-24  7:22   ` fengnan chang
  0 siblings, 0 replies; 3+ messages in thread
From: fengnan chang @ 2023-07-24  7:22 UTC (permalink / raw)
  To: Guoqing Jiang
  Cc: Fengnan Chang, adilger.kernel, tytso, linux-ext4,
	kernel test robot

[-- Attachment #1: Type: text/plain, Size: 1772 bytes --]

On Mon, Jul 24, 2023 at 11:42 AM Guoqing Jiang <guoqing.jiang@linux.dev> wrote:
>
> Hi,
>
> On 7/19/23 17:36, Fengnan Chang wrote:
> > In commit a015434480dc("ext4: send parallel discards on commit
> > completions"), issue all discard commands in parallel make all
> > bios could merged into one request, so lowlevel drive can issue
> > multi segments in one time which is more efficiency, but commit
> > 55cdd0af2bc5 ("ext4: get discard out of jbd2 commit kthread contex")
> > seems broke this way, let's fix it.
> > In my test, the time of fstrim fs with multi big sparse file
> > reduce from 6.7s to 1.3s.
>
> I tried with a 20T sparse file with latest kernel (6.5-rc2+ commit
> f7e3a1baf).
>
> truncate -s 20T sparse1.img
> mkfs.ext4 sparse1.img
> mount -o discard sparse1.img /mnt/
> time fstrim /mnt
>
> 1. without the patch
>
> [root@localhost ~]# time fstrim /mnt
>
> real    0m13.496s
> user    0m0.002s
> sys     0m5.202s
>
> 2. with the patch
>
> [root@localhost ~]# time fstrim /mnt
>
> real    0m15.956s
> user    0m0.000s
> sys     0m7.251s
>
> The result is different from your side, could you share your test?

Here are my test steps:
1. Create 10 normal files, each 10G in size.
2. Deallocate parts of each file: punch holes every 16k. The attached
   file covers steps 1 and 2.
3. Trim the whole fs.
So why does trimming a new fs become slower? Because with my patch,
ext4_try_to_trim_range needs to allocate and free memory, which might
add about 9us of cost. So in the current version, a benefit can only be
gained if there are multiple discontinuous segments that need to be
trimmed in ext4_try_to_trim_range.
This problem needs to be fixed, so I'll send another version.

Thanks.
Fengnan

>
> Thanks,
> Guoqing

[-- Attachment #2: makefrag.c --]
[-- Type: application/octet-stream, Size: 1707 bytes --]

/* Must come before every #include, otherwise <fcntl.h> does not declare fallocate(). */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <linux/falloc.h>
#include <linux/fs.h>

/*
 * Punch a hole in the file open on fd while keeping its size unchanged.
 *
 * start_block: index of the first block to deallocate
 * count:       number of consecutive blocks to deallocate
 *
 * Blocks are treated as 4096 bytes each.  Returns 0 on success; if
 * fallocate() fails, the error is reported with perror() and the process
 * exits with EXIT_FAILURE.
 */
int deallocate_block_range(int fd, int start_block, int count)
{
    enum { BLOCK_BYTES = 4096 };

    /*
     * Widen to off_t before multiplying: the product of two ints overflows
     * once the byte offset passes INT_MAX (about 2 GiB into the file).
     */
    off_t start = (off_t)start_block * BLOCK_BYTES; /* byte offset of the range */
    off_t len = (off_t)count * BLOCK_BYTES;         /* byte length of the range */

    if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                  start, len) == -1) {
        perror("fallocate");
        exit(EXIT_FAILURE);
    }
    return 0;
}

/*
 * Write exactly len bytes from buf to fd, continuing after short writes.
 * Returns 0 on success, -1 if write() fails or makes no progress.
 */
static int write_all(int fd, const char *buf, size_t len)
{
    while (len > 0) {
        ssize_t n = write(fd, buf, len);

        if (n <= 0)
            return -1;
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Create (or truncate) `file`, fill it with 10 GiB of 0x3f ('?') bytes,
 * flush it to disk, then punch holes through it: 8 blocks are deallocated
 * at the start of every 16-block stride.
 *
 * Returns 0 on success and -1 if the file cannot be opened, the buffer
 * cannot be allocated, or writing, syncing or closing the file fails.
 */
int create_file(char *file) {
    /* ULL arithmetic: 10*1024*1024*1024 evaluated in int overflows. */
    const unsigned long long file_size = 10ULL * 1024 * 1024 * 1024;
    const size_t chunk_size = 1024 * 1024;
    /* One hole per 64 KiB of file data. */
    const unsigned long long hole_count = file_size / (64 * 1024);
    char *chunk = NULL;
    int off = 0;
    int rc = -1;

    /* Open for writing; create if missing, mode 0666, truncate to length 0. */
    int fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0666);
    if (fd == -1) {
        printf("Fail to create file!\n");
        return -1;
    }

    chunk = malloc(chunk_size);
    if (chunk == NULL) {
        perror("malloc");
        goto out;
    }
    memset(chunk, 0x3f, chunk_size);

    /* Write the file one chunk at a time. */
    for (unsigned long long done = 0; done < file_size; done += chunk_size) {
        if (write_all(fd, chunk, chunk_size) == -1) {
            perror("write");
            goto out;
        }
    }

    /* Flush cached data to disk before punching holes. */
    if (fsync(fd) == -1) {
        perror("fsync");
        goto out;
    }

    /* `off` is a block index; the callee converts it to a byte offset. */
    for (unsigned long long i = 0; i < hole_count; i++) {
        deallocate_block_range(fd, off, 8);
        off += 16;
    }
    rc = 0;

out:
    free(chunk);
    if (close(fd) == -1) {
        perror("close");
        rc = -1;
    }
    return rc;
}

/*
 * Build ten test files, testfile_0 .. testfile_9, via create_file().
 * The names are relative paths, so the files land in the current working
 * directory.  Command-line arguments are not used.
 *
 * Exits with EXIT_SUCCESS when every file was prepared, or EXIT_FAILURE
 * as soon as create_file() reports an error.
 */
int
main(int argc, char **argv)
{
	(void)argc;
	(void)argv;

	for (int i = 0; i < 10; i++) {
		char name[128];

		snprintf(name, sizeof(name), "testfile_%d", i);
		/* Do not report success if a file could not be prepared. */
		if (create_file(name) != 0) {
			fprintf(stderr, "failed to prepare %s\n", name);
			return EXIT_FAILURE;
		}
	}
	return EXIT_SUCCESS;
}

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-07-24  7:22 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-07-19  9:36 [PATCH v2] ext4: improve discard efficiency Fengnan Chang
2023-07-24  3:16 ` Guoqing Jiang
2023-07-24  7:22   ` fengnan chang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).