[PATCH 1/2] ext4: cleanup GFP flags inside resize path

linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 1/2] ext4: cleanup GFP flags inside resize path
@ 2014-11-09 16:37 Dmitry Monakhov
  2014-11-09 16:37 ` [PATCH 2/2] ext4: fix potential use after free issue while fsresize Dmitry Monakhov
  2014-11-14  9:37 ` [PATCH 1/2] ext4: cleanup GFP flags inside resize path Dmitry Monakhov
  0 siblings, 2 replies; 6+ messages in thread
From: Dmitry Monakhov @ 2014-11-09 16:37 UTC (permalink / raw)
  To: linux-ext4; +Cc: tytso, cmm, Dmitry Monakhov

We must use GFP_NOFS instead of GFP_KERNEL inside ext4_mb_add_groupinfo because the resize
path calls it inside a journal transaction. Call trace:
ioctl
 ->ext4_group_add
   ->journal_start
   ->ext4_setup_new_descs
     ->ext4_mb_add_groupinfo

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 fs/ext4/mballoc.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index dbfe15c..654e70d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2385,7 +2385,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
 		metalen = sizeof(*meta_group_info) <<
 			EXT4_DESC_PER_BLOCK_BITS(sb);
-		meta_group_info = kmalloc(metalen, GFP_KERNEL);
+		meta_group_info = kmalloc(metalen, GFP_NOFS);
 		if (meta_group_info == NULL) {
 			ext4_msg(sb, KERN_ERR, "can't allocate mem "
 				 "for a buddy group");
@@ -2399,7 +2399,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
 	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
 
-	meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
+	meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
 	if (meta_group_info[i] == NULL) {
 		ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
 		goto exit_group_info;
@@ -2428,7 +2428,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 	{
 		struct buffer_head *bh;
 		meta_group_info[i]->bb_bitmap =
-			kmalloc(sb->s_blocksize, GFP_KERNEL);
+			kmalloc(sb->s_blocksize, GFP_NOFS);
 		BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
 		bh = ext4_read_block_bitmap(sb, group);
 		BUG_ON(bh == NULL);
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] ext4: fix potential use after free issue while fsresize
  2014-11-09 16:37 [PATCH 1/2] ext4: cleanup GFP flags inside resize path Dmitry Monakhov
@ 2014-11-09 16:37 ` Dmitry Monakhov
  2014-11-14  9:37 ` [PATCH 1/2] ext4: cleanup GFP flags inside resize path Dmitry Monakhov
  1 sibling, 0 replies; 6+ messages in thread
From: Dmitry Monakhov @ 2014-11-09 16:37 UTC (permalink / raw)
  To: linux-ext4; +Cc: tytso, cmm, Dmitry Monakhov

We need some sort of synchronization while updating ->s_group_desc because there
are a lot of users which can access the old ->s_group_desc array after it has been released.
It is reasonable to use a lightweight seqcount_t here instead of RCU.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 fs/ext4/balloc.c |   14 ++++++++++----
 fs/ext4/ext4.h   |    1 +
 fs/ext4/resize.c |    5 ++++-
 fs/ext4/super.c  |    1 +
 4 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 83a6f49..4746907 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -280,8 +280,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
 {
 	unsigned int group_desc;
 	unsigned int offset;
+	unsigned int seq;
 	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	struct ext4_group_desc *desc;
+	struct buffer_head *gd_bh;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (block_group >= ngroups) {
@@ -293,7 +295,11 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
 
 	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
 	offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
-	if (!sbi->s_group_desc[group_desc]) {
+	do {
+		seq = read_seqcount_begin(&sbi->s_group_desc_seq);
+		gd_bh = sbi->s_group_desc[group_desc];
+	} while (unlikely(read_seqcount_retry(&sbi->s_group_desc_seq, seq)));
+	if (!gd_bh) {
 		ext4_error(sb, "Group descriptor not loaded - "
 			   "block_group = %u, group_desc = %u, desc = %u",
 			   block_group, group_desc, offset);
@@ -301,10 +307,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
 	}
 
 	desc = (struct ext4_group_desc *)(
-		(__u8 *)sbi->s_group_desc[group_desc]->b_data +
-		offset * EXT4_DESC_SIZE(sb));
+		(__u8 *)gd_bh->b_data + offset * EXT4_DESC_SIZE(sb));
 	if (bh)
-		*bh = sbi->s_group_desc[group_desc];
+		*bh = gd_bh;
+
 	return desc;
 }
 
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index c55a1fa..2c039d2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1199,6 +1199,7 @@ struct ext4_sb_info {
 	struct buffer_head * s_sbh;	/* Buffer containing the super block */
 	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
 	struct buffer_head **s_group_desc;
+	seqcount_t s_group_desc_seq;	/* protects s_group_desc updates */
 	unsigned int s_mount_opt;
 	unsigned int s_mount_opt2;
 	unsigned int s_mount_flags;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ca45883..f9ec935 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -854,8 +854,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	memcpy(n_group_desc, o_group_desc,
 	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
 	n_group_desc[gdb_num] = gdb_bh;
+	write_seqcount_begin(&EXT4_SB(sb)->s_group_desc_seq);
 	EXT4_SB(sb)->s_group_desc = n_group_desc;
 	EXT4_SB(sb)->s_gdb_count++;
+	write_seqcount_end(&EXT4_SB(sb)->s_group_desc_seq);
 	ext4_kvfree(o_group_desc);
 
 	le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
@@ -902,13 +904,14 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 			     gdb_num + 1);
 		return err;
 	}
-
 	o_group_desc = EXT4_SB(sb)->s_group_desc;
 	memcpy(n_group_desc, o_group_desc,
 	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
 	n_group_desc[gdb_num] = gdb_bh;
+	write_seqcount_begin(&EXT4_SB(sb)->s_group_desc_seq);
 	EXT4_SB(sb)->s_group_desc = n_group_desc;
 	EXT4_SB(sb)->s_gdb_count++;
+	write_seqcount_end(&EXT4_SB(sb)->s_group_desc_seq);
 	ext4_kvfree(o_group_desc);
 	BUFFER_TRACE(gdb_bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, gdb_bh);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2c9e686..08d50a1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3872,6 +3872,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
 	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
 		   EXT4_DESC_PER_BLOCK(sb);
+	seqcount_init(&sbi->s_group_desc_seq);
 	sbi->s_group_desc = ext4_kvmalloc(db_count *
 					  sizeof(struct buffer_head *),
 					  GFP_KERNEL);
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] ext4: cleanup GFP flags inside resize path
  2014-11-09 16:37 [PATCH 1/2] ext4: cleanup GFP flags inside resize path Dmitry Monakhov
  2014-11-09 16:37 ` [PATCH 2/2] ext4: fix potential use after free issue while fsresize Dmitry Monakhov
@ 2014-11-14  9:37 ` Dmitry Monakhov
  1 sibling, 0 replies; 6+ messages in thread
From: Dmitry Monakhov @ 2014-11-14  9:37 UTC (permalink / raw)
  To: linux-ext4; +Cc: tytso, cmm

Dmitry Monakhov <dmonakhov@openvz.org> writes:

> We must use GFP_NOFS instead GFP_KERNEL inside ext4_mb_add_groupinfo because resizing call
>  it inside journal transaction. Call trace:
> ioctl
>  ->ext4_group_add
>    ->journal_start
>    ->ext4_setup_new_descs
>      ->ext4_mb_add_groupinfo
It appears that there is one more case that was missed:
ext4_group_add
   ->journal_start
   ->ext4_flex_group_add
        ->ext4_update_super
               ->ext4_calculate_overhead  -> GFP_KERNEL
I'll send an updated version soon.
>
> Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
> ---
>  fs/ext4/mballoc.c |    6 +++---
>  1 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index dbfe15c..654e70d 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -2385,7 +2385,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
>  	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
>  		metalen = sizeof(*meta_group_info) <<
>  			EXT4_DESC_PER_BLOCK_BITS(sb);
> -		meta_group_info = kmalloc(metalen, GFP_KERNEL);
> +		meta_group_info = kmalloc(metalen, GFP_NOFS);
>  		if (meta_group_info == NULL) {
>  			ext4_msg(sb, KERN_ERR, "can't allocate mem "
>  				 "for a buddy group");
> @@ -2399,7 +2399,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
>  		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
>  	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
>  
> -	meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
> +	meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
>  	if (meta_group_info[i] == NULL) {
>  		ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
>  		goto exit_group_info;
> @@ -2428,7 +2428,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
>  	{
>  		struct buffer_head *bh;
>  		meta_group_info[i]->bb_bitmap =
> -			kmalloc(sb->s_blocksize, GFP_KERNEL);
> +			kmalloc(sb->s_blocksize, GFP_NOFS);
>  		BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
>  		bh = ext4_read_block_bitmap(sb, group);
>  		BUG_ON(bh == NULL);
> -- 
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] ext4: cleanup GFP flags inside resize path V2
@ 2014-11-14  9:40 Dmitry Monakhov
  2014-11-14  9:40 ` [PATCH 2/2] ext4: fix potential use after free issue while fsresize Dmitry Monakhov
  0 siblings, 1 reply; 6+ messages in thread
From: Dmitry Monakhov @ 2014-11-14  9:40 UTC (permalink / raw)
  To: linux-ext4; +Cc: tytso, cmm, Dmitry Monakhov

We must use GFP_NOFS instead of GFP_KERNEL inside ext4_mb_add_groupinfo because the resize path
calls it inside a journal transaction. Call trace:
ioctl
 ->ext4_group_add
   ->journal_start
   ->ext4_setup_new_descs
     ->ext4_mb_add_groupinfo -> GFP_KERNEL
   ->ext4_flex_group_add
     ->ext4_update_super
       ->ext4_calculate_overhead  -> GFP_KERNEL
   ->journal_stop

Changes from V1:
 - Fix up gfp flags for ext4_calculate_overhead (called from
   ext4_update_super)

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 fs/ext4/mballoc.c |    6 +++---
 fs/ext4/super.c   |    2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index dbfe15c..654e70d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2385,7 +2385,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
 		metalen = sizeof(*meta_group_info) <<
 			EXT4_DESC_PER_BLOCK_BITS(sb);
-		meta_group_info = kmalloc(metalen, GFP_KERNEL);
+		meta_group_info = kmalloc(metalen, GFP_NOFS);
 		if (meta_group_info == NULL) {
 			ext4_msg(sb, KERN_ERR, "can't allocate mem "
 				 "for a buddy group");
@@ -2399,7 +2399,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
 	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
 
-	meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
+	meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS);
 	if (meta_group_info[i] == NULL) {
 		ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
 		goto exit_group_info;
@@ -2428,7 +2428,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 	{
 		struct buffer_head *bh;
 		meta_group_info[i]->bb_bitmap =
-			kmalloc(sb->s_blocksize, GFP_KERNEL);
+			kmalloc(sb->s_blocksize, GFP_NOFS);
 		BUG_ON(meta_group_info[i]->bb_bitmap == NULL);
 		bh = ext4_read_block_bitmap(sb, group);
 		BUG_ON(bh == NULL);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2c9e686..5f6cab8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3310,7 +3310,7 @@ int ext4_calculate_overhead(struct super_block *sb)
 	struct ext4_super_block *es = sbi->s_es;
 	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
 	ext4_fsblk_t overhead = 0;
-	char *buf = (char *) get_zeroed_page(GFP_KERNEL);
+	char *buf = (char *) get_zeroed_page(GFP_NOFS);
 
 	if (!buf)
 		return -ENOMEM;
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] ext4: fix potential use after free issue while fsresize
  2014-11-14  9:40 [PATCH 1/2] ext4: cleanup GFP flags inside resize path V2 Dmitry Monakhov
@ 2014-11-14  9:40 ` Dmitry Monakhov
  2014-11-25 18:27   ` Theodore Ts'o
  0 siblings, 1 reply; 6+ messages in thread
From: Dmitry Monakhov @ 2014-11-14  9:40 UTC (permalink / raw)
  To: linux-ext4; +Cc: tytso, cmm, Dmitry Monakhov

We need some sort of synchronization while updating ->s_group_desc because there
are a lot of users which can access the old ->s_group_desc array after it has been released.
It is reasonable to use a lightweight seqcount_t here instead of RCU.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
---
 fs/ext4/balloc.c |   14 ++++++++++----
 fs/ext4/ext4.h   |    1 +
 fs/ext4/resize.c |    5 ++++-
 fs/ext4/super.c  |    1 +
 4 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 83a6f49..4746907 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -280,8 +280,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
 {
 	unsigned int group_desc;
 	unsigned int offset;
+	unsigned int seq;
 	ext4_group_t ngroups = ext4_get_groups_count(sb);
 	struct ext4_group_desc *desc;
+	struct buffer_head *gd_bh;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (block_group >= ngroups) {
@@ -293,7 +295,11 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
 
 	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
 	offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
-	if (!sbi->s_group_desc[group_desc]) {
+	do {
+		seq = read_seqcount_begin(&sbi->s_group_desc_seq);
+		gd_bh = sbi->s_group_desc[group_desc];
+	} while (unlikely(read_seqcount_retry(&sbi->s_group_desc_seq, seq)));
+	if (!gd_bh) {
 		ext4_error(sb, "Group descriptor not loaded - "
 			   "block_group = %u, group_desc = %u, desc = %u",
 			   block_group, group_desc, offset);
@@ -301,10 +307,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
 	}
 
 	desc = (struct ext4_group_desc *)(
-		(__u8 *)sbi->s_group_desc[group_desc]->b_data +
-		offset * EXT4_DESC_SIZE(sb));
+		(__u8 *)gd_bh->b_data + offset * EXT4_DESC_SIZE(sb));
 	if (bh)
-		*bh = sbi->s_group_desc[group_desc];
+		*bh = gd_bh;
+
 	return desc;
 }
 
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index c55a1fa..2c039d2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1199,6 +1199,7 @@ struct ext4_sb_info {
 	struct buffer_head * s_sbh;	/* Buffer containing the super block */
 	struct ext4_super_block *s_es;	/* Pointer to the super block in the buffer */
 	struct buffer_head **s_group_desc;
+	seqcount_t s_group_desc_seq;	/* protects s_group_desc updates */
 	unsigned int s_mount_opt;
 	unsigned int s_mount_opt2;
 	unsigned int s_mount_flags;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ca45883..f9ec935 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -854,8 +854,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	memcpy(n_group_desc, o_group_desc,
 	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
 	n_group_desc[gdb_num] = gdb_bh;
+	write_seqcount_begin(&EXT4_SB(sb)->s_group_desc_seq);
 	EXT4_SB(sb)->s_group_desc = n_group_desc;
 	EXT4_SB(sb)->s_gdb_count++;
+	write_seqcount_end(&EXT4_SB(sb)->s_group_desc_seq);
 	ext4_kvfree(o_group_desc);
 
 	le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
@@ -902,13 +904,14 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 			     gdb_num + 1);
 		return err;
 	}
-
 	o_group_desc = EXT4_SB(sb)->s_group_desc;
 	memcpy(n_group_desc, o_group_desc,
 	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
 	n_group_desc[gdb_num] = gdb_bh;
+	write_seqcount_begin(&EXT4_SB(sb)->s_group_desc_seq);
 	EXT4_SB(sb)->s_group_desc = n_group_desc;
 	EXT4_SB(sb)->s_gdb_count++;
+	write_seqcount_end(&EXT4_SB(sb)->s_group_desc_seq);
 	ext4_kvfree(o_group_desc);
 	BUFFER_TRACE(gdb_bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, gdb_bh);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 5f6cab8..722e537 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3872,6 +3872,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
 	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
 		   EXT4_DESC_PER_BLOCK(sb);
+	seqcount_init(&sbi->s_group_desc_seq);
 	sbi->s_group_desc = ext4_kvmalloc(db_count *
 					  sizeof(struct buffer_head *),
 					  GFP_KERNEL);
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] ext4: fix potential use after free issue while fsresize
  2014-11-14  9:40 ` [PATCH 2/2] ext4: fix potential use after free issue while fsresize Dmitry Monakhov
@ 2014-11-25 18:27   ` Theodore Ts'o
  2014-11-27 13:06     ` Dmitry Monakhov
  0 siblings, 1 reply; 6+ messages in thread
From: Theodore Ts'o @ 2014-11-25 18:27 UTC (permalink / raw)
  To: Dmitry Monakhov; +Cc: linux-ext4, cmm

On Fri, Nov 14, 2014 at 01:40:40PM +0400, Dmitry Monakhov wrote:
> We need some sort of synchronization while updating ->s_group_desc because there
> are a lot of users which can access old ->s_group_desc array after it was released.
> It is reasonable to use lightweight seqcount_t here instead of RCU.
> 
> Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>

I'm curious --- under what circumstances did you manage to hit this,
or was this something that you noticed?

> +	do {
> +		seq = read_seqcount_begin(&sbi->s_group_desc_seq);
> +		gd_bh = sbi->s_group_desc[group_desc];
> +	} while (unlikely(read_seqcount_retry(&sbi->s_group_desc_seq, seq)));

The problem is that s_group_desc is allocated using ext4_kvmalloc(),
which means it's possible that it was allocated using vmalloc().
Which means that it is possible (although unlikely) that the old
s_group_desc address could become invalidated after the call to
ext4_kvfree(o_group_desc).

This will only happen on 32-bit platforms if we get unlucky and the
kmap region gets recycled right after the vfree(); but we would only
see the bug in practice if the memory that gets kfree'ed gets reused
immediately afterwards, and we've been living with this with ext3 for
a long time.  Don't get me wrong; I'm not saying we should ignore this
bug, since I certainly agree with you that it is truly a bug.  But if
we are going to fix it, we should probably fix it all the way, which I
suspect means we may have to use RCU here.

OTOH, if you are hitting this in live production workloads, then we
can do the partial fix now, and then fix it all the way up later.  Is
the RCU mechanism really going to be that ugly?  It's not like we are
resizing all the time, so we don't need to worry about it being
heavyweight from a performance point of view, no?

	    	   	       	     	- Ted

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] ext4: fix potential use after free issue while fsresize
  2014-11-25 18:27   ` Theodore Ts'o
@ 2014-11-27 13:06     ` Dmitry Monakhov
  0 siblings, 0 replies; 6+ messages in thread
From: Dmitry Monakhov @ 2014-11-27 13:06 UTC (permalink / raw)
  To: Theodore Ts'o; +Cc: linux-ext4, cmm

[-- Attachment #1: Type: text/plain, Size: 2100 bytes --]

Theodore Ts'o <tytso@mit.edu> writes:

> On Fri, Nov 14, 2014 at 01:40:40PM +0400, Dmitry Monakhov wrote:
>> We need some sort of synchronization while updating ->s_group_desc because there
>> are a lot of users which can access old ->s_group_desc array after it was released.
>> It is reasonable to use lightweight seqcount_t here instead of RCU.
>> 
>> Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
>
> I'm curious --- under what circumstances did you manage to hit this,
> or was this something that you noticed?
>
>> +	do {
>> +		seq = read_seqcount_begin(&sbi->s_group_desc_seq);
>> +		gd_bh = sbi->s_group_desc[group_desc];
>> +	} while (unlikely(read_seqcount_retry(&sbi->s_group_desc_seq, seq)));
>
> The problem is that s_group_desc is allocated using ext4_kvmalloc(),
> which means it's possible that it was allocated using vmalloc().
> Which means that it is possible (although unlikely) that the old
> s_group_desc address could become invalidated after the call to
> ext4_kvfree(o_group_desc).
>
> This will only happen on 32-bit platforms if we get unlucky and the
> kmap region gets recycled right after the vfree(); but we would only
> see the bug in practice if the memory that gets kfree'ed gets reused
> immediately afterwards, and we've been living with this with ext3 for
> a long time.  Don't get me wrong; I'm not saying we should ignore this
> bug, since I certainly agree with you that it is truly a bug.  But if
> we are going to fix it, we should probably fix it all the way, which I
> suspect means we may have to use RCU here.
>
> OTOH, if you are hitting this in live production workloads, then we
> can do the partial fix now, and then fix it all the way up later.  Is
> the RCU mechanism really going to be that ugly?  It's not like we are
> resizing all the time, so we don't need to worry about it being
> heavyweight from a performance point of view, no?
I used seqcount_t because Doc/RCU/array.txt states that it is lighter,
but indeed I missed the vmalloc case, so I'll rewrite it to use RCU.
>
> 	    	   	       	     	- Ted

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 472 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2014-11-27 13:07 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-11-09 16:37 [PATCH 1/2] ext4: cleanup GFP flags inside resize path Dmitry Monakhov
2014-11-09 16:37 ` [PATCH 2/2] ext4: fix potential use after free issue while fsresize Dmitry Monakhov
2014-11-14  9:37 ` [PATCH 1/2] ext4: cleanup GFP flags inside resize path Dmitry Monakhov
  -- strict thread matches above, loose matches on Subject: below --
2014-11-14  9:40 [PATCH 1/2] ext4: cleanup GFP flags inside resize path V2 Dmitry Monakhov
2014-11-14  9:40 ` [PATCH 2/2] ext4: fix potential use after free issue while fsresize Dmitry Monakhov
2014-11-25 18:27   ` Theodore Ts'o
2014-11-27 13:06     ` Dmitry Monakhov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).